Merge remote-tracking branch 'upstream/main' into fix/4018-anti-fabrication-guardrails
# Conflicts: # src/resources/extensions/gsd/prompts/discuss-prepared.md
This commit is contained in:
commit
d5e4938320
221 changed files with 13259 additions and 4860 deletions
13
.github/workflows/ci.yml
vendored
13
.github/workflows/ci.yml
vendored
|
|
@ -155,7 +155,7 @@ jobs:
|
|||
run: npm run test:coverage
|
||||
|
||||
windows-portability:
|
||||
timeout-minutes: 15
|
||||
timeout-minutes: 25
|
||||
needs: detect-changes
|
||||
if: >-
|
||||
needs.detect-changes.outputs.docs-only != 'true'
|
||||
|
|
@ -180,12 +180,17 @@ jobs:
|
|||
- name: Typecheck extensions
|
||||
run: npm run typecheck:extensions
|
||||
|
||||
- name: Run unit tests
|
||||
run: npm run test:unit
|
||||
|
||||
- name: Run package tests
|
||||
run: npm run test:packages
|
||||
|
||||
- name: Run Windows portability tests
|
||||
run: >-
|
||||
node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs
|
||||
--experimental-strip-types --test
|
||||
src/tests/windows-portability.test.ts
|
||||
src/resources/extensions/gsd/tests/validate-directory.test.ts
|
||||
src/tests/integration/web-mode-windows-hide.test.ts
|
||||
|
||||
rtk-portability:
|
||||
timeout-minutes: 20
|
||||
needs: detect-changes
|
||||
|
|
|
|||
83
CHANGELOG.md
83
CHANGELOG.md
|
|
@ -6,6 +6,82 @@ Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
|||
|
||||
## [Unreleased]
|
||||
|
||||
## [2.71.0] - 2026-04-11
|
||||
|
||||
### Added
|
||||
- **mcp-server**: add secure_env_collect tool via MCP form elicitation
|
||||
|
||||
### Fixed
|
||||
- **tui**: clear pinned output on message_end to prevent duplicate display
|
||||
- **tui**: clear pinned latest output on turn completion
|
||||
- **tui**: restore pinned output above editor during tool execution
|
||||
- TOCTOU file locking race conditions in event log and custom workflow graph
|
||||
- **tui**: mask secure extension input values in interactive mode
|
||||
- **claude-code**: harden MCP elicitation schema handling
|
||||
- **claude-code**: accept secure_env_collect MCP elicitation forms
|
||||
- **interactive**: keep MCP tool output ordered and restore secure prompt fallback
|
||||
- **interactive**: preserve MCP tool output stream ordering
|
||||
- **gsd**: resolve workflow MCP test typing regressions
|
||||
- **mcp**: return isError flag on workflow tool execution failures
|
||||
- **discuss**: add structuredQuestionsAvailable conditional to all gates
|
||||
- **discuss**: add multi-round questioning to new-project discuss phase
|
||||
- **gsd**: harden claude-code workflow MCP bootstrap
|
||||
- **web**: drop provisional pre-tool question text
|
||||
|
||||
### Changed
|
||||
- extract deriveStateFromDb logic into composable helpers
|
||||
- **pr**: drop web-layer changes from MCP stream-order fix
|
||||
|
||||
## [2.70.1] - 2026-04-11
|
||||
|
||||
### Fixed
|
||||
- **routing**: address codex review — complete interactive bypass and accurate banner
|
||||
- **routing**: skip dynamic routing for interactive dispatches, always show model changes (#3962)
|
||||
- **ci**: trim windows portability integration load
|
||||
- **ci**: narrow windows portability coverage
|
||||
- **ci**: skip validate-pack in windows portability job
|
||||
- **ci**: unblock windows portability follow-up
|
||||
- **windows**: harden portability across runtime and tooling
|
||||
- **auto**: use pathToFileURL for cross-platform import and reconcile regression test
|
||||
- **auto**: resolve resource-loader.js from GSD_PKG_ROOT on resume (#3949)
|
||||
- **mcp-server**: importLocalModule resolves src/ paths from dist/ context
|
||||
- **gsd**: surface scoped doctor health warnings
|
||||
- **gsd**: skip skipped slices in milestone prompts
|
||||
- **gsd**: handle doubled-backtick pre-exec paths
|
||||
- **update**: fetch latest version from registry
|
||||
|
||||
## [2.70.0] - 2026-04-10
|
||||
|
||||
### Added
|
||||
- **mcp-server**: expose ask_user_questions via elicitation
|
||||
|
||||
### Fixed
|
||||
- **pi-ai**: remove Anthropic OAuth flow for TOS compliance
|
||||
- **mcp-server**: hydrate model credentials into env
|
||||
- **mcp-server**: hydrate stored tool credentials on startup
|
||||
- **gsd**: auto-enable cmux when detected instead of prompting
|
||||
- **mcp-server**: URL scheme regex no longer matches Windows drive letters
|
||||
|
||||
## [2.69.0] - 2026-04-10
|
||||
|
||||
### Added
|
||||
- **gsd**: implement ADR-005 multi-model provider and tool strategy
|
||||
- **gsd**: complete ADR-004 capability-aware model routing implementation
|
||||
|
||||
### Fixed
|
||||
- **gsd**: add missing directories to codebase generator exclude list
|
||||
- **gsd**: wire ADR-005 infrastructure into live paths
|
||||
- **gsd**: replace empty catch with logWarning for CI compliance
|
||||
- **gsd**: merge enhanced context sections into standard template, clean up stale gate patterns
|
||||
- **gsd**: remove broken discuss-prepared template, inject briefs into discuss.md
|
||||
|
||||
## [2.68.1] - 2026-04-10
|
||||
|
||||
### Fixed
|
||||
- **ci**: update FILE-SYSTEM-MAP.md path after docs reorganization
|
||||
- **test**: update discord invite test path after docs reorganization
|
||||
- **gsd**: resolve resource-loader import for deployed extensions
|
||||
|
||||
## [2.68.0] - 2026-04-10
|
||||
|
||||
### Added
|
||||
|
|
@ -2664,7 +2740,12 @@ Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
|||
### Changed
|
||||
- License updated to MIT
|
||||
|
||||
[Unreleased]: https://github.com/gsd-build/gsd-2/compare/v2.68.0...HEAD
|
||||
[Unreleased]: https://github.com/gsd-build/gsd-2/compare/v2.71.0...HEAD
|
||||
[2.71.0]: https://github.com/gsd-build/gsd-2/compare/v2.70.1...v2.71.0
|
||||
[2.70.1]: https://github.com/gsd-build/gsd-2/compare/v2.70.0...v2.70.1
|
||||
[2.70.0]: https://github.com/gsd-build/gsd-2/compare/v2.69.0...v2.70.0
|
||||
[2.69.0]: https://github.com/gsd-build/gsd-2/compare/v2.68.1...v2.69.0
|
||||
[2.68.1]: https://github.com/gsd-build/gsd-2/compare/v2.68.0...v2.68.1
|
||||
[2.68.0]: https://github.com/gsd-build/gsd-2/compare/v2.67.0...v2.68.0
|
||||
[2.67.0]: https://github.com/gsd-build/gsd-2/compare/v2.66.1...v2.67.0
|
||||
[2.66.1]: https://github.com/gsd-build/gsd-2/compare/v2.66.0...v2.66.1
|
||||
|
|
|
|||
118
README.md
118
README.md
|
|
@ -21,42 +21,49 @@ One command. Walk away. Come back to a built project with clean git history.
|
|||
|
||||
> GSD now provisions a managed [RTK](https://github.com/rtk-ai/rtk) binary on supported macOS, Linux, and Windows installs to compress shell-command output in `bash`, `async_bash`, `bg_shell`, and verification flows. GSD forces `RTK_TELEMETRY_DISABLED=1` for all managed invocations. Set `GSD_RTK_DISABLED=1` to disable the integration.
|
||||
|
||||
> **📋 NOTICE: New to Node on Mac?** If you installed Node.js via Homebrew, you may be running a development release instead of LTS. **[Read this guide](./docs/node-lts-macos.md)** to pin Node 24 LTS and avoid compatibility issues.
|
||||
> **📋 NOTICE: New to Node on Mac?** If you installed Node.js via Homebrew, you may be running a development release instead of LTS. **[Read this guide](./docs/user-docs/node-lts-macos.md)** to pin Node 24 LTS and avoid compatibility issues.
|
||||
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## What's New in v2.68
|
||||
## What's New in v2.71
|
||||
|
||||
### MCP Workflow Tools
|
||||
### MCP Secure Env Collect
|
||||
|
||||
- **Full workflow over MCP** — slice replanning, milestone management, slice completion, task completion, and core planning tools are now exposed over MCP for external integrations.
|
||||
- **Transport-gated MCP** — workflow tool availability adapts to provider transport capabilities automatically.
|
||||
- **Write gate enforcement** — workflow MCP respects write gates, preventing unauthorized state mutations from external clients.
|
||||
- **Secure credential collection over MCP** — the new `secure_env_collect` tool uses MCP form elicitation to collect secrets (API keys, tokens) from external clients without exposing values in tool output. Masks input in interactive mode.
|
||||
- **Hardened elicitation schema** — MCP elicitation schema handling is stricter, with proper validation and fallback for providers that don't support forms.
|
||||
|
||||
### Reliability & Recovery
|
||||
### MCP Reliability
|
||||
|
||||
- **False degraded-mode fix** — eliminates spurious degraded-mode warnings when the DB hasn't been initialized yet.
|
||||
- **Stale session resume suppression** — prevents stale interrupted-session resume prompts from hijacking fresh sessions.
|
||||
- **Merge conflict recovery** — `autoCommitDirtyState` guarded with cwd restore on `MergeConflictError`.
|
||||
- **Auto-resume hardening** — `autoStartTime` restored on resume, managed resources resynced on auto resume.
|
||||
- **Stream ordering preserved** — MCP tool output now renders in the correct order, fixing interleaved output in Claude Code and other MCP clients.
|
||||
- **isError flag propagation** — workflow tool execution failures now correctly return `isError: true`, so MCP clients can distinguish success from failure.
|
||||
- **Multi-round discuss questions** — new-project discuss phase supports multi-round questioning with structured question gates.
|
||||
|
||||
### TUI & Developer Experience
|
||||
### TUI Fixes
|
||||
|
||||
- **Contextual tips system** — TUI and web terminal now surface contextual tips based on workflow state.
|
||||
- **Claude Code MCP streaming** — real-time streaming and tool output rendering for Claude Code MCP connections.
|
||||
- **Pinned output restored** — pinned output bar displays above the editor during tool execution again.
|
||||
- **Turn completion cleanup** — pinned latest output is cleared on turn completion, preventing stale output from persisting.
|
||||
- **Secure input masking** — extension input values are masked in interactive mode when collecting secrets.
|
||||
|
||||
### Infrastructure
|
||||
### Reliability & Internals
|
||||
|
||||
- **Weekly model registry refresh** — CI workflow auto-regenerates the model registry on a weekly schedule.
|
||||
- **Codebase cache auto-refresh** — stale codebase cache is refreshed automatically without manual intervention.
|
||||
- **TOCTOU file locking** — race conditions in event log and custom workflow graph file locking are fixed with proper atomic lock acquisition.
|
||||
- **State derive refactor** — `deriveStateFromDb` god function extracted into composable, testable helpers.
|
||||
- **Windows portability** — hardened cross-platform portability across runtime, tooling, and CI.
|
||||
- **Model routing transparency** — dynamic routing is skipped for interactive dispatches; model changes are always shown in the banner.
|
||||
- **Capability-aware routing (ADR-004)** — full implementation of capability scoring, `before_model_select` hook, and task metadata extraction.
|
||||
- **Multi-model provider strategy (ADR-005)** — infrastructure for multi-provider model selection wired into live paths.
|
||||
|
||||
See the full [Changelog](./CHANGELOG.md) for details on every release.
|
||||
|
||||
<details>
|
||||
<summary>Previous highlights (v2.67 and earlier)</summary>
|
||||
<summary>Previous highlights (v2.70 and earlier)</summary>
|
||||
|
||||
- **Full workflow over MCP (v2.68)** — slice replanning, milestone management, slice completion, task completion, and core planning tools exposed over MCP
|
||||
- **Transport-gated MCP (v2.68)** — workflow tool availability adapts to provider transport capabilities automatically
|
||||
- **Contextual tips system (v2.68)** — TUI and web terminal surface contextual tips based on workflow state
|
||||
- **Ask user questions over MCP (v2.70)** — interactive questions exposed via elicitation for external integrations
|
||||
- **Tiered Context Injection (M005)** — relevance-scoped context with 65%+ token reduction
|
||||
- **Resilient transient error recovery** — defers to Core RetryHandler and fixes cmdCtx race conditions
|
||||
- **Anthropic subscription routing** — auto-routed through Claude Code CLI provider with proper display names
|
||||
|
|
@ -86,30 +93,35 @@ See the full [Changelog](./CHANGELOG.md) for details on every release.
|
|||
|
||||
## Documentation
|
||||
|
||||
Full documentation is available at **[gsd.build](https://gsd.build)** (powered by Mintlify) and in the [`docs/`](./docs/) directory:
|
||||
Full documentation is in the [`docs/`](./docs/) directory:
|
||||
|
||||
- **[Getting Started](./docs/getting-started.md)** — install, first run, basic usage
|
||||
- **[Auto Mode](./docs/auto-mode.md)** — autonomous execution deep-dive
|
||||
- **[Configuration](./docs/configuration.md)** — all preferences, models, git, and hooks
|
||||
- **[Custom Models](./docs/custom-models.md)** — add custom providers (Ollama, vLLM, LM Studio, proxies)
|
||||
- **[Token Optimization](./docs/token-optimization.md)** — profiles, context compression, complexity routing
|
||||
- **[Cost Management](./docs/cost-management.md)** — budgets, tracking, projections
|
||||
- **[Git Strategy](./docs/git-strategy.md)** — worktree isolation, branching, merge behavior
|
||||
- **[Parallel Orchestration](./docs/parallel-orchestration.md)** — run multiple milestones simultaneously
|
||||
- **[Working in Teams](./docs/working-in-teams.md)** — unique IDs, shared artifacts
|
||||
- **[Skills](./docs/skills.md)** — bundled skills, discovery, custom authoring
|
||||
- **[Commands Reference](./docs/commands.md)** — all commands and keyboard shortcuts
|
||||
- **[Architecture](./docs/architecture.md)** — system design and dispatch pipeline
|
||||
- **[Troubleshooting](./docs/troubleshooting.md)** — common issues, doctor, forensics, recovery
|
||||
- **[CI/CD Pipeline](./docs/ci-cd-pipeline.md)** — three-stage promotion pipeline (Dev → Test → Prod)
|
||||
- **[VS Code Extension](./vscode-extension/README.md)** — chat participant, sidebar dashboard, RPC integration
|
||||
- **[Visualizer](./docs/visualizer.md)** — workflow visualizer with stats and discussion status
|
||||
- **[Remote Questions](./docs/remote-questions.md)** — route decisions to Slack or Discord when human input is needed
|
||||
- **[Dynamic Model Routing](./docs/dynamic-model-routing.md)** — complexity-based model selection and budget pressure
|
||||
- **[Web Interface](./docs/web-interface.md)** — browser-based project management and real-time progress
|
||||
- **[Pipeline Simplification (ADR-003)](./docs/ADR-003-pipeline-simplification.md)** — merged research into planning, mechanical completion
|
||||
### User Guides
|
||||
|
||||
- **[Getting Started](./docs/user-docs/getting-started.md)** — install, first run, basic usage
|
||||
- **[Auto Mode](./docs/user-docs/auto-mode.md)** — autonomous execution deep-dive
|
||||
- **[Configuration](./docs/user-docs/configuration.md)** — all preferences, models, git, and hooks
|
||||
- **[Custom Models](./docs/user-docs/custom-models.md)** — add custom providers (Ollama, vLLM, LM Studio, proxies)
|
||||
- **[Token Optimization](./docs/user-docs/token-optimization.md)** — profiles, context compression, complexity routing
|
||||
- **[Cost Management](./docs/user-docs/cost-management.md)** — budgets, tracking, projections
|
||||
- **[Git Strategy](./docs/user-docs/git-strategy.md)** — worktree isolation, branching, merge behavior
|
||||
- **[Parallel Orchestration](./docs/user-docs/parallel-orchestration.md)** — run multiple milestones simultaneously
|
||||
- **[Working in Teams](./docs/user-docs/working-in-teams.md)** — unique IDs, shared artifacts
|
||||
- **[Skills](./docs/user-docs/skills.md)** — bundled skills, discovery, custom authoring
|
||||
- **[Commands Reference](./docs/user-docs/commands.md)** — all commands and keyboard shortcuts
|
||||
- **[Troubleshooting](./docs/user-docs/troubleshooting.md)** — common issues, doctor, forensics, recovery
|
||||
- **[Visualizer](./docs/user-docs/visualizer.md)** — workflow visualizer with stats and discussion status
|
||||
- **[Remote Questions](./docs/user-docs/remote-questions.md)** — route decisions to Slack or Discord when human input is needed
|
||||
- **[Dynamic Model Routing](./docs/user-docs/dynamic-model-routing.md)** — complexity-based model selection and budget pressure
|
||||
- **[Web Interface](./docs/user-docs/web-interface.md)** — browser-based project management and real-time progress
|
||||
- **[Migration from v1](./docs/user-docs/migration.md)** — `.planning` → `.gsd` migration
|
||||
- **[Docker Sandbox](./docker/README.md)** — run GSD auto mode in an isolated Docker container
|
||||
- **[Migration from v1](./docs/migration.md)** — `.planning` → `.gsd` migration
|
||||
|
||||
### Developer Docs
|
||||
|
||||
- **[Architecture](./docs/dev/architecture.md)** — system design and dispatch pipeline
|
||||
- **[CI/CD Pipeline](./docs/dev/ci-cd-pipeline.md)** — three-stage promotion pipeline (Dev → Test → Prod)
|
||||
- **[Pipeline Simplification (ADR-003)](./docs/dev/ADR-003-pipeline-simplification.md)** — merged research into planning, mechanical completion
|
||||
- **[VS Code Extension](./vscode-extension/README.md)** — chat participant, sidebar dashboard, RPC integration
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -325,7 +337,7 @@ gsd headless query
|
|||
gsd headless dispatch plan
|
||||
```
|
||||
|
||||
Headless auto-responds to interactive prompts, detects completion, and exits with structured codes: `0` complete, `1` error/timeout, `2` blocked. Auto-restarts on crash with exponential backoff. Use `gsd headless query` for instant, machine-readable state inspection — returns phase, next dispatch preview, and parallel worker costs as a single JSON object without spawning an LLM session. Pair with [remote questions](./docs/remote-questions.md) to route decisions to Slack or Discord when human input is needed.
|
||||
Headless auto-responds to interactive prompts, detects completion, and exits with structured codes: `0` complete, `1` error/timeout, `2` blocked. Auto-restarts on crash with exponential backoff. Use `gsd headless query` for instant, machine-readable state inspection — returns phase, next dispatch preview, and parallel worker costs as a single JSON object without spawning an LLM session. Pair with [remote questions](./docs/user-docs/remote-questions.md) to route decisions to Slack or Discord when human input is needed.
|
||||
|
||||
**Multi-session orchestration** — headless mode supports file-based IPC in `.gsd/parallel/` for coordinating multiple GSD workers across milestones. Build orchestrators that spawn, monitor, and budget-cap a fleet of GSD workers.
|
||||
|
||||
|
|
@ -498,9 +510,8 @@ auto_report: true
|
|||
| `verification_commands`| Array of shell commands to run after task execution (e.g., `["npm run lint", "npm run test"]`) |
|
||||
| `verification_auto_fix`| Auto-retry on verification failures (default: true) |
|
||||
| `verification_max_retries` | Max retries for verification failures (default: 2) |
|
||||
| `require_slice_discussion` | Pause auto-mode before each slice for human discussion review |
|
||||
| `phases.require_slice_discussion` | Pause auto-mode before each slice for human discussion review |
|
||||
| `auto_report` | Auto-generate HTML reports after milestone completion (default: true) |
|
||||
| `searchExcludeDirs` | Directories to exclude from `@` file autocomplete (e.g., `["node_modules", ".git", "dist"]`) |
|
||||
|
||||
### Agent Instructions
|
||||
|
||||
|
|
@ -530,7 +541,7 @@ token_profile: budget # or balanced (default), quality
|
|||
|
||||
**Budget pressure** graduates model downgrading as you approach your budget ceiling — 50%, 75%, and 90% thresholds progressively shift work to cheaper tiers.
|
||||
|
||||
See the full [Token Optimization Guide](./docs/token-optimization.md) for details.
|
||||
See the full [Token Optimization Guide](./docs/user-docs/token-optimization.md) for details.
|
||||
|
||||
### Bundled Tools
|
||||
|
||||
|
|
@ -565,13 +576,15 @@ GSD ships with 24 extensions, all loaded automatically:
|
|||
|
||||
### Bundled Agents
|
||||
|
||||
Three specialized subagents for delegated work:
|
||||
Five specialized subagents for delegated work:
|
||||
|
||||
| Agent | Role |
|
||||
| -------------- | ------------------------------------------------------------ |
|
||||
| **Scout** | Fast codebase recon — returns compressed context for handoff |
|
||||
| **Researcher** | Web research — finds and synthesizes current information |
|
||||
| **Worker** | General-purpose execution in an isolated context window |
|
||||
| Agent | Role |
|
||||
| ------------------- | ------------------------------------------------------------ |
|
||||
| **Scout** | Fast codebase recon — returns compressed context for handoff |
|
||||
| **Researcher** | Web research — finds and synthesizes current information |
|
||||
| **Worker** | General-purpose execution in an isolated context window |
|
||||
| **JavaScript Pro** | JavaScript-specialized execution and debugging |
|
||||
| **TypeScript Pro** | TypeScript-specialized execution and debugging |
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -646,9 +659,8 @@ gsd (CLI binary)
|
|||
├─ resource-loader.ts Syncs bundled extensions + agents to ~/.gsd/agent/
|
||||
└─ src/resources/
|
||||
├─ extensions/gsd/ Core GSD extension (auto, state, commands, ...)
|
||||
├─ extensions/... 23 supporting extensions
|
||||
├─ agents/ scout, researcher, worker
|
||||
├─ AGENTS.md Agent routing instructions
|
||||
├─ extensions/... 21 supporting extensions
|
||||
├─ agents/ scout, researcher, worker, javascript-pro, typescript-pro
|
||||
└─ GSD-WORKFLOW.md Manual bootstrap protocol
|
||||
```
|
||||
|
||||
|
|
|
|||
67
docs/dev/ADR-005-multi-model-provider-tool-strategy.md
Normal file
67
docs/dev/ADR-005-multi-model-provider-tool-strategy.md
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
# ADR-005: Multi-Model, Multi-Provider, and Tool Strategy
|
||||
|
||||
**Status:** Accepted
|
||||
**Date:** 2026-03-27
|
||||
**Deciders:** Jeremy McSpadden
|
||||
**Related:** ADR-004 (capability-aware model routing), ADR-003 (pipeline simplification), [Issue #2790](https://github.com/gsd-build/gsd-2/issues/2790)
|
||||
|
||||
## Context
|
||||
|
||||
PR #2755 lands capability-aware model routing (ADR-004), extending the router from a one-dimensional complexity-tier system to a two-dimensional system that scores models across 7 capability dimensions. GSD can now intelligently pick the best model for a task from a heterogeneous pool.
|
||||
|
||||
But model selection is only one piece of the multi-model puzzle. The system faces structural gaps as users configure diverse provider pools:
|
||||
|
||||
1. **Tool compatibility is assumed, not verified** — Every registered tool is sent to every model regardless of provider capabilities.
|
||||
2. **No tool-aware model routing** — ADR-004 scores 7 capability dimensions but none encode whether a model can actually use the tools a task requires.
|
||||
3. **Provider failover loses context fidelity** — Cross-provider switches silently degrade conversation quality (thinking blocks dropped, tool IDs remapped).
|
||||
4. **Tool availability is static across a session** — The same tools are presented regardless of the selected model's capabilities.
|
||||
5. **No provider capability registry** — Provider quirks are scattered across `*-shared.ts` files.
|
||||
|
||||
## Decision
|
||||
|
||||
Introduce a provider capability registry and tool compatibility layer that integrates with ADR-004's capability-aware model router.
|
||||
|
||||
### Design Principles
|
||||
|
||||
1. **Layered on ADR-004, not replacing it.** Capability scoring remains primary. This adds tool compatibility as a hard constraint.
|
||||
2. **Hard constraints filter; soft scores rank.** Tool support is binary — it filters the eligible set before scoring.
|
||||
3. **Provider knowledge is declarative, not scattered.** Provider capabilities move to an explicit registry.
|
||||
4. **Tool sets adapt to model capabilities.** Active tool set adjusts when the router selects a different model.
|
||||
5. **Graceful degradation preserved.** Unknown providers get full tool access — same as today.
|
||||
|
||||
### Implementation Phases
|
||||
|
||||
1. **Phase 1:** Provider Capabilities Registry (`packages/pi-ai/src/providers/provider-capabilities.ts`)
|
||||
2. **Phase 2:** Tool Compatibility Metadata (extend `ToolDefinition` with `compatibility` field)
|
||||
3. **Phase 3:** Tool-compatibility filter in routing pipeline + `ProviderSwitchReport` in `transform-messages.ts`
|
||||
4. **Phase 4:** `adjustToolSet` extension hook
|
||||
|
||||
## Consequences
|
||||
|
||||
### Positive
|
||||
- Eliminates silent tool failures when routing to incompatible providers
|
||||
- Makes cross-provider routing safe by default
|
||||
- Provider knowledge becomes queryable (registry vs scattered code)
|
||||
- Cross-provider context loss becomes visible via `ProviderSwitchReport`
|
||||
|
||||
### Negative
|
||||
- More metadata to maintain (provider capabilities, tool compatibility)
|
||||
- Tool filtering adds a pipeline step (sub-millisecond, O(models × tools))
|
||||
- Risk of over-filtering (mitigated: opt-in per tool, permissive defaults)
|
||||
|
||||
### Neutral
|
||||
- Existing behavior unchanged without metadata
|
||||
- ADR-004 scoring is unmodified
|
||||
- Provider implementations simplify over time as registry replaces scattered workarounds
|
||||
|
||||
## Appendix: Architecture Reference
|
||||
|
||||
| File | Role |
|
||||
|------|------|
|
||||
| `packages/pi-ai/src/providers/register-builtins.ts` | Provider registration |
|
||||
| `packages/pi-ai/src/providers/*-shared.ts` | Provider-specific handling |
|
||||
| `packages/pi-ai/src/providers/transform-messages.ts` | Cross-provider normalization |
|
||||
| `packages/pi-ai/src/types.ts` | Core types |
|
||||
| `packages/pi-coding-agent/src/core/extensions/types.ts` | ToolDefinition, ExtensionAPI |
|
||||
| `src/resources/extensions/gsd/model-router.ts` | Capability scoring (ADR-004) |
|
||||
| `src/resources/extensions/gsd/auto-model-selection.ts` | Model selection orchestration |
|
||||
|
|
@ -86,18 +86,15 @@ Implication for GSD2:
|
|||
|
||||
These are directionally correct because GSD is using the user's own local Claude Code installation as the authenticated Anthropic surface.
|
||||
|
||||
### Medium/high-risk pieces
|
||||
### Medium/high-risk pieces — RESOLVED
|
||||
|
||||
- `packages/pi-ai/src/utils/oauth/anthropic.ts`
|
||||
Still implements a first-party-looking Anthropic OAuth flow for GSD itself using `claude.ai/oauth/authorize` and `platform.claude.com/v1/oauth/token`.
|
||||
- `packages/pi-ai/src/utils/oauth/index.ts`
|
||||
Still registers `anthropicOAuthProvider` as a built-in OAuth provider.
|
||||
- `src/web/onboarding-service.ts`
|
||||
Still advertises Anthropic as `supportsOAuth: true`, which keeps the web onboarding surface inconsistent with the TUI stance.
|
||||
- `packages/daemon/src/orchestrator.ts`
|
||||
Reads Anthropic OAuth credentials from `~/.gsd/agent/auth.json`, refreshes them, and then uses the access token for Anthropic API calls.
|
||||
All Anthropic OAuth code paths have been removed:
|
||||
|
||||
The key risk is not just stale UI. The repo still contains code paths where GSD can behave as a third-party Anthropic OAuth client and then convert that credential into direct API access.
|
||||
- `packages/pi-ai/src/utils/oauth/anthropic.ts` — **Deleted.** No longer implements Anthropic OAuth flow.
|
||||
- `packages/pi-ai/src/utils/oauth/index.ts` — **Updated.** `anthropicOAuthProvider` removed from built-in registry.
|
||||
- `src/web/onboarding-service.ts` — **Updated.** Anthropic set to `supportsOAuth: false`.
|
||||
- `packages/daemon/src/orchestrator.ts` — **Updated.** OAuth token refresh removed; requires `ANTHROPIC_API_KEY` env var.
|
||||
- `packages/pi-ai/src/providers/anthropic.ts` — **Updated.** OAuth client branch removed; `isOAuthToken` always returns false.
|
||||
|
||||
## Recommended Policy For GSD2
|
||||
|
||||
|
|
@ -149,14 +146,14 @@ This is the best long-term UX because it separates:
|
|||
- API-billed usage
|
||||
- cloud-routed usage
|
||||
|
||||
## Concrete Repo Follow-ups
|
||||
## Concrete Repo Follow-ups — COMPLETED
|
||||
|
||||
1. Delete or disable `packages/pi-ai/src/utils/oauth/anthropic.ts`.
|
||||
2. Remove `anthropicOAuthProvider` from `packages/pi-ai/src/utils/oauth/index.ts`.
|
||||
3. Change `src/web/onboarding-service.ts` so Anthropic does not claim OAuth support.
|
||||
4. Audit `packages/daemon/src/orchestrator.ts` and any other callers that treat Anthropic OAuth access tokens as API credentials.
|
||||
5. Update docs/UI labels to prefer `anthropic-api` for direct API usage and `claude-code` for subscription usage.
|
||||
6. Add tests that fail if Anthropic subscription OAuth is reintroduced through the onboarding/provider registry.
|
||||
1. ~~Delete or disable `packages/pi-ai/src/utils/oauth/anthropic.ts`.~~ **Done** — file deleted.
|
||||
2. ~~Remove `anthropicOAuthProvider` from `packages/pi-ai/src/utils/oauth/index.ts`.~~ **Done.**
|
||||
3. ~~Change `src/web/onboarding-service.ts` so Anthropic does not claim OAuth support.~~ **Done.**
|
||||
4. ~~Audit `packages/daemon/src/orchestrator.ts` and any other callers that treat Anthropic OAuth access tokens as API credentials.~~ **Done** — daemon now requires `ANTHROPIC_API_KEY`.
|
||||
5. ~~Update docs/UI labels to prefer `anthropic-api` for direct API usage and `claude-code` for subscription usage.~~ **Done** — providers.md and getting-started.md updated.
|
||||
6. Add tests that fail if Anthropic subscription OAuth is reintroduced through the onboarding/provider registry. — **TODO.**
|
||||
|
||||
## Decision Rule
|
||||
|
||||
|
|
|
|||
|
|
@ -148,6 +148,7 @@ Recommended verification order:
|
|||
|
||||
- Use absolute paths for local executables and scripts when possible.
|
||||
- For `stdio` servers, prefer setting required environment variables directly in the MCP config instead of relying on an interactive shell profile.
|
||||
- GSD and `gsd-mcp-server` both hydrate supported model and tool keys saved in `~/.gsd/agent/auth.json`, so MCP configs can safely reference them through `${ENV_VAR}` placeholders without committing raw credentials.
|
||||
- If a server is team-shared and safe to commit, `.mcp.json` is usually the better home.
|
||||
- If a server depends on machine-local paths, personal services, or local-only secrets, prefer `.gsd/mcp.json`.
|
||||
|
||||
|
|
|
|||
|
|
@ -1,74 +1,311 @@
|
|||
# Getting Started
|
||||
# Getting Started with GSD
|
||||
|
||||
## Install
|
||||
GSD is an AI coding agent that handles planning, execution, verification, and shipping so you can focus on what to build. This guide walks you through installation on macOS, Windows, and Linux, then gets you running your first session.
|
||||
|
||||
---
|
||||
|
||||
## Prerequisites
|
||||
|
||||
| Requirement | Minimum | Recommended |
|
||||
|-------------|---------|-------------|
|
||||
| **[Node.js](https://nodejs.org/)** | 22.0.0 | 24 LTS |
|
||||
| **[Git](https://git-scm.com/)** | 2.20+ | Latest |
|
||||
| **LLM API key** | Any supported provider | Anthropic (Claude) |
|
||||
|
||||
Don't have Node.js or Git yet? Follow the OS-specific instructions below.
|
||||
|
||||
---
|
||||
|
||||
## Install by Operating System
|
||||
|
||||
### macOS
|
||||
|
||||
> **Downloads:** [Node.js](https://nodejs.org/) | [Git](https://git-scm.com/download/mac) | [Homebrew](https://brew.sh/)
|
||||
|
||||
**Step 1 — Install Homebrew** (skip if you already have it):
|
||||
|
||||
```bash
|
||||
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
|
||||
```
|
||||
|
||||
**Step 2 — Install Node.js and Git:**
|
||||
|
||||
```bash
|
||||
brew install node git
|
||||
```
|
||||
|
||||
**Step 3 — Verify dependencies are installed:**
|
||||
|
||||
```bash
|
||||
node --version # should print v22.x or higher
|
||||
git --version # should print 2.20+
|
||||
```
|
||||
|
||||
**Step 4 — Install GSD:**
|
||||
|
||||
```bash
|
||||
npm install -g gsd-pi
|
||||
```
|
||||
|
||||
Requires Node.js ≥ 22.0.0 (24 LTS recommended) and Git.
|
||||
|
||||
> **`command not found: gsd`?** Your shell may not have npm's global bin directory in `$PATH`. Run `npm prefix -g` to find it, then add `$(npm prefix -g)/bin` to your PATH. See [Troubleshooting](./troubleshooting.md#command-not-found-gsd-after-install) for details.
|
||||
|
||||
GSD checks for updates once every 24 hours. When a new version is available, you'll see an interactive prompt at startup with the option to update immediately or skip. You can also update from within a session with `/gsd update`.
|
||||
|
||||
### Set up API keys
|
||||
|
||||
If you use a non-Anthropic model, you'll need a search API key for web search. Run `/gsd config` to set keys globally — they're saved to `~/.gsd/agent/auth.json` and apply to all projects:
|
||||
**Step 5 — Set up your LLM provider:**

```bash
# Option A: Set an environment variable (Anthropic recommended)
export ANTHROPIC_API_KEY="sk-ant-..."

# Option B: Use the built-in config wizard
gsd config
```
|
||||
|
||||
See [Global API Keys](./configuration.md#global-api-keys-gsd-config) for details on supported keys.
|
||||
|
||||
### Set up custom MCP servers
|
||||
|
||||
If you want GSD to call local or external MCP servers, add project-local config in `.mcp.json` or `.gsd/mcp.json`.
|
||||
|
||||
See [Configuration → MCP Servers](./configuration.md#mcp-servers) for examples and verification steps.
|
||||
|
||||
### VS Code Extension
|
||||
|
||||
GSD is also available as a VS Code extension. Install from the marketplace (publisher: FluxLabs) or search for "GSD" in VS Code extensions. The extension provides:
|
||||
|
||||
- **`@gsd` chat participant** — talk to the agent in VS Code Chat
|
||||
- **Sidebar dashboard** — connection status, model info, token usage, quick actions
|
||||
- **Full command palette** — start/stop agent, switch models, export sessions
|
||||
|
||||
The CLI (`gsd-pi`) must be installed first — the extension connects to it via RPC.
|
||||
|
||||
### Web Interface
|
||||
|
||||
GSD also has a browser-based interface. Run `gsd --web` to start a local web server with a visual dashboard, real-time progress, and multi-project support. See [Web Interface](./web-interface.md) for details.
|
||||
|
||||
## First Launch
|
||||
|
||||
Run `gsd` in any directory:
|
||||
|
||||
```bash
|
||||
gsd
|
||||
```
|
||||
|
||||
GSD displays a welcome screen showing your version, active model, and available tool keys. Then on first launch, it runs a setup wizard:
|
||||
|
||||
1. **LLM Provider** — select from 20+ providers (Anthropic, OpenAI, Google, OpenRouter, GitHub Copilot, Amazon Bedrock, Azure, and more). OAuth flows handle Claude Max and Copilot subscriptions automatically; otherwise paste an API key.
|
||||
2. **Tool API Keys** (optional) — Brave Search, Context7, Jina, Slack, Discord. Press Enter to skip any.
|
||||
|
||||
If you have an existing Pi installation, provider credentials are imported automatically.
|
||||
|
||||
For detailed setup instructions for specific providers (OpenRouter, Ollama, LM Studio, vLLM, and more), see the [Provider Setup Guide](./providers.md).
|
||||
|
||||
Re-run the wizard anytime with:
|
||||
|
||||
```bash
gsd config
```
|
||||
|
||||
To persist the key, add the export line to `~/.zshrc`:
|
||||
```bash
|
||||
echo 'export ANTHROPIC_API_KEY="sk-ant-..."' >> ~/.zshrc
|
||||
source ~/.zshrc
|
||||
```
|
||||
|
||||
See [Provider Setup Guide](./providers.md) for all 20+ supported providers.
|
||||
|
||||
**Step 6 — Launch GSD:**
|
||||
|
||||
```bash
|
||||
cd ~/my-project # navigate to any project
|
||||
gsd # start a session
|
||||
```
|
||||
|
||||
**Step 7 — Verify everything works:**
|
||||
|
||||
```bash
|
||||
gsd --version # prints the installed version
|
||||
```
|
||||
|
||||
Inside the session, type `/model` to confirm your LLM is connected.
|
||||
|
||||
> **Apple Silicon PATH fix:** If `gsd` isn't found after install, npm's global bin may not be in your PATH:
|
||||
> ```bash
|
||||
> echo 'export PATH="$(npm prefix -g)/bin:$PATH"' >> ~/.zshrc
|
||||
> source ~/.zshrc
|
||||
> ```
|
||||
|
||||
> **oh-my-zsh conflict:** The oh-my-zsh git plugin defines `alias gsd='git svn dcommit'`. Fix with `unalias gsd 2>/dev/null` in `~/.zshrc`, or use `gsd-cli` instead.
|
||||
|
||||
---
|
||||
|
||||
### Windows
|
||||
|
||||
> **Downloads:** [Node.js](https://nodejs.org/) | [Git for Windows](https://git-scm.com/download/win) | [Windows Terminal](https://aka.ms/terminal)
|
||||
|
||||
#### Option A: winget (recommended for Windows 10/11)
|
||||
|
||||
**Step 1 — Install Node.js and Git:**
|
||||
|
||||
```powershell
|
||||
winget install OpenJS.NodeJS.LTS
|
||||
winget install Git.Git
|
||||
```
|
||||
|
||||
**Step 2 — Restart your terminal** (close and reopen PowerShell or Windows Terminal).
|
||||
|
||||
**Step 3 — Verify dependencies are installed:**
|
||||
|
||||
```powershell
|
||||
node --version # should print v22.x or higher
|
||||
git --version # should print 2.20+
|
||||
```
|
||||
|
||||
**Step 4 — Install GSD:**
|
||||
|
||||
```powershell
|
||||
npm install -g gsd-pi
|
||||
```
|
||||
|
||||
**Step 5 — Set up your LLM provider:**
|
||||
|
||||
```powershell
|
||||
# Option A: Set an environment variable (current session)
|
||||
$env:ANTHROPIC_API_KEY = "sk-ant-..."
|
||||
|
||||
# Option B: Use the built-in config wizard
|
||||
gsd config
|
||||
```
|
||||
|
||||
To persist the key permanently, add it via System Settings > Environment Variables, or run:
|
||||
|
||||
```powershell
|
||||
[System.Environment]::SetEnvironmentVariable("ANTHROPIC_API_KEY", "sk-ant-...", "User")
|
||||
```
|
||||
|
||||
See [Provider Setup Guide](./providers.md) for all 20+ supported providers.
|
||||
|
||||
**Step 6 — Launch GSD:**
|
||||
|
||||
```powershell
|
||||
cd C:\Users\you\my-project # navigate to any project
|
||||
gsd # start a session
|
||||
```
|
||||
|
||||
**Step 7 — Verify everything works:**
|
||||
|
||||
```powershell
|
||||
gsd --version # prints the installed version
|
||||
```
|
||||
|
||||
Inside the session, type `/model` to confirm your LLM is connected.
|
||||
|
||||
#### Option B: Manual install
|
||||
|
||||
1. Download and install [Node.js LTS](https://nodejs.org/) — check **"Add to PATH"** during setup
|
||||
2. Download and install [Git for Windows](https://git-scm.com/download/win) — use default options
|
||||
3. Open a **new** terminal, then follow Steps 3-7 above
|
||||
|
||||
> **Windows tips:**
|
||||
> - Use **Windows Terminal** or **PowerShell** for the best experience. Command Prompt works but has limited color support.
|
||||
> - If `gsd` isn't recognized, restart your terminal. Windows needs a fresh terminal to pick up new PATH entries.
|
||||
> - **WSL2** also works — install WSL, then follow the Linux instructions inside your distro.
|
||||
|
||||
---
|
||||
|
||||
### Linux
|
||||
|
||||
> **Downloads:** [Node.js](https://nodejs.org/) | [Git](https://git-scm.com/download/linux) | [nvm](https://github.com/nvm-sh/nvm)
|
||||
|
||||
Pick your distro, then follow the steps.
|
||||
|
||||
#### Ubuntu / Debian
|
||||
|
||||
**Step 1 — Install Node.js and Git:**
|
||||
|
||||
```bash
|
||||
curl -fsSL https://deb.nodesource.com/setup_24.x | sudo -E bash -
|
||||
sudo apt-get install -y nodejs git
|
||||
```
|
||||
|
||||
#### Fedora / RHEL / CentOS
|
||||
|
||||
**Step 1 — Install Node.js and Git:**
|
||||
|
||||
```bash
|
||||
curl -fsSL https://rpm.nodesource.com/setup_24.x | sudo bash -
|
||||
sudo dnf install -y nodejs git
|
||||
```
|
||||
|
||||
#### Arch Linux
|
||||
|
||||
**Step 1 — Install Node.js and Git:**
|
||||
|
||||
```bash
|
||||
sudo pacman -S nodejs npm git
|
||||
```
|
||||
|
||||
#### Using nvm (any distro)
|
||||
|
||||
**Step 1 — Install nvm, then Node.js:**
|
||||
|
||||
```bash
|
||||
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.0/install.sh | bash
|
||||
source ~/.bashrc # or ~/.zshrc
|
||||
nvm install 24
|
||||
nvm use 24
|
||||
```
|
||||
|
||||
#### All distros: Steps 2-7
|
||||
|
||||
**Step 2 — Verify dependencies are installed:**
|
||||
|
||||
```bash
|
||||
node --version # should print v22.x or higher
|
||||
git --version # should print 2.20+
|
||||
```
|
||||
|
||||
**Step 3 — Install GSD:**
|
||||
|
||||
```bash
|
||||
npm install -g gsd-pi
|
||||
```
|
||||
|
||||
**Step 4 — Set up your LLM provider:**
|
||||
|
||||
```bash
|
||||
# Option A: Set an environment variable (Anthropic recommended)
|
||||
export ANTHROPIC_API_KEY="sk-ant-..."
|
||||
|
||||
# Option B: Use the built-in config wizard
|
||||
gsd config
|
||||
```
|
||||
|
||||
To persist the key, add the export line to `~/.bashrc` (or `~/.zshrc`):
|
||||
|
||||
```bash
|
||||
echo 'export ANTHROPIC_API_KEY="sk-ant-..."' >> ~/.bashrc
|
||||
source ~/.bashrc
|
||||
```
|
||||
|
||||
See [Provider Setup Guide](./providers.md) for all 20+ supported providers.
|
||||
|
||||
**Step 5 — Launch GSD:**
|
||||
|
||||
```bash
|
||||
cd ~/my-project # navigate to any project
|
||||
gsd # start a session
|
||||
```
|
||||
|
||||
**Step 6 — Verify everything works:**
|
||||
|
||||
```bash
|
||||
gsd --version # prints the installed version
|
||||
```
|
||||
|
||||
Inside the session, type `/model` to confirm your LLM is connected.
|
||||
|
||||
> **Permission errors on `npm install -g`?** Don't use `sudo npm`. Fix npm's global directory instead:
|
||||
> ```bash
|
||||
> mkdir -p ~/.npm-global
|
||||
> npm config set prefix '~/.npm-global'
|
||||
> echo 'export PATH="$HOME/.npm-global/bin:$PATH"' >> ~/.bashrc
|
||||
> source ~/.bashrc
|
||||
> npm install -g gsd-pi
|
||||
> ```
|
||||
|
||||
---
|
||||
|
||||
### Docker (any OS)
|
||||
|
||||
> **Downloads:** [Docker Desktop](https://www.docker.com/products/docker-desktop/)
|
||||
|
||||
Run GSD in an isolated sandbox without installing Node.js on your host.
|
||||
|
||||
**Step 1 — Install Docker Desktop** (4.58+ required).
|
||||
|
||||
**Step 2 — Clone the GSD repo:**
|
||||
|
||||
```bash
|
||||
git clone https://github.com/gsd-build/gsd-2.git
|
||||
cd gsd-2/docker
|
||||
```
|
||||
|
||||
**Step 3 — Create and enter a sandbox:**
|
||||
|
||||
```bash
|
||||
docker sandbox create --template . --name gsd-sandbox
|
||||
docker sandbox exec -it gsd-sandbox bash
|
||||
```
|
||||
|
||||
**Step 4 — Set your API key and run GSD:**
|
||||
|
||||
```bash
|
||||
export ANTHROPIC_API_KEY="sk-ant-..."
|
||||
gsd auto "implement the feature described in issue #42"
|
||||
```
|
||||
|
||||
See [Docker Sandbox docs](../../docker/README.md) for full configuration, resource limits, and compose files.
|
||||
|
||||
---
|
||||
|
||||
## After Installation
|
||||
|
||||
### Choose a Model
|
||||
|
||||
GSD auto-selects a default model after provider setup. Switch anytime inside a session:
|
||||
|
||||
```
|
||||
/model
|
||||
|
|
@ -76,18 +313,20 @@ GSD auto-selects a default model after login. Switch later with:
|
|||
|
||||
Or configure per-phase models in preferences — see [Configuration](./configuration.md).
|
||||
|
||||
---
|
||||
|
||||
## Two Ways to Work
|
||||
|
||||
### Step Mode — `/gsd`
|
||||
|
||||
Type `/gsd` inside a session. GSD executes one unit of work at a time, pausing between each with a wizard showing what completed and what's next.
|
||||
|
||||
- **No `.gsd/` directory** — starts a discussion flow to capture your project vision
- **Milestone exists, no roadmap** — discuss or research the milestone
- **Roadmap exists, slices pending** — plan the next slice or execute a task
- **Mid-task** — resume where you left off
|
||||
|
||||
Step mode keeps you in the loop, reviewing output between each step.
|
||||
|
||||
### Auto Mode — `/gsd auto`
|
||||
|
||||
|
|
@ -99,9 +338,11 @@ Type `/gsd auto` and walk away. GSD autonomously researches, plans, executes, ve
|
|||
|
||||
See [Auto Mode](./auto-mode.md) for full details.
|
||||
|
||||
---

## Recommended Workflow: Two Terminals

Run auto mode in one terminal, steer from another.
|
||||
|
||||
**Terminal 1 — let it build:**
|
||||
|
||||
|
|
@ -121,9 +362,9 @@ gsd
|
|||
|
||||
Both terminals read and write the same `.gsd/` files. Decisions in terminal 2 are picked up at the next phase boundary automatically.
|
||||
|
||||
---

## How GSD Organizes Work

GSD organizes work into a hierarchy:
|
||||
|
||||
```
|
||||
Milestone → a shippable version (4-10 slices)
|
||||
|
|
@ -138,25 +379,45 @@ All state lives on disk in `.gsd/`:
|
|||
```
|
||||
.gsd/
|
||||
PROJECT.md — what the project is right now
|
||||
REQUIREMENTS.md — requirement contract
DECISIONS.md — append-only architectural decisions
KNOWLEDGE.md — cross-session rules and patterns
RUNTIME.md — runtime context: API endpoints, env vars, services
|
||||
STATE.md — quick-glance status
|
||||
milestones/
|
||||
M001/
|
||||
M001-CONTEXT.md — scope and goals from discussion
M001-ROADMAP.md — slice plan with dependencies
|
||||
slices/
|
||||
S01/
|
||||
S01-PLAN.md — task decomposition
|
||||
S01-SUMMARY.md — what happened
|
||||
S01-UAT.md — human test script
|
||||
tasks/
|
||||
T01-PLAN.md
|
||||
T01-SUMMARY.md
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## VS Code Extension
|
||||
|
||||
GSD is also available as a VS Code extension. Install from the marketplace (publisher: FluxLabs) or search for "GSD" in VS Code extensions:
|
||||
|
||||
- **`@gsd` chat participant** — talk to the agent in VS Code Chat
|
||||
- **Sidebar dashboard** — connection status, model info, token usage
|
||||
- **Full command palette** — start/stop agent, switch models, export sessions
|
||||
|
||||
The CLI (`gsd-pi`) must be installed first — the extension connects to it via RPC.
|
||||
|
||||
---
|
||||
|
||||
## Web Interface
|
||||
|
||||
GSD has a browser-based interface for visual project management:
|
||||
|
||||
```bash
|
||||
gsd --web
|
||||
```
|
||||
|
||||
See [Web Interface](./web-interface.md) for details.
|
||||
|
||||
---
|
||||
|
||||
## Resume a Session
|
||||
|
||||
```bash
|
||||
|
|
@ -165,36 +426,48 @@ gsd --continue # or gsd -c
|
|||
|
||||
Resumes the most recent session for the current directory.
|
||||
|
||||
Browse all saved sessions:
|
||||
|
||||
```bash
|
||||
gsd sessions
|
||||
```
|
||||
|
||||
Shows each session's date, message count, and first-message preview so you can choose which one to resume.
|
||||
---
|
||||
|
||||
## Updating GSD
|
||||
|
||||
GSD checks for updates every 24 hours and prompts at startup. You can also update manually:
|
||||
|
||||
```bash
|
||||
npm update -g gsd-pi
|
||||
```
|
||||
|
||||
Or from within a session:
|
||||
|
||||
```
|
||||
/gsd update
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Quick Troubleshooting
|
||||
|
||||
| Problem | Fix |
|
||||
|---------|-----|
|
||||
| `command not found: gsd` | Add npm global bin to PATH (see OS-specific notes above) |
|
||||
| `gsd` runs `git svn dcommit` | oh-my-zsh conflict — `unalias gsd` or use `gsd-cli` |
|
||||
| Permission errors on `npm install -g` | Fix npm prefix (see Linux notes) or use nvm |
|
||||
| Can't connect to LLM | Check API key with `gsd config`, verify network access |
|
||||
| `gsd` hangs on start | Check Node.js version: `node --version` (need 22+) |
|
||||
|
||||
For more, see [Troubleshooting](./troubleshooting.md).
|
||||
|
||||
---
|
||||
|
||||
## Next Steps

- [Auto Mode](./auto-mode.md) — deep dive into autonomous execution
- [Configuration](./configuration.md) — model selection, timeouts, budgets
- [Commands Reference](./commands.md) — all commands and shortcuts
- [Provider Setup](./providers.md) — detailed setup for every provider
- [Working in Teams](./working-in-teams.md) — multi-developer workflows

## Troubleshooting

### `gsd` command runs `git svn dcommit` instead of GSD

The [oh-my-zsh git plugin](https://github.com/ohmyzsh/ohmyzsh/tree/master/plugins/git) defines `alias gsd='git svn dcommit'`, which shadows the GSD binary.

**Option 1** — Remove the alias in your `~/.zshrc` (add after the `source $ZSH/oh-my-zsh.sh` line):

```bash
unalias gsd 2>/dev/null
```

**Option 2** — Use the alternative binary name:

```bash
gsd-cli
```

Both `gsd` and `gsd-cli` point to the same binary.
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ Step-by-step setup instructions for every LLM provider GSD supports. If you ran
|
|||
|
||||
| Provider | Auth Method | Env Variable | Config File |
|
||||
|----------|-------------|-------------|-------------|
|
||||
| Anthropic | API key | `ANTHROPIC_API_KEY` | — |
|
||||
| OpenAI | API key | `OPENAI_API_KEY` | — |
|
||||
| Google Gemini | API key | `GEMINI_API_KEY` | — |
|
||||
| OpenRouter | API key | `OPENROUTER_API_KEY` | Optional `models.json` |
|
||||
|
|
@ -55,25 +55,91 @@ Built-in providers have models pre-registered in GSD. You only need to supply cr
|
|||
|
||||
**Recommended.** Anthropic models have the deepest integration: built-in web search, extended thinking, and prompt caching.
|
||||
|
||||
**Option A — API key (recommended):**
|
||||
|
||||
```bash
|
||||
export ANTHROPIC_API_KEY="sk-ant-..."
|
||||
```
|
||||
|
||||
Or run `gsd config` and paste your key when prompted.
|
||||
|
||||
**Get a key:** [console.anthropic.com/settings/keys](https://console.anthropic.com/settings/keys)
|
||||
|
||||
**Option B — Claude Code CLI:**
|
||||
|
||||
If you have a Claude Pro or Max subscription, you can authenticate through Anthropic's official Claude Code CLI. Install it, sign in with `claude`, then GSD will detect and route through it automatically:
|
||||
|
||||
```bash
|
||||
# Install Claude Code CLI (see https://docs.anthropic.com/en/docs/claude-code)
|
||||
claude
|
||||
# Sign in when prompted, then start GSD
|
||||
gsd
|
||||
```
|
||||
|
||||
GSD detects your local Claude Code installation and uses it as the authenticated Anthropic surface. This is the TOS-compliant path for subscription users — GSD never handles your subscription credentials directly.
|
||||
|
||||
> **Note:** GSD does not support browser-based OAuth sign-in for Anthropic. Use an API key or the Claude Code CLI instead.
|
||||
|
||||
**Option C — Use your Claude Pro/Max plan with GSD inside Claude Code:**
|
||||
|
||||
If you already have a Claude Pro or Max subscription and want to use GSD's planning, execution, and milestone orchestration directly from Claude Code — without switching to a separate terminal — you can connect GSD as an MCP server. This gives Claude Code access to GSD's full workflow toolset via the [Model Context Protocol](https://modelcontextprotocol.io), so you get GSD's structured project management powered by your existing Claude plan.
|
||||
|
||||
**Automatic setup (recommended):**
|
||||
|
||||
When GSD detects a Claude Code model during startup, it automatically writes a `.mcp.json` file in your project root with the GSD workflow MCP server configured. No manual steps needed — just start GSD once with Claude Code as the provider and the config is created for you.
|
||||
|
||||
You can also trigger this manually from inside a GSD session:
|
||||
|
||||
```bash
|
||||
/gsd mcp init
|
||||
```
|
||||
|
||||
This writes (or updates) the `gsd-workflow` entry in your project's `.mcp.json`. Claude Code discovers this file automatically on its next session start.
|
||||
|
||||
**Manual setup:**
|
||||
|
||||
If you prefer to configure it yourself, add GSD to your project's `.mcp.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"gsd": {
|
||||
"command": "npx",
|
||||
"args": ["gsd-mcp-server"],
|
||||
"env": {
|
||||
"GSD_CLI_PATH": "/path/to/gsd"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Or if `gsd-mcp-server` is installed globally:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"gsd": {
|
||||
"command": "gsd-mcp-server"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
You can also add this to `~/.claude/settings.json` under `mcpServers` to make GSD available across all projects.
|
||||
|
||||
**What's exposed:**
|
||||
|
||||
The MCP server provides GSD's full workflow tool surface — milestone planning, task completion, slice management, roadmap reassessment, journal queries, and more. Session management tools (`gsd_execute`, `gsd_status`, `gsd_result`, `gsd_cancel`) let Claude Code start and monitor GSD auto-mode sessions. See [Commands → MCP Server Mode](./commands.md#mcp-server-mode) for the full tool list.
|
||||
|
||||
**Verify the connection:**
|
||||
|
||||
From inside a GSD session, check that the MCP server is reachable:
|
||||
|
||||
```bash
|
||||
/gsd mcp status
|
||||
```
|
||||
|
||||
### OpenAI
|
||||
|
||||
```bash
|
||||
|
|
|
|||
65
gitbook/README.md
Normal file
65
gitbook/README.md
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
# What is GSD?
|
||||
|
||||
GSD is an AI-powered development agent that turns project ideas into working software. Describe what you want to build, and GSD researches, plans, codes, tests, and commits — with clean git history and full cost tracking.
|
||||
|
||||
## How It Works
|
||||
|
||||
GSD breaks your project into manageable pieces and works through them systematically:
|
||||
|
||||
```
|
||||
You describe your project
|
||||
↓
|
||||
GSD creates a milestone with slices (features)
|
||||
↓
|
||||
Each slice is decomposed into tasks
|
||||
↓
|
||||
Tasks are executed one at a time in fresh AI sessions
|
||||
↓
|
||||
Code is committed, verified, and the next task begins
|
||||
```
|
||||
|
||||
You can stay hands-on with **step mode** (reviewing each step) or let GSD run autonomously with **auto mode** while you grab coffee.
|
||||
|
||||
## Key Features
|
||||
|
||||
- **Autonomous execution** — `/gsd auto` runs research, planning, coding, testing, and committing without intervention
|
||||
- **20+ LLM providers** — Anthropic, OpenAI, Google, OpenRouter, GitHub Copilot, Amazon Bedrock, local models, and more
|
||||
- **Git isolation** — Each milestone works in its own worktree branch, merged cleanly when done
|
||||
- **Cost tracking** — Real-time token usage, budget ceilings, and automatic model downgrading
|
||||
- **Crash recovery** — Sessions resume automatically after interruptions
|
||||
- **Skills system** — Domain-specific instruction sets for frameworks, languages, and tools
|
||||
- **Parallel milestones** — Run multiple milestones simultaneously in isolated worktrees
|
||||
- **Remote questions** — Get Discord, Slack, or Telegram notifications when GSD needs input
|
||||
- **Web interface** — Browser-based dashboard with real-time progress
|
||||
- **VS Code extension** — Chat participant, sidebar dashboard, and full command palette
|
||||
- **Headless mode** — Run in CI pipelines, cron jobs, and scripted automation
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# Install
|
||||
npm install -g gsd-pi
|
||||
|
||||
# Launch
|
||||
gsd
|
||||
|
||||
# Start autonomous mode
|
||||
/gsd auto
|
||||
```
|
||||
|
||||
See [Installation](getting-started/installation.md) for detailed setup instructions.
|
||||
|
||||
## Two Ways to Work
|
||||
|
||||
| Mode | Command | Best For |
|
||||
|------|---------|----------|
|
||||
| **Step** | `/gsd` | Staying in the loop, reviewing each step |
|
||||
| **Auto** | `/gsd auto` | Walking away, overnight builds, batch work |
|
||||
|
||||
The recommended workflow: run auto mode in one terminal, steer from another. See [Step Mode](core-concepts/step-mode.md) and [Auto Mode](core-concepts/auto-mode.md).
|
||||
|
||||
## Requirements
|
||||
|
||||
- **Node.js** 22.0.0 or later (24 LTS recommended)
|
||||
- **Git** installed and configured
|
||||
- An API key for at least one LLM provider (or use browser sign-in for Anthropic/GitHub Copilot)
|
||||
49
gitbook/SUMMARY.md
Normal file
49
gitbook/SUMMARY.md
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
# Table of contents
|
||||
|
||||
* [What is GSD?](README.md)
|
||||
|
||||
## Getting Started
|
||||
|
||||
* [Installation](getting-started/installation.md)
|
||||
* [Your First Project](getting-started/first-project.md)
|
||||
* [Choosing a Model](getting-started/choosing-a-model.md)
|
||||
|
||||
## Core Concepts
|
||||
|
||||
* [How GSD Organizes Work](core-concepts/project-structure.md)
|
||||
* [Step Mode](core-concepts/step-mode.md)
|
||||
* [Auto Mode](core-concepts/auto-mode.md)
|
||||
|
||||
## Configuration
|
||||
|
||||
* [Preferences](configuration/preferences.md)
|
||||
* [Provider Setup](configuration/providers.md)
|
||||
* [Custom Models](configuration/custom-models.md)
|
||||
* [Git & Worktrees](configuration/git-settings.md)
|
||||
* [Notifications](configuration/notifications.md)
|
||||
* [MCP Servers](configuration/mcp-servers.md)
|
||||
|
||||
## Features
|
||||
|
||||
* [Cost Management](features/cost-management.md)
|
||||
* [Token Optimization](features/token-optimization.md)
|
||||
* [Dynamic Model Routing](features/dynamic-model-routing.md)
|
||||
* [Skills](features/skills.md)
|
||||
* [Captures & Triage](features/captures.md)
|
||||
* [Workflow Visualizer](features/visualizer.md)
|
||||
* [Workflow Templates](features/workflow-templates.md)
|
||||
* [Web Interface](features/web-interface.md)
|
||||
* [Remote Questions](features/remote-questions.md)
|
||||
* [Working in Teams](features/teams.md)
|
||||
* [Parallel Orchestration](features/parallel.md)
|
||||
* [Headless & CI Mode](features/headless.md)
|
||||
* [GitHub Sync](features/github-sync.md)
|
||||
|
||||
## Reference
|
||||
|
||||
* [Commands](reference/commands.md)
|
||||
* [Keyboard Shortcuts](reference/keyboard-shortcuts.md)
|
||||
* [CLI Flags](reference/cli-flags.md)
|
||||
* [Environment Variables](reference/environment-variables.md)
|
||||
* [Troubleshooting](reference/troubleshooting.md)
|
||||
* [Migration from v1](reference/migration.md)
|
||||
131
gitbook/configuration/custom-models.md
Normal file
131
gitbook/configuration/custom-models.md
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
# Custom Models
|
||||
|
||||
Define custom models and providers in `~/.gsd/agent/models.json`. This lets you add models not in the default registry — self-hosted endpoints, fine-tuned models, proxies, or new provider releases.
|
||||
|
||||
## File Location
|
||||
|
||||
GSD looks for models.json at:
|
||||
1. `~/.gsd/agent/models.json` (primary)
|
||||
2. `~/.pi/agent/models.json` (fallback)
|
||||
|
||||
The file reloads each time you open `/model` — no restart needed.
|
||||
|
||||
## Basic Structure
|
||||
|
||||
```json
|
||||
{
|
||||
"providers": {
|
||||
"my-provider": {
|
||||
"baseUrl": "https://my-endpoint.example.com/v1",
|
||||
"apiKey": "MY_PROVIDER_API_KEY",
|
||||
"api": "openai-completions",
|
||||
"models": [
|
||||
{
|
||||
"id": "model-id-here",
|
||||
"name": "Friendly Model Name",
|
||||
"reasoning": false,
|
||||
"input": ["text"],
|
||||
"contextWindow": 128000,
|
||||
"maxTokens": 16384,
|
||||
"cost": { "input": 0.15, "output": 0.60, "cacheRead": 0.015, "cacheWrite": 0.19 }
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## API Key Resolution
|
||||
|
||||
The `apiKey` field can be:
|
||||
|
||||
- **An environment variable name**: `"OPENROUTER_API_KEY"` — GSD resolves it automatically
|
||||
- **A literal value**: `"sk-abc123..."` — used directly
|
||||
- **A dummy value**: `"not-needed"` — for local servers that don't require auth
|
||||
|
||||
## Compatibility Flags
|
||||
|
||||
Local and non-standard servers often need compatibility adjustments:
|
||||
|
||||
```json
|
||||
{
|
||||
"compat": {
|
||||
"supportsDeveloperRole": false,
|
||||
"supportsReasoningEffort": false,
|
||||
"supportsUsageInStreaming": false,
|
||||
"thinkingFormat": "qwen"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| Flag | Default | Purpose |
|
||||
|------|---------|---------|
|
||||
| `supportsDeveloperRole` | `true` | Set `false` if the server doesn't support the `developer` message role |
|
||||
| `supportsReasoningEffort` | `true` | Set `false` if the server doesn't support reasoning effort parameters |
|
||||
| `supportsUsageInStreaming` | `true` | Set `false` if streaming responses don't include token usage |
|
||||
| `thinkingFormat` | — | Set `"qwen"` for Qwen thinking mode, `"qwen-chat-template"` for chat template variant |
|
||||
|
||||
## Custom Headers
|
||||
|
||||
For proxies that need extra headers:
|
||||
|
||||
```json
|
||||
{
|
||||
"providers": {
|
||||
"litellm-proxy": {
|
||||
"baseUrl": "https://litellm.example.com/v1",
|
||||
"apiKey": "MY_API_KEY",
|
||||
"api": "openai-completions",
|
||||
"headers": {
|
||||
"x-custom-header": "value"
|
||||
},
|
||||
"models": [...]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Model Overrides
|
||||
|
||||
Override specific model settings without redefining the entire model:
|
||||
|
||||
```json
|
||||
{
|
||||
"providers": {
|
||||
"openrouter": {
|
||||
"modelOverrides": {
|
||||
"anthropic/claude-sonnet-4": {
|
||||
"compat": {
|
||||
"openRouterRouting": {
|
||||
"only": ["amazon-bedrock"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Cost Tracking
|
||||
|
||||
For accurate cost tracking with custom models, add the `cost` field (per million tokens):
|
||||
|
||||
```json
|
||||
"cost": {
|
||||
"input": 0.15,
|
||||
"output": 0.60,
|
||||
"cacheRead": 0.015,
|
||||
"cacheWrite": 0.19
|
||||
}
|
||||
```
|
||||
|
||||
Without this, cost shows $0.00 — which is the expected default for custom models.
|
||||
|
||||
## Community Extensions
|
||||
|
||||
For providers not built into GSD, community extensions add full provider support:
|
||||
|
||||
| Extension | Provider | Install |
|
||||
|-----------|----------|---------|
|
||||
| `pi-dashscope` | Alibaba DashScope (Qwen3, GLM-5, etc.) | `gsd install npm:pi-dashscope` |
|
||||
148
gitbook/configuration/git-settings.md
Normal file
148
gitbook/configuration/git-settings.md
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
# Git & Worktrees
|
||||
|
||||
GSD uses git for milestone isolation and sequential commits. The strategy is fully automated — you don't need to manage branches manually.
|
||||
|
||||
## Isolation Modes
|
||||
|
||||
GSD supports three isolation modes, configured via `git.isolation` in preferences:
|
||||
|
||||
| Mode | Working Directory | Branch | Best For |
|
||||
|------|-------------------|--------|----------|
|
||||
| `worktree` (default) | `.gsd/worktrees/<MID>/` | `milestone/<MID>` | Most projects — full isolation |
|
||||
| `branch` | Project root | `milestone/<MID>` | Submodule-heavy repos |
|
||||
| `none` | Project root | Current branch | Hot-reload workflows |
|
||||
|
||||
### Worktree Mode (Default)
|
||||
|
||||
Each milestone gets its own git worktree and branch. All execution happens inside the worktree. On completion, everything is squash-merged to main as one clean commit. The worktree and branch are then cleaned up.
|
||||
|
||||
Changes in a milestone can't interfere with your main working copy.
|
||||
|
||||
### Branch Mode
|
||||
|
||||
Work happens in the project root on a `milestone/<MID>` branch. No worktree directory is created. Useful when worktrees cause problems with submodules or hardcoded paths.
|
||||
|
||||
### None Mode
|
||||
|
||||
Work happens directly on your current branch. No worktree, no milestone branch. GSD still commits with conventional commit messages. Use this when file isolation breaks dev tooling (file watchers, hot-reload, etc.).
|
||||
|
||||
## Branching Model
|
||||
|
||||
```
|
||||
main ────────────────────────────────────────────
|
||||
│ ↑
|
||||
└── milestone/M001 (worktree) ─────────────┘
|
||||
commit: feat: core types
|
||||
commit: feat: markdown parser
|
||||
commit: feat: file writer
|
||||
→ squash-merged to main
|
||||
```
|
||||
|
||||
## Workflow Modes
|
||||
|
||||
Set `mode` for sensible defaults instead of configuring each setting individually:
|
||||
|
||||
```yaml
|
||||
mode: solo # personal projects
|
||||
mode: team # shared repos
|
||||
```
|
||||
|
||||
| Setting | `solo` | `team` |
|
||||
|---------|--------|--------|
|
||||
| `git.auto_push` | `true` | `false` |
|
||||
| `git.push_branches` | `false` | `true` |
|
||||
| `git.pre_merge_check` | `false` | `true` |
|
||||
| `unique_milestone_ids` | `false` | `true` |
|
||||
|
||||
Mode defaults are the lowest priority — any explicit preference overrides them.
|
||||
|
||||
## Git Preferences
|
||||
|
||||
```yaml
|
||||
git:
|
||||
auto_push: false # push after commits
|
||||
push_branches: false # push milestone branch to remote
|
||||
remote: origin # git remote name
|
||||
snapshots: true # WIP snapshot commits during long tasks
|
||||
pre_merge_check: auto # validation before merge
|
||||
commit_type: feat # override conventional commit prefix
|
||||
main_branch: main # primary branch name
|
||||
merge_strategy: squash # "squash" or "merge"
|
||||
isolation: worktree # "worktree", "branch", or "none"
|
||||
commit_docs: true # commit .gsd/ artifacts to git
|
||||
manage_gitignore: true # let GSD manage .gitignore
|
||||
auto_pr: false # create PR on milestone completion
|
||||
pr_target_branch: develop # PR target branch
|
||||
```
|
||||
|
||||
## Automatic Pull Requests
|
||||
|
||||
For teams using Gitflow or branch-based workflows:
|
||||
|
||||
```yaml
|
||||
git:
|
||||
auto_push: true
|
||||
auto_pr: true
|
||||
pr_target_branch: develop
|
||||
```
|
||||
|
||||
When a milestone completes, GSD pushes the branch and creates a PR targeting your specified branch. Requires `gh` CLI installed and authenticated.
|
||||
|
||||
## Post-Worktree Hook
|
||||
|
||||
Run a script after worktree creation (copy `.env` files, symlink assets, etc.):
|
||||
|
||||
```yaml
|
||||
git:
|
||||
worktree_post_create: .gsd/hooks/post-worktree-create
|
||||
```
|
||||
|
||||
Example hook:
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
cp "$SOURCE_DIR/.env" "$WORKTREE_DIR/.env"
|
||||
ln -sf "$SOURCE_DIR/assets" "$WORKTREE_DIR/assets"
|
||||
```
|
||||
|
||||
## Keeping `.gsd/` Local
|
||||
|
||||
For teams where only some members use GSD:
|
||||
|
||||
```yaml
|
||||
git:
|
||||
commit_docs: false
|
||||
```
|
||||
|
||||
This adds the entire `.gsd/` directory to `.gitignore`. You get structured planning without affecting teammates who don't use GSD.
|
||||
|
||||
## Commit Format
|
||||
|
||||
Commits use conventional commit format with GSD metadata:
|
||||
|
||||
```
|
||||
feat: core type definitions
|
||||
|
||||
GSD-Task: M001/S01/T01
|
||||
```
|
||||
|
||||
## Manual Worktree Management
|
||||
|
||||
Use `/worktree` (or `/wt`) for manual worktree operations:
|
||||
|
||||
```
|
||||
/worktree create
|
||||
/worktree switch
|
||||
/worktree merge
|
||||
/worktree remove
|
||||
```
|
||||
|
||||
## Self-Healing
|
||||
|
||||
GSD automatically recovers from common git issues:
|
||||
|
||||
- **Detached HEAD** — reattaches to the correct branch
|
||||
- **Stale lock files** — removes `index.lock` from crashed processes
|
||||
- **Orphaned worktrees** — detects and cleans up abandoned worktrees
|
||||
|
||||
Run `/gsd doctor` to check git health manually.
|
||||
65
gitbook/configuration/mcp-servers.md
Normal file
65
gitbook/configuration/mcp-servers.md
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
# MCP Servers
|
||||
|
||||
GSD can connect to external MCP (Model Context Protocol) servers for local tools, internal APIs, self-hosted services, or integrations not built in as native extensions.
|
||||
|
||||
## Configuration Files
|
||||
|
||||
GSD reads MCP config from these project-local paths:
|
||||
|
||||
- `.mcp.json` — repo-shared config (safe to commit)
|
||||
- `.gsd/mcp.json` — local-only config (not shared)
|
||||
|
||||
If both files exist, their server entries are merged by name; when the same server name appears in both, the first definition found wins.
|
||||
|
||||
## Supported Transports
|
||||
|
||||
| Transport | Config Shape | Use When |
|
||||
|-----------|-------------|----------|
|
||||
| `stdio` | `command` + optional `args`, `env`, `cwd` | Launching a local MCP server |
|
||||
| `http` | `url` | Connecting to an already-running server |
|
||||
|
||||
## Examples
|
||||
|
||||
### stdio Server
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"my-server": {
|
||||
"type": "stdio",
|
||||
"command": "/absolute/path/to/python3",
|
||||
"args": ["/absolute/path/to/server.py"],
|
||||
"env": {
|
||||
"API_URL": "http://localhost:8000"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### HTTP Server
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"my-http-server": {
|
||||
"url": "http://localhost:8080/mcp"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Verifying a Server
|
||||
|
||||
After adding config, verify from a GSD session:
|
||||
|
||||
1. `mcp_servers` — confirms GSD sees the config
|
||||
2. `mcp_discover(server="my-server")` — confirms the server starts and responds
|
||||
3. `mcp_call(server="my-server", tool="<tool>", args={...})` — confirms a real tool call works
|
||||
|
||||
## Tips
|
||||
|
||||
- Use **absolute paths** for executables and scripts
|
||||
- Set required **environment variables** directly in the MCP config's `env` block
|
||||
- Use `.mcp.json` for team-shared servers; `.gsd/mcp.json` for machine-local ones
|
||||
- If a server depends on local paths or personal secrets, keep it in `.gsd/mcp.json`
|
||||
38
gitbook/configuration/notifications.md
Normal file
38
gitbook/configuration/notifications.md
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
# Notifications
|
||||
|
||||
GSD sends desktop notifications during auto mode to keep you informed without watching the terminal.
|
||||
|
||||
## Configuration
|
||||
|
||||
```yaml
|
||||
notifications:
|
||||
enabled: true
|
||||
on_complete: true # notify on unit completion
|
||||
on_error: true # notify on errors
|
||||
on_budget: true # notify on budget thresholds
|
||||
on_milestone: true # notify when milestone finishes
|
||||
on_attention: true # notify when manual attention needed
|
||||
```
|
||||
|
||||
## macOS Setup
|
||||
|
||||
GSD uses `terminal-notifier` when available, falling back to `osascript`.
|
||||
|
||||
**Recommended:** Install `terminal-notifier` for reliable delivery:
|
||||
|
||||
```bash
|
||||
brew install terminal-notifier
|
||||
```
|
||||
|
||||
**Why?** The `osascript` fallback attributes notifications to your terminal app (Ghostty, iTerm2, etc.), which may not have notification permissions. `terminal-notifier` registers as its own app and prompts for permission on first use.
|
||||
|
||||
### Notifications Not Appearing?
|
||||
|
||||
1. Check **System Settings → Notifications** for your terminal app
|
||||
2. Install `terminal-notifier` (recommended)
|
||||
3. Test with:
|
||||
```bash
|
||||
terminal-notifier -title "GSD" -message "working!" -sound Glass
|
||||
```
|
||||
|
||||
If your terminal app doesn't appear in Notification settings, it may need to send at least one notification first to register. See [Troubleshooting](../reference/troubleshooting.md) for more details.
|
||||
238
gitbook/configuration/preferences.md
Normal file
238
gitbook/configuration/preferences.md
Normal file
|
|
@ -0,0 +1,238 @@
|
|||
# Preferences
|
||||
|
||||
GSD preferences live in YAML frontmatter markdown files. You can configure them globally or per-project.
|
||||
|
||||
## Managing Preferences
|
||||
|
||||
```
|
||||
/gsd prefs # open the global preferences wizard
|
||||
/gsd prefs project # open the project preferences wizard
|
||||
/gsd prefs status # show current values and where they come from
|
||||
```
|
||||
|
||||
## Preference Files
|
||||
|
||||
| Scope | Path | Applies To |
|
||||
|-------|------|-----------|
|
||||
| Global | `~/.gsd/PREFERENCES.md` | All projects |
|
||||
| Project | `.gsd/PREFERENCES.md` | Current project only |
|
||||
|
||||
**How they merge:**
|
||||
- **Scalar fields** (`budget_ceiling`, `token_profile`): project wins if defined
|
||||
- **Array fields** (`always_use_skills`, etc.): concatenated (global first, then project)
|
||||
- **Object fields** (`models`, `git`, `auto_supervisor`): shallow-merged, project overrides per-key
|
||||
|
||||
## Quick Example
|
||||
|
||||
```yaml
|
||||
---
|
||||
version: 1
|
||||
|
||||
# Model selection
|
||||
models:
|
||||
research: claude-sonnet-4-6
|
||||
planning: claude-opus-4-6
|
||||
execution: claude-sonnet-4-6
|
||||
completion: claude-sonnet-4-6
|
||||
|
||||
# Token optimization
|
||||
token_profile: balanced
|
||||
|
||||
# Budget
|
||||
budget_ceiling: 25.00
|
||||
budget_enforcement: pause
|
||||
|
||||
# Supervision
|
||||
auto_supervisor:
|
||||
soft_timeout_minutes: 15
|
||||
hard_timeout_minutes: 25
|
||||
|
||||
# Git
|
||||
git:
|
||||
auto_push: true
|
||||
merge_strategy: squash
|
||||
isolation: worktree
|
||||
|
||||
# Verification
|
||||
verification_commands:
|
||||
- npm run lint
|
||||
- npm run test
|
||||
|
||||
# Notifications
|
||||
notifications:
|
||||
on_milestone: true
|
||||
on_attention: true
|
||||
---
|
||||
```
|
||||
|
||||
## All Settings
|
||||
|
||||
### `models`
|
||||
|
||||
Per-phase model selection. See [Choosing a Model](../getting-started/choosing-a-model.md).
|
||||
|
||||
```yaml
|
||||
models:
|
||||
research: claude-sonnet-4-6
|
||||
planning:
|
||||
model: claude-opus-4-6
|
||||
fallbacks:
|
||||
- openrouter/z-ai/glm-5
|
||||
execution: claude-sonnet-4-6
|
||||
execution_simple: claude-haiku-4-5
|
||||
completion: claude-sonnet-4-6
|
||||
subagent: claude-sonnet-4-6
|
||||
```
|
||||
|
||||
### `token_profile`
|
||||
|
||||
Coordinates model selection, phase skipping, and context compression. Values: `budget`, `balanced` (default), `quality`. See [Token Optimization](../features/token-optimization.md).
|
||||
|
||||
### `budget_ceiling`
|
||||
|
||||
Maximum USD to spend during auto mode:
|
||||
|
||||
```yaml
|
||||
budget_ceiling: 50.00
|
||||
```
|
||||
|
||||
### `budget_enforcement`
|
||||
|
||||
What happens when the ceiling is reached:
|
||||
|
||||
| Value | Behavior |
|
||||
|-------|----------|
|
||||
| `warn` | Log a warning, continue |
|
||||
| `pause` | Pause auto mode (default) |
|
||||
| `halt` | Stop auto mode entirely |
|
||||
|
||||
### `auto_supervisor`
|
||||
|
||||
Timeout thresholds for auto mode:
|
||||
|
||||
```yaml
|
||||
auto_supervisor:
|
||||
soft_timeout_minutes: 20 # warn AI to wrap up
|
||||
idle_timeout_minutes: 10 # detect stalls
|
||||
hard_timeout_minutes: 30 # pause auto mode
|
||||
```
|
||||
|
||||
### `verification_commands`
|
||||
|
||||
Shell commands that run after every task execution:
|
||||
|
||||
```yaml
|
||||
verification_commands:
|
||||
- npm run lint
|
||||
- npm run test
|
||||
verification_auto_fix: true # auto-retry on failure (default)
|
||||
verification_max_retries: 2 # max attempts (default: 2)
|
||||
```
|
||||
|
||||
### `phases`
|
||||
|
||||
Fine-grained control over which phases run:
|
||||
|
||||
```yaml
|
||||
phases:
|
||||
skip_research: false
|
||||
skip_reassess: false
|
||||
skip_slice_research: true
|
||||
reassess_after_slice: true
|
||||
require_slice_discussion: false
|
||||
```
|
||||
|
||||
### `skill_discovery`
|
||||
|
||||
| Value | Behavior |
|
||||
|-------|----------|
|
||||
| `auto` | Skills found and applied automatically |
|
||||
| `suggest` | Skills identified but not auto-applied (default) |
|
||||
| `off` | Skill discovery disabled |
|
||||
|
||||
### `dynamic_routing`
|
||||
|
||||
Automatic model selection by task complexity. See [Dynamic Model Routing](../features/dynamic-model-routing.md).
|
||||
|
||||
```yaml
|
||||
dynamic_routing:
|
||||
enabled: true
|
||||
escalate_on_failure: true
|
||||
budget_pressure: true
|
||||
```
|
||||
|
||||
### `git`
|
||||
|
||||
Git behavior. See [Git & Worktrees](git-settings.md).
|
||||
|
||||
```yaml
|
||||
git:
|
||||
auto_push: false
|
||||
merge_strategy: squash
|
||||
isolation: worktree
|
||||
commit_docs: true
|
||||
auto_pr: false
|
||||
```
|
||||
|
||||
### `notifications`
|
||||
|
||||
See [Notifications](notifications.md).
|
||||
|
||||
```yaml
|
||||
notifications:
|
||||
enabled: true
|
||||
on_complete: true
|
||||
on_error: true
|
||||
on_milestone: true
|
||||
on_attention: true
|
||||
```
|
||||
|
||||
### `remote_questions`
|
||||
|
||||
Route questions to Slack, Discord, or Telegram. See [Remote Questions](../features/remote-questions.md).
|
||||
|
||||
```yaml
|
||||
remote_questions:
|
||||
channel: discord
|
||||
channel_id: "1234567890123456789"
|
||||
timeout_minutes: 5
|
||||
```
|
||||
|
||||
### `parallel`
|
||||
|
||||
Run multiple milestones simultaneously. See [Parallel Orchestration](../features/parallel.md).
|
||||
|
||||
```yaml
|
||||
parallel:
|
||||
enabled: false
|
||||
max_workers: 2
|
||||
budget_ceiling: 50.00
|
||||
```
|
||||
|
||||
### `custom_instructions`
|
||||
|
||||
Durable instructions appended to every session:
|
||||
|
||||
```yaml
|
||||
custom_instructions:
|
||||
- "Always use TypeScript strict mode"
|
||||
- "Prefer functional patterns over classes"
|
||||
```
|
||||
|
||||
For project-specific patterns, use `.gsd/KNOWLEDGE.md` instead — it's injected into every agent prompt automatically.
|
||||
|
||||
### `context_pause_threshold`
|
||||
|
||||
Context window usage percentage at which auto mode pauses:
|
||||
|
||||
```yaml
|
||||
context_pause_threshold: 80 # pause at 80%
|
||||
```
|
||||
|
||||
### `show_token_cost`
|
||||
|
||||
Show per-prompt and cumulative session token cost in the footer:
|
||||
|
||||
```yaml
|
||||
show_token_cost: true
|
||||
```
|
||||
277
gitbook/configuration/providers.md
Normal file
277
gitbook/configuration/providers.md
Normal file
|
|
@ -0,0 +1,277 @@
|
|||
# Provider Setup
|
||||
|
||||
Step-by-step setup instructions for every LLM provider GSD supports. If you ran the onboarding wizard (`gsd config`) and picked a provider, you may already be configured — check with `/model` inside a session.
|
||||
|
||||
## Quick Reference
|
||||
|
||||
| Provider | Auth Method | Environment Variable |
|
||||
|----------|-------------|---------------------|
|
||||
| Anthropic | OAuth or API key | `ANTHROPIC_API_KEY` |
|
||||
| OpenAI | API key | `OPENAI_API_KEY` |
|
||||
| Google Gemini | API key | `GEMINI_API_KEY` |
|
||||
| OpenRouter | API key | `OPENROUTER_API_KEY` |
|
||||
| Groq | API key | `GROQ_API_KEY` |
|
||||
| xAI (Grok) | API key | `XAI_API_KEY` |
|
||||
| Mistral | API key | `MISTRAL_API_KEY` |
|
||||
| GitHub Copilot | OAuth | `GH_TOKEN` |
|
||||
| Amazon Bedrock | IAM credentials | `AWS_PROFILE` or `AWS_ACCESS_KEY_ID` |
|
||||
| Vertex AI | ADC | `GOOGLE_APPLICATION_CREDENTIALS` |
|
||||
| Azure OpenAI | API key | `AZURE_OPENAI_API_KEY` |
|
||||
| Ollama | None (local) | — |
|
||||
| LM Studio | None (local) | — |
|
||||
| vLLM / SGLang | None (local) | — |
|
||||
|
||||
## Built-in Providers
|
||||
|
||||
### Anthropic (Claude)
|
||||
|
||||
**Recommended.** Anthropic models have the deepest integration: built-in web search, extended thinking, and prompt caching.
|
||||
|
||||
**Option A — Browser sign-in (recommended):**
|
||||
|
||||
```bash
|
||||
gsd config
|
||||
# Choose "Sign in with your browser" → "Anthropic (Claude)"
|
||||
```
|
||||
|
||||
Or inside a session: `/login`
|
||||
|
||||
**Option B — API key:**
|
||||
|
||||
```bash
|
||||
export ANTHROPIC_API_KEY="sk-ant-..."
|
||||
```
|
||||
|
||||
### OpenAI
|
||||
|
||||
```bash
|
||||
export OPENAI_API_KEY="sk-..."
|
||||
```
|
||||
|
||||
Or run `gsd config` and choose "Paste an API key" then "OpenAI".
|
||||
|
||||
### Google Gemini
|
||||
|
||||
```bash
|
||||
export GEMINI_API_KEY="..."
|
||||
```
|
||||
|
||||
### OpenRouter
|
||||
|
||||
OpenRouter aggregates 200+ models from multiple providers behind a single API key.
|
||||
|
||||
1. Get a key at [openrouter.ai/keys](https://openrouter.ai/keys)
|
||||
2. Set it:
|
||||
```bash
|
||||
export OPENROUTER_API_KEY="sk-or-..."
|
||||
```
|
||||
3. In GSD, type `/model` to select an OpenRouter model (prefixed with `openrouter/`)
|
||||
|
||||
To add models not in the built-in list, add them to `~/.gsd/agent/models.json`. See [Custom Models](custom-models.md).
|
||||
|
||||
### Groq
|
||||
|
||||
```bash
|
||||
export GROQ_API_KEY="gsk_..."
|
||||
```
|
||||
|
||||
### xAI (Grok)
|
||||
|
||||
```bash
|
||||
export XAI_API_KEY="xai-..."
|
||||
```
|
||||
|
||||
### Mistral
|
||||
|
||||
```bash
|
||||
export MISTRAL_API_KEY="..."
|
||||
```
|
||||
|
||||
### GitHub Copilot
|
||||
|
||||
Uses OAuth — sign in through the browser:
|
||||
|
||||
```bash
|
||||
gsd config
|
||||
# Choose "Sign in with your browser" → "GitHub Copilot"
|
||||
```
|
||||
|
||||
Requires an active GitHub Copilot subscription.
|
||||
|
||||
### Amazon Bedrock
|
||||
|
||||
Bedrock uses AWS IAM credentials:
|
||||
|
||||
```bash
|
||||
# Named profile
|
||||
export AWS_PROFILE="my-profile"
|
||||
|
||||
# Or IAM keys
|
||||
export AWS_ACCESS_KEY_ID="AKIA..."
|
||||
export AWS_SECRET_ACCESS_KEY="..."
|
||||
export AWS_REGION="us-east-1"
|
||||
|
||||
# Or bearer token
|
||||
export AWS_BEARER_TOKEN_BEDROCK="..."
|
||||
```
|
||||
|
||||
ECS task roles and IRSA (Kubernetes) are also detected automatically.
|
||||
|
||||
### Anthropic on Vertex AI
|
||||
|
||||
```bash
|
||||
gcloud auth application-default login
|
||||
export ANTHROPIC_VERTEX_PROJECT_ID="my-project-id"
|
||||
```
|
||||
|
||||
### Azure OpenAI
|
||||
|
||||
```bash
|
||||
export AZURE_OPENAI_API_KEY="..."
|
||||
```
|
||||
|
||||
## Local Providers
|
||||
|
||||
Local providers run on your machine. They require a `models.json` configuration file at `~/.gsd/agent/models.json` because GSD needs to know the endpoint URL and available models.
|
||||
|
||||
The file reloads each time you open `/model` — no restart needed.
|
||||
|
||||
### Ollama
|
||||
|
||||
1. Install and start Ollama:
|
||||
```bash
|
||||
brew install ollama
|
||||
ollama serve
|
||||
```
|
||||
|
||||
2. Pull a model:
|
||||
```bash
|
||||
ollama pull llama3.1:8b
|
||||
```
|
||||
|
||||
3. Create `~/.gsd/agent/models.json`:
|
||||
```json
|
||||
{
|
||||
"providers": {
|
||||
"ollama": {
|
||||
"baseUrl": "http://localhost:11434/v1",
|
||||
"api": "openai-completions",
|
||||
"apiKey": "ollama",
|
||||
"compat": {
|
||||
"supportsDeveloperRole": false,
|
||||
"supportsReasoningEffort": false
|
||||
},
|
||||
"models": [
|
||||
{ "id": "llama3.1:8b" }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
4. In GSD, type `/model` and select your Ollama model.
|
||||
|
||||
### LM Studio
|
||||
|
||||
1. Install [LM Studio](https://lmstudio.ai)
|
||||
2. Go to "Local Server" tab, load a model, click "Start Server" (default port 1234)
|
||||
3. Create `~/.gsd/agent/models.json`:
|
||||
```json
|
||||
{
|
||||
"providers": {
|
||||
"lm-studio": {
|
||||
"baseUrl": "http://localhost:1234/v1",
|
||||
"api": "openai-completions",
|
||||
"apiKey": "lm-studio",
|
||||
"compat": {
|
||||
"supportsDeveloperRole": false,
|
||||
"supportsReasoningEffort": false
|
||||
},
|
||||
"models": [
|
||||
{ "id": "your-model-name" }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### vLLM
|
||||
|
||||
```json
|
||||
{
|
||||
"providers": {
|
||||
"vllm": {
|
||||
"baseUrl": "http://localhost:8000/v1",
|
||||
"api": "openai-completions",
|
||||
"apiKey": "vllm",
|
||||
"compat": {
|
||||
"supportsDeveloperRole": false,
|
||||
"supportsReasoningEffort": false,
|
||||
"supportsUsageInStreaming": false
|
||||
},
|
||||
"models": [
|
||||
{ "id": "meta-llama/Llama-3.1-8B-Instruct" }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### SGLang
|
||||
|
||||
```json
|
||||
{
|
||||
"providers": {
|
||||
"sglang": {
|
||||
"baseUrl": "http://localhost:30000/v1",
|
||||
"api": "openai-completions",
|
||||
"apiKey": "sglang",
|
||||
"compat": {
|
||||
"supportsDeveloperRole": false,
|
||||
"supportsReasoningEffort": false
|
||||
},
|
||||
"models": [
|
||||
{ "id": "meta-llama/Llama-3.1-8B-Instruct" }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Custom OpenAI-Compatible Endpoints
|
||||
|
||||
Any server that implements the OpenAI Chat Completions API can work with GSD — proxies (LiteLLM, Portkey, Helicone), self-hosted inference, new providers.
|
||||
|
||||
**Quickest path:**
|
||||
|
||||
```bash
|
||||
gsd config
|
||||
# Choose "Paste an API key" → "Custom (OpenAI-compatible)"
|
||||
# Enter: base URL, API key, model ID
|
||||
```
|
||||
|
||||
This writes `~/.gsd/agent/models.json` for you. See [Custom Models](custom-models.md) for manual setup.
|
||||
|
||||
## Verifying Your Setup
|
||||
|
||||
1. Launch GSD: `gsd`
|
||||
2. Check available models: `/model`
|
||||
3. Select your model from the picker
|
||||
4. Send a test message to confirm it responds
|
||||
|
||||
If the model doesn't appear, check:
|
||||
- The environment variable is set in the current shell
|
||||
- `models.json` is valid JSON
|
||||
- The server is running (for local providers)
|
||||
|
||||
## Common Issues
|
||||
|
||||
| Problem | Cause | Fix |
|
||||
|---------|-------|-----|
|
||||
| "Authentication failed" with valid key | Key not visible to GSD | Export in the same terminal, or save via `gsd config` |
|
||||
| OpenRouter models not in `/model` | No API key set | Set `OPENROUTER_API_KEY` and restart |
|
||||
| Ollama returns empty responses | Server not running or model not pulled | Run `ollama serve` and `ollama pull <model>` |
|
||||
| LM Studio model ID mismatch | ID doesn't match server | Check LM Studio's server tab for the exact identifier |
|
||||
| `developer` role error | Local server doesn't support it | Set `compat.supportsDeveloperRole: false` |
|
||||
| `stream_options` error | Server doesn't support streaming usage | Set `compat.supportsUsageInStreaming: false` |
|
||||
| Cost shows $0.00 | Default for custom models | Add `cost` field to model definition |
|
||||
183
gitbook/core-concepts/auto-mode.md
Normal file
183
gitbook/core-concepts/auto-mode.md
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
# Auto Mode
|
||||
|
||||
Auto mode is GSD's autonomous execution engine. Run `/gsd auto`, walk away, come back to built software with clean git history.
|
||||
|
||||
## Starting Auto Mode
|
||||
|
||||
```
|
||||
/gsd auto
|
||||
```
|
||||
|
||||
GSD reads `.gsd/STATE.md`, determines the next unit of work, creates a fresh AI session with all relevant context, and lets the AI execute. When it finishes, GSD reads disk state again and dispatches the next unit. This continues until the milestone is complete.
|
||||
|
||||
## The Execution Loop
|
||||
|
||||
Each slice flows through phases automatically:
|
||||
|
||||
```
|
||||
Plan → Execute (per task) → Complete → Reassess Roadmap → Next Slice
|
||||
↓ (all done)
|
||||
Validate Milestone
|
||||
```
|
||||
|
||||
- **Plan** — scouts the codebase, researches docs, decomposes the slice into tasks
|
||||
- **Execute** — runs each task in a fresh context window
|
||||
- **Complete** — writes summary, UAT script, marks roadmap, commits
|
||||
- **Reassess** — checks if the roadmap still makes sense after what was learned
|
||||
- **Validate** — after all slices, verifies success criteria were actually met
|
||||
|
||||
## Controlling Auto Mode
|
||||
|
||||
### Pause
|
||||
|
||||
Press **Escape**. The conversation is preserved. You can interact with the agent, inspect state, or resume.
|
||||
|
||||
### Resume
|
||||
|
||||
```
|
||||
/gsd auto
|
||||
```
|
||||
|
||||
Auto mode reads disk state and picks up where it left off.
|
||||
|
||||
### Stop
|
||||
|
||||
```
|
||||
/gsd stop
|
||||
```
|
||||
|
||||
Stops auto mode gracefully. Can be run from a different terminal.
|
||||
|
||||
### Steer
|
||||
|
||||
```
|
||||
/gsd steer
|
||||
```
|
||||
|
||||
Modify plan documents during execution without stopping. Changes are picked up at the next phase boundary.
|
||||
|
||||
### Capture Thoughts
|
||||
|
||||
```
|
||||
/gsd capture "add rate limiting to API endpoints"
|
||||
```
|
||||
|
||||
Fire-and-forget thought capture. Captures are triaged automatically between tasks without pausing execution. See [Captures & Triage](../features/captures.md).
|
||||
|
||||
## Fresh Session Per Unit
|
||||
|
||||
Every task gets a clean AI context window. No accumulated garbage, no quality degradation from context bloat. The dispatch prompt includes everything needed — task plans, prior summaries, decisions, dependency context — so the AI starts oriented.
|
||||
|
||||
## Git Isolation
|
||||
|
||||
GSD isolates milestone work using one of three modes:
|
||||
|
||||
| Mode | How It Works | Best For |
|
||||
|------|-------------|----------|
|
||||
| `worktree` (default) | Each milestone gets its own directory and branch | Most projects |
|
||||
| `branch` | Work happens in the project root on a milestone branch | Submodule-heavy repos |
|
||||
| `none` | Work happens directly on your current branch | Hot-reload workflows |
|
||||
|
||||
In worktree mode, all commits are squash-merged to main as one clean commit when the milestone completes. See [Git & Worktrees](../configuration/git-settings.md).
|
||||
|
||||
## Crash Recovery
|
||||
|
||||
If a session dies, the next `/gsd auto` reads the surviving session file, synthesizes a recovery briefing from every tool call that made it to disk, and resumes with full context.
|
||||
|
||||
In headless mode (`gsd headless auto`), crashes trigger automatic restart with exponential backoff (5s → 10s → 30s, up to 3 attempts). Combined with crash recovery, this enables true overnight "fire and forget" execution.
|
||||
|
||||
## Provider Error Recovery
|
||||
|
||||
GSD handles provider errors automatically:
|
||||
|
||||
| Error Type | Examples | What Happens |
|
||||
|-----------|----------|-------------|
|
||||
| Rate limit | 429, "too many requests" | Auto-resumes after cooldown (60s or retry-after header) |
|
||||
| Server error | 500, 502, 503, "overloaded" | Auto-resumes after 30s |
|
||||
| Permanent | "unauthorized", "invalid key" | Pauses — requires manual resume |
|
||||
|
||||
No manual intervention needed for transient errors.
|
||||
|
||||
## Timeout Supervision
|
||||
|
||||
Three timeout tiers prevent runaway sessions:
|
||||
|
||||
| Timeout | Default | What Happens |
|
||||
|---------|---------|-------------|
|
||||
| Soft | 20 min | Warns the AI to wrap up |
|
||||
| Idle | 10 min | Detects stalls, intervenes |
|
||||
| Hard | 30 min | Pauses auto mode |
|
||||
|
||||
Configure in preferences:
|
||||
|
||||
```yaml
|
||||
auto_supervisor:
|
||||
soft_timeout_minutes: 20
|
||||
idle_timeout_minutes: 10
|
||||
hard_timeout_minutes: 30
|
||||
```
|
||||
|
||||
## Verification Gates
|
||||
|
||||
Configure shell commands that run automatically after every task:
|
||||
|
||||
```yaml
|
||||
verification_commands:
|
||||
- npm run lint
|
||||
- npm run test
|
||||
verification_auto_fix: true # auto-retry on failure
|
||||
verification_max_retries: 2 # max retry attempts
|
||||
```
|
||||
|
||||
If verification fails, the AI sees the output and attempts to fix the issues before advancing. This ensures quality gates are enforced mechanically.
|
||||
|
||||
## Slice Discussion Gate
|
||||
|
||||
For projects requiring human review before each slice:
|
||||
|
||||
```yaml
|
||||
require_slice_discussion: true
|
||||
```
|
||||
|
||||
Auto mode pauses before each slice, showing the plan for your approval before building.
|
||||
|
||||
## Stuck Detection
|
||||
|
||||
GSD uses sliding-window analysis to detect stuck loops — not just "same unit dispatched twice" but also cycles like A→B→A→B. On detection, GSD retries once with a diagnostic prompt. If it fails again, auto mode stops with details so you can intervene.
|
||||
|
||||
## Cost Tracking
|
||||
|
||||
Every unit's token usage and cost is captured, broken down by phase, slice, and model. The dashboard shows running totals and projections. Budget ceilings can pause auto mode before overspending. See [Cost Management](../features/cost-management.md).
|
||||
|
||||
## Dashboard
|
||||
|
||||
`Ctrl+Alt+G` or `/gsd status` shows real-time progress:
|
||||
|
||||
- Current milestone, slice, and task
|
||||
- Auto mode elapsed time and phase
|
||||
- Per-unit cost and token breakdown
|
||||
- Cost projections
|
||||
- Completed and in-progress units
|
||||
- Pending capture count
|
||||
- Parallel worker status (when running parallel milestones)
|
||||
|
||||
## HTML Reports
|
||||
|
||||
After a milestone completes, GSD generates a self-contained HTML report in `.gsd/reports/` with project summary, progress tree, dependency graph, cost metrics, timeline, and changelog. Generate manually with:
|
||||
|
||||
```
|
||||
/gsd export --html
|
||||
/gsd export --html --all # all milestones
|
||||
```
|
||||
|
||||
## Diagnostic Tools
|
||||
|
||||
If auto mode has issues, GSD provides two diagnostic tools:
|
||||
|
||||
- **`/gsd doctor`** — validates `.gsd/` integrity, checks referential consistency, fixes structural issues
|
||||
- **`/gsd forensics`** — full post-mortem debugger with anomaly detection, unit traces, metrics analysis, and AI-guided investigation
|
||||
|
||||
```
|
||||
/gsd doctor
|
||||
/gsd forensics [optional problem description]
|
||||
```
|
||||
104
gitbook/core-concepts/project-structure.md
Normal file
104
gitbook/core-concepts/project-structure.md
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
# How GSD Organizes Work
|
||||
|
||||
GSD uses a three-level hierarchy to break projects into manageable pieces that an AI can execute reliably.
|
||||
|
||||
## The Hierarchy
|
||||
|
||||
```
|
||||
Milestone → a shippable version (4-10 slices)
|
||||
Slice → one demoable vertical feature (1-7 tasks)
|
||||
Task → one context-window-sized unit of work
|
||||
```
|
||||
|
||||
### Milestones
|
||||
|
||||
A milestone is a shippable version of your project — an MVP, a major release, or a feature set that delivers standalone value. Milestones typically contain 4-10 slices.
|
||||
|
||||
Examples:
|
||||
- "MVP with user auth, dashboard, and settings"
|
||||
- "v2.0 with real-time collaboration and API v2"
|
||||
- "Security hardening milestone"
|
||||
|
||||
### Slices
|
||||
|
||||
A slice is one demoable, vertical capability within a milestone. It cuts across layers (database, backend, frontend) to deliver something you could show to a user. Slices contain 1-7 tasks.
|
||||
|
||||
Examples:
|
||||
- "User authentication with JWT"
|
||||
- "Dashboard layout with charts"
|
||||
- "API rate limiting"
|
||||
|
||||
### Tasks
|
||||
|
||||
A task is the smallest unit of work — something that fits in one AI context window. If a task can't be completed in a single AI session, it's broken into smaller tasks.
|
||||
|
||||
Examples:
|
||||
- "Create the User model and migration"
|
||||
- "Implement JWT middleware"
|
||||
- "Build the login form component"
|
||||
|
||||
## The `.gsd/` Directory
|
||||
|
||||
All project state lives on disk in a `.gsd/` directory at your project root:
|
||||
|
||||
```
|
||||
.gsd/
|
||||
PROJECT.md — living description of what the project is
|
||||
REQUIREMENTS.md — requirement contract (active/validated/deferred)
|
||||
DECISIONS.md — append-only architectural decisions log
|
||||
KNOWLEDGE.md — cross-session rules, patterns, and lessons
|
||||
RUNTIME.md — runtime context: API endpoints, env vars, services
|
||||
STATE.md — quick-glance status of current work
|
||||
PREFERENCES.md — project-level preferences (optional)
|
||||
milestones/
|
||||
M001/
|
||||
M001-ROADMAP.md — slice plan with risk levels and dependencies
|
||||
M001-CONTEXT.md — scope and goals from discussion phase
|
||||
slices/
|
||||
S01/
|
||||
S01-PLAN.md — task decomposition for this slice
|
||||
S01-SUMMARY.md — what was built and what changed
|
||||
S01-UAT.md — human test script
|
||||
tasks/
|
||||
T01-PLAN.md — detailed plan for this task
|
||||
T01-SUMMARY.md — what the task accomplished
|
||||
```
|
||||
|
||||
### Key Files
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `PROJECT.md` | High-level project description, updated as the project evolves |
|
||||
| `REQUIREMENTS.md` | Formal requirement contract — tracks what's active, validated, and deferred |
|
||||
| `DECISIONS.md` | Append-only log of architectural decisions with rationale |
|
||||
| `KNOWLEDGE.md` | Rules, patterns, and lessons learned across sessions — GSD reads this at the start of every task |
|
||||
| `RUNTIME.md` | Runtime context like API URLs, ports, and environment variables |
|
||||
| `STATE.md` | Current status at a glance — auto-generated, don't edit manually |
|
||||
|
||||
## How Work Flows
|
||||
|
||||
Each slice flows through phases:
|
||||
|
||||
```
|
||||
Plan → Execute (per task) → Complete → Reassess Roadmap → Next Slice
|
||||
```
|
||||
|
||||
1. **Plan** — GSD scouts the codebase, researches relevant docs, and decomposes the slice into tasks with clear requirements
|
||||
2. **Execute** — Each task runs in a fresh AI session with focused context
|
||||
3. **Complete** — GSD writes summaries, generates a UAT script, and commits
|
||||
4. **Reassess** — The roadmap is checked against reality — slices may be reordered, added, or removed
|
||||
5. **Next Slice** — The loop continues until all slices are done
|
||||
|
||||
After all slices complete, a **milestone validation** gate checks that success criteria were actually met before sealing the milestone.
|
||||
|
||||
## Adding Knowledge
|
||||
|
||||
GSD maintains a knowledge base that persists across sessions. Add rules, patterns, or lessons:
|
||||
|
||||
```
|
||||
/gsd knowledge rule "Always use parameterized queries for database access"
|
||||
/gsd knowledge pattern "Service classes go in src/services/"
|
||||
/gsd knowledge lesson "The OAuth flow requires the redirect URL to match exactly"
|
||||
```
|
||||
|
||||
This knowledge is injected into every task prompt automatically.
|
||||
54
gitbook/core-concepts/step-mode.md
Normal file
54
gitbook/core-concepts/step-mode.md
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
# Step Mode
|
||||
|
||||
Step mode is GSD's interactive, one-step-at-a-time workflow. You stay in the loop, reviewing output between each step.
|
||||
|
||||
## Starting Step Mode
|
||||
|
||||
```
|
||||
/gsd
|
||||
```
|
||||
|
||||
GSD reads the state of your `.gsd/` directory and presents a wizard showing what's completed and what's next. It then executes one unit of work and pauses.
|
||||
|
||||
## How It Works
|
||||
|
||||
Step mode adapts to your project's current state:
|
||||
|
||||
| State | What Happens |
|
||||
|-------|-------------|
|
||||
| No `.gsd/` directory | Starts a discussion flow to capture your project vision |
|
||||
| Milestone exists, no roadmap | Opens a discussion or research phase for the milestone |
|
||||
| Roadmap exists, slices pending | Plans the next slice or executes the next task |
|
||||
| Mid-task | Resumes where you left off |
|
||||
|
||||
After each unit completes, you see results and decide what to do next. This is ideal for:
|
||||
|
||||
- New projects where you want to shape the architecture
|
||||
- Critical work where you want to review each step
|
||||
- Learning how GSD works before trusting auto mode
|
||||
|
||||
## Steering During Step Mode
|
||||
|
||||
Between steps, you can:
|
||||
|
||||
- **Discuss** — `/gsd discuss` to talk through architecture decisions
|
||||
- **Skip** — `/gsd skip` to prevent a unit from being dispatched
|
||||
- **Undo** — `/gsd undo` to revert the last completed unit
|
||||
- **Switch to auto** — `/gsd auto` to let GSD continue autonomously
|
||||
|
||||
## When to Use Step Mode
|
||||
|
||||
- **First milestone** — Review GSD's work before trusting it to run solo
|
||||
- **Architectural decisions** — When you want to guide the approach
|
||||
- **Unfamiliar codebases** — When you want to ensure GSD understands the project
|
||||
- **High-stakes changes** — When mistakes would be costly
|
||||
|
||||
## Transitioning to Auto Mode
|
||||
|
||||
Once you're comfortable with GSD's approach, switch to auto mode:
|
||||
|
||||
```
|
||||
/gsd auto
|
||||
```
|
||||
|
||||
You can always press **Escape** to pause auto mode and return to step-by-step control.
|
||||
54
gitbook/features/captures.md
Normal file
54
gitbook/features/captures.md
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
# Captures & Triage
|
||||
|
||||
Captures let you fire-and-forget thoughts during auto-mode execution. Instead of pausing auto mode to steer, capture ideas, bugs, or scope changes and let GSD triage them at natural seams between tasks.
|
||||
|
||||
## Quick Start
|
||||
|
||||
While auto mode is running (or any time):
|
||||
|
||||
```
|
||||
/gsd capture "add rate limiting to the API endpoints"
|
||||
/gsd capture "the auth flow should support OAuth, not just JWT"
|
||||
```
|
||||
|
||||
Captures are appended to `.gsd/CAPTURES.md` and triaged automatically between tasks.
|
||||
|
||||
## How It Works
|
||||
|
||||
```
|
||||
Capture → Triage → Confirm → Resolve → Resume
|
||||
```
|
||||
|
||||
1. **Capture** — your thought is saved with a timestamp
|
||||
2. **Triage** — between tasks, GSD classifies each capture
|
||||
3. **Confirm** — you see the proposed resolution and approve or adjust
|
||||
4. **Resolve** — the resolution is applied
|
||||
5. **Resume** — auto mode continues
|
||||
|
||||
## Classification Types
|
||||
|
||||
Each capture is classified into one of five types:
|
||||
|
||||
| Type | Meaning | What Happens |
|
||||
|------|---------|-------------|
|
||||
| `quick-task` | Small, self-contained fix | Executed immediately |
|
||||
| `inject` | New task needed in current slice | Task added to active slice |
|
||||
| `defer` | Important but not urgent | Deferred to roadmap reassessment |
|
||||
| `replan` | Changes the current approach | Triggers slice replan |
|
||||
| `note` | Informational, no action needed | Acknowledged, no changes |
|
||||
|
||||
Plan-modifying resolutions (inject, replan) require your confirmation.
|
||||
|
||||
## Manual Triage
|
||||
|
||||
Trigger triage manually at any time:
|
||||
|
||||
```
|
||||
/gsd triage
|
||||
```
|
||||
|
||||
Useful when you've accumulated several captures and want to process them before the next natural seam.
|
||||
|
||||
## Dashboard Integration
|
||||
|
||||
The progress widget shows a pending capture count badge when captures are waiting for triage.
|
||||
74
gitbook/features/cost-management.md
Normal file
74
gitbook/features/cost-management.md
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
# Cost Management
|
||||
|
||||
GSD tracks token usage and cost for every unit of work during auto mode. This data powers the dashboard, budget enforcement, and cost projections.
|
||||
|
||||
## Viewing Costs
|
||||
|
||||
**Dashboard:** Press `Ctrl+Alt+G` or type `/gsd status` for real-time cost breakdown.
|
||||
|
||||
**Visualizer:** `/gsd visualize` → Metrics tab for detailed charts.
|
||||
|
||||
**Aggregations:**
|
||||
- By phase (research, planning, execution, completion, reassessment)
|
||||
- By slice
|
||||
- By model
|
||||
- Project totals
|
||||
|
||||
## Budget Ceiling
|
||||
|
||||
Set a maximum spend:
|
||||
|
||||
```yaml
|
||||
budget_ceiling: 50.00
|
||||
```
|
||||
|
||||
### Enforcement Modes
|
||||
|
||||
```yaml
|
||||
budget_enforcement: pause # default when ceiling is set
|
||||
```
|
||||
|
||||
| Mode | What Happens |
|
||||
|------|-------------|
|
||||
| `warn` | Log a warning, keep going |
|
||||
| `pause` | Pause auto mode, wait for you |
|
||||
| `halt` | Stop auto mode entirely |
|
||||
|
||||
## Cost Projections
|
||||
|
||||
Once at least two slices have completed, GSD projects the remaining cost:
|
||||
|
||||
```
|
||||
Projected remaining: $12.40 ($6.20/slice avg × 2 remaining)
|
||||
```
|
||||
|
||||
## Budget Pressure
|
||||
|
||||
When approaching the budget ceiling, GSD automatically uses cheaper models:
|
||||
|
||||
| Budget Used | Effect |
|
||||
|------------|--------|
|
||||
| < 50% | No adjustment |
|
||||
| 50-75% | Standard tasks downgrade to lighter models |
|
||||
| 75-90% | More aggressive downgrading |
|
||||
| > 90% | Nearly everything downgrades; only complex tasks stay at standard |
|
||||
|
||||
This spreads your budget across remaining work instead of exhausting it early.
|
||||
|
||||
## Token Profiles & Cost
|
||||
|
||||
| Profile | Typical Savings | How |
|
||||
|---------|----------------|-----|
|
||||
| `budget` | 40-60% | Cheaper models, phase skipping, minimal context |
|
||||
| `balanced` | 10-20% | Default models, standard context |
|
||||
| `quality` | 0% (baseline) | All phases, full context |
|
||||
|
||||
## Tips
|
||||
|
||||
- Start with `balanced` profile and a generous `budget_ceiling` to establish baseline costs
|
||||
- Check `/gsd status` after a few slices to see per-slice cost averages
|
||||
- Switch to `budget` for well-understood, repetitive work
|
||||
- Use `quality` only when architectural decisions are being made
|
||||
- Use per-phase model selection to save: Opus for planning, Sonnet for execution
|
||||
- Enable `dynamic_routing` for automatic model downgrading on simple tasks
|
||||
- Use `/gsd visualize` → Metrics tab to see where your budget is going
|
||||
88
gitbook/features/dynamic-model-routing.md
Normal file
88
gitbook/features/dynamic-model-routing.md
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
# Dynamic Model Routing
|
||||
|
||||
Dynamic model routing automatically selects cheaper models for simple work and reserves expensive models for complex tasks. This reduces cost by 20-50% without sacrificing quality where it matters.
|
||||
|
||||
## Enabling
|
||||
|
||||
```yaml
|
||||
dynamic_routing:
|
||||
enabled: true
|
||||
```
|
||||
|
||||
## How It Works
|
||||
|
||||
Each unit passes through two stages:
|
||||
|
||||
1. **Complexity classification** — classifies work as light, standard, or heavy
|
||||
2. **Capability scoring** — within the tier, ranks models by how well they match the task
|
||||
|
||||
**Key rule:** Your configured model is always the ceiling — routing never upgrades beyond what you've set.
|
||||
|
||||
| Tier | Typical Work | Model Level |
|
||||
|------|-------------|-------------|
|
||||
| Light | Slice completion, UAT, hooks | Haiku-class |
|
||||
| Standard | Research, planning, execution | Sonnet-class |
|
||||
| Heavy | Replanning, roadmap reassessment | Opus-class |
|
||||
|
||||
## Configuration
|
||||
|
||||
```yaml
|
||||
dynamic_routing:
|
||||
enabled: true
|
||||
tier_models: # optional: explicit model per tier
|
||||
light: claude-haiku-4-5
|
||||
standard: claude-sonnet-4-6
|
||||
heavy: claude-opus-4-6
|
||||
escalate_on_failure: true # bump tier on failure (default)
|
||||
budget_pressure: true # auto-downgrade near budget ceiling (default)
|
||||
cross_provider: true # consider models from other providers (default)
|
||||
capability_routing: true # score models by task fit (default)
|
||||
```
|
||||
|
||||
### Escalate on Failure
|
||||
|
||||
When a task fails at a given tier, the router escalates to the next tier on retry: Light → Standard → Heavy. This prevents cheap models from burning retries on work that needs more reasoning.
|
||||
|
||||
### Budget Pressure
|
||||
|
||||
When approaching the budget ceiling, the router progressively downgrades:
|
||||
|
||||
| Budget Used | Effect |
|
||||
|------------|--------|
|
||||
| < 50% | No adjustment |
|
||||
| 50-75% | Standard → Light |
|
||||
| 75-90% | More aggressive |
|
||||
| > 90% | Nearly everything → Light |
|
||||
|
||||
### Cross-Provider
|
||||
|
||||
When enabled, the router may select models from providers other than your primary, using the built-in cost table to find the cheapest model at each tier.
|
||||
|
||||
### Capability Routing
|
||||
|
||||
Models are scored across 7 dimensions: coding, debugging, research, reasoning, speed, long context handling, and instruction following. Different task types weight these dimensions differently — a research task prioritizes research and reasoning, while an execution task prioritizes coding and instruction following.
|
||||
|
||||
Set `capability_routing: false` to revert to simple cheapest-in-tier selection.
|
||||
|
||||
## Interaction with Token Profiles
|
||||
|
||||
Dynamic routing and token profiles work together:
|
||||
|
||||
- **Token profiles** control phase skipping and context compression
|
||||
- **Dynamic routing** controls per-unit model selection
|
||||
|
||||
The `budget` profile + dynamic routing provides maximum cost savings.
|
||||
|
||||
## Adaptive Learning
|
||||
|
||||
GSD tracks routing outcomes in `.gsd/routing-history.json`. If a tier's failure rate exceeds 20% for a given task type, future classifications are bumped up.
|
||||
|
||||
Use `/gsd rate` to submit feedback:
|
||||
|
||||
```
|
||||
/gsd rate over # too powerful — use cheaper next time
|
||||
/gsd rate ok # just right
|
||||
/gsd rate under # too weak — use stronger next time
|
||||
```
|
||||
|
||||
Feedback is weighted 2x compared to automatic outcomes.
|
||||
44
gitbook/features/github-sync.md
Normal file
44
gitbook/features/github-sync.md
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
# GitHub Sync
|
||||
|
||||
GSD can auto-sync milestones, slices, and tasks to GitHub Issues, PRs, and Milestones.
|
||||
|
||||
## Setup
|
||||
|
||||
1. Install and authenticate the `gh` CLI:
|
||||
```bash
|
||||
gh auth login
|
||||
```
|
||||
|
||||
2. Enable in preferences:
|
||||
```yaml
|
||||
github:
|
||||
enabled: true
|
||||
repo: "owner/repo" # auto-detected from git remote if omitted
|
||||
labels: [gsd, auto-generated] # labels for created items
|
||||
```
|
||||
|
||||
## Commands
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `/github-sync bootstrap` | Initial setup — creates GitHub Milestones, Issues, and draft PRs from current `.gsd/` state |
|
||||
| `/github-sync status` | Show sync mapping counts (milestones, slices, tasks) |
|
||||
|
||||
## How It Works
|
||||
|
||||
- Milestones → GitHub Milestones
|
||||
- Slices → GitHub Issues (linked to milestone)
|
||||
- Tasks → GitHub Issue checklists
|
||||
- Completed slices → Draft PRs
|
||||
|
||||
Sync mapping is persisted in `.gsd/.github-sync.json`. The sync is rate-limit aware — it skips when the GitHub API rate limit is low.
|
||||
|
||||
## Configuration
|
||||
|
||||
```yaml
|
||||
github:
|
||||
enabled: true
|
||||
repo: "owner/repo"
|
||||
labels: [gsd, auto-generated]
|
||||
project: "Project ID" # optional: GitHub Project board
|
||||
```
|
||||
86
gitbook/features/headless.md
Normal file
86
gitbook/features/headless.md
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
# Headless & CI Mode
|
||||
|
||||
`gsd headless` runs GSD commands without a terminal UI — designed for CI pipelines, cron jobs, and scripted automation.
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```bash
|
||||
# Run auto mode
|
||||
gsd headless
|
||||
|
||||
# Run a single unit
|
||||
gsd headless next
|
||||
|
||||
# With timeout for CI
|
||||
gsd headless --timeout 600000 auto
|
||||
|
||||
# Force a specific phase
|
||||
gsd headless dispatch plan
|
||||
|
||||
# Stream all events as JSONL
|
||||
gsd headless --json auto
|
||||
```
|
||||
|
||||
## Creating Milestones Headlessly
|
||||
|
||||
```bash
|
||||
# From a context file
|
||||
gsd headless new-milestone --context brief.md --auto
|
||||
|
||||
# From inline text
|
||||
gsd headless new-milestone --context-text "Build a REST API with auth"
|
||||
|
||||
# Pipe from stdin
|
||||
echo "Build a CLI tool" | gsd headless new-milestone --context -
|
||||
```
|
||||
|
||||
## CLI Flags
|
||||
|
||||
| Flag | Default | Description |
|
||||
|------|---------|-------------|
|
||||
| `--timeout N` | 300000 (5 min) | Overall timeout in milliseconds |
|
||||
| `--max-restarts N` | 3 | Auto-restart on crash (0 to disable) |
|
||||
| `--json` | — | Stream events as JSONL to stdout |
|
||||
| `--model ID` | — | Override model for this session |
|
||||
| `--context <file>` | — | Context file for `new-milestone` (use `-` for stdin) |
|
||||
| `--context-text <text>` | — | Inline context for `new-milestone` |
|
||||
| `--auto` | — | Chain into auto mode after milestone creation |
|
||||
|
||||
## Exit Codes
|
||||
|
||||
| Code | Meaning |
|
||||
|------|---------|
|
||||
| `0` | Complete |
|
||||
| `1` | Error or timeout |
|
||||
| `2` | Blocked |
|
||||
|
||||
## Instant State Query
|
||||
|
||||
`gsd headless query` returns a JSON snapshot of project state — no AI session, instant response (~50ms):
|
||||
|
||||
```bash
|
||||
gsd headless query | jq '.state.phase'
|
||||
# "executing"
|
||||
|
||||
gsd headless query | jq '.next'
|
||||
# {"action":"dispatch","unitType":"execute-task","unitId":"M001/S01/T03"}
|
||||
|
||||
gsd headless query | jq '.cost.total'
|
||||
# 4.25
|
||||
```
|
||||
|
||||
Any `/gsd` subcommand works as a positional argument: `gsd headless status`, `gsd headless doctor`, etc.
|
||||
|
||||
## MCP Server Mode
|
||||
|
||||
`gsd --mode mcp` runs GSD as a Model Context Protocol server over stdin/stdout, exposing all GSD tools to external AI clients:
|
||||
|
||||
```bash
|
||||
gsd --mode mcp
|
||||
```
|
||||
|
||||
Compatible with Claude Desktop, VS Code Copilot, and any MCP host.
|
||||
|
||||
## Auto-Restart
|
||||
|
||||
In headless mode, crashes trigger automatic restart with exponential backoff (5s → 10s → 30s cap, default 3 attempts). Receiving SIGINT or SIGTERM bypasses the restart logic. Combined with crash recovery, this enables true overnight unattended execution.
|
||||
97
gitbook/features/parallel.md
Normal file
97
gitbook/features/parallel.md
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
# Parallel Orchestration
|
||||
|
||||
Run multiple milestones simultaneously in isolated git worktrees. Each milestone gets its own worker process, branch, and context window.
|
||||
|
||||
{% hint style="info" %}
|
||||
Parallel mode is off by default. Enable it in preferences to use `/gsd parallel` commands.
|
||||
{% endhint %}
|
||||
|
||||
## Quick Start
|
||||
|
||||
1. Enable parallel mode:
|
||||
```yaml
|
||||
parallel:
|
||||
enabled: true
|
||||
max_workers: 2
|
||||
```
|
||||
|
||||
2. Start parallel execution:
|
||||
```
|
||||
/gsd parallel start
|
||||
```
|
||||
GSD scans milestones, checks dependencies and file overlap, shows an eligibility report, and spawns workers.
|
||||
|
||||
3. Monitor:
|
||||
```
|
||||
/gsd parallel status
|
||||
```
|
||||
|
||||
4. Stop:
|
||||
```
|
||||
/gsd parallel stop
|
||||
```
|
||||
|
||||
## How It Works
|
||||
|
||||
Each worker is a separate GSD process with complete isolation:
|
||||
|
||||
| Resource | Isolation |
|
||||
|----------|----------|
|
||||
| Filesystem | Own git worktree |
|
||||
| Git branch | `milestone/<MID>` |
|
||||
| Context window | Separate process |
|
||||
| Metrics | Own `metrics.json` |
|
||||
| Crash recovery | Own `auto.lock` |
|
||||
|
||||
Workers communicate with the coordinator through file-based IPC — heartbeat files and signal files in `.gsd/parallel/`.
|
||||
|
||||
## Eligibility
|
||||
|
||||
Before starting, GSD checks which milestones can run concurrently:
|
||||
|
||||
1. **Not complete** — finished milestones are skipped
|
||||
2. **Dependencies satisfied** — all `dependsOn` entries must be complete
|
||||
3. **File overlap** — milestones touching the same files get a warning (but are still eligible since they run in separate worktrees)
|
||||
|
||||
## Configuration
|
||||
|
||||
```yaml
|
||||
parallel:
|
||||
enabled: false # master toggle (default: false)
|
||||
max_workers: 2 # concurrent workers (1-4)
|
||||
budget_ceiling: 50.00 # aggregate cost limit
|
||||
merge_strategy: "per-milestone" # when to merge back
|
||||
auto_merge: "confirm" # "auto", "confirm", or "manual"
|
||||
```
|
||||
|
||||
## Commands
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `/gsd parallel start` | Analyze and start workers |
|
||||
| `/gsd parallel status` | Show all workers with progress and cost |
|
||||
| `/gsd parallel stop [MID]` | Stop all or a specific worker |
|
||||
| `/gsd parallel pause [MID]` | Pause all or a specific worker |
|
||||
| `/gsd parallel resume [MID]` | Resume paused workers |
|
||||
| `/gsd parallel merge [MID]` | Merge completed milestones to main |
|
||||
|
||||
## Merge Reconciliation
|
||||
|
||||
When milestones complete, their changes merge back to main:
|
||||
|
||||
- `.gsd/` state files are auto-resolved
|
||||
- Code conflicts halt the merge — resolve manually and retry with `/gsd parallel merge <MID>`
|
||||
|
||||
## Budget Management
|
||||
|
||||
When `budget_ceiling` is set, aggregate cost across all workers is tracked. When the ceiling is reached, workers are signaled to stop.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
| Problem | Fix |
|
||||
|---------|-----|
|
||||
| "Parallel mode is not enabled" | Set `parallel.enabled: true` |
|
||||
| "No eligible milestones" | All milestones are complete or blocked; check `/gsd queue` |
|
||||
| Worker crashed | Run `/gsd doctor --fix`, then `/gsd parallel start` |
|
||||
| Merge conflicts | Resolve in `.gsd/worktrees/<MID>/`, then `/gsd parallel merge <MID>` |
|
||||
| Workers seem stuck | Check if budget ceiling was reached via `/gsd parallel status` |
|
||||
90
gitbook/features/remote-questions.md
Normal file
90
gitbook/features/remote-questions.md
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
# Remote Questions
|
||||
|
||||
Remote questions let GSD ask for your input via Slack, Discord, or Telegram when running in headless auto mode. When GSD needs a decision, it posts the question to your configured channel and polls for a response.
|
||||
|
||||
## Setup
|
||||
|
||||
### Discord
|
||||
|
||||
```
|
||||
/gsd remote discord
|
||||
```
|
||||
|
||||
The wizard prompts for your bot token, validates it, lets you pick a server and channel, sends a test message, and saves the config.
|
||||
|
||||
**Bot requirements:**
|
||||
- A bot application with a token from the [Discord Developer Portal](https://discord.com/developers/applications)
|
||||
- Bot invited to the server with: Send Messages, Read Message History, Add Reactions, View Channel
|
||||
- `DISCORD_BOT_TOKEN` environment variable set
|
||||
|
||||
### Slack
|
||||
|
||||
```
|
||||
/gsd remote slack
|
||||
```
|
||||
|
||||
**Bot requirements:**
|
||||
- A Slack app with a bot token (`xoxb-...`) from [Slack API](https://api.slack.com/apps)
|
||||
- Bot invited to the target channel
|
||||
- Scopes: `chat:write`, `reactions:read`, `reactions:write`, `channels:read`, `groups:read`, `channels:history`, `groups:history`
|
||||
|
||||
### Telegram
|
||||
|
||||
```
|
||||
/gsd remote telegram
|
||||
```
|
||||
|
||||
**Bot requirements:**
|
||||
- A bot token from [@BotFather](https://t.me/BotFather)
|
||||
- Bot added to the target group chat
|
||||
- `TELEGRAM_BOT_TOKEN` environment variable set
|
||||
|
||||
## Configuration
|
||||
|
||||
```yaml
|
||||
remote_questions:
|
||||
channel: discord # or slack or telegram
|
||||
channel_id: "1234567890123456789"
|
||||
timeout_minutes: 5 # 1-30, default 5
|
||||
poll_interval_seconds: 5 # 2-30, default 5
|
||||
```
|
||||
|
||||
## How It Works
|
||||
|
||||
1. GSD encounters a decision point during auto mode
|
||||
2. The question is posted to your channel as a rich message
|
||||
3. GSD polls for a response at the configured interval
|
||||
4. You respond by:
|
||||
- **Reacting** with a number emoji (1️⃣, 2️⃣, etc.) for single-question prompts
|
||||
- **Replying** with a number, comma-separated numbers, or free text
|
||||
5. GSD picks up the response and continues
|
||||
6. A ✅ reaction confirms receipt
|
||||
|
||||
### Response Formats
|
||||
|
||||
**Single question:** React with a number emoji, reply with a number, or reply with free text.
|
||||
|
||||
**Multiple questions:** Reply with semicolons (`1;2;custom text`) or newlines (one answer per line).
|
||||
|
||||
### Timeouts
|
||||
|
||||
If no response arrives within `timeout_minutes`, GSD continues with a timeout result — typically making a conservative default choice.
|
||||
|
||||
## Commands
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `/gsd remote` | Show menu and current status |
|
||||
| `/gsd remote slack` | Set up Slack |
|
||||
| `/gsd remote discord` | Set up Discord |
|
||||
| `/gsd remote telegram` | Set up Telegram |
|
||||
| `/gsd remote status` | Show current config |
|
||||
| `/gsd remote disconnect` | Remove configuration |
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
| Problem | Fix |
|
||||
|---------|-----|
|
||||
| "Remote auth failed" | Verify bot token is correct and not expired |
|
||||
| "Could not send to channel" | Check bot has Send Messages permission; invite bot to channel |
|
||||
| No response detected | Make sure you're replying to the prompt message, not posting a new one |
|
||||
120
gitbook/features/skills.md
Normal file
120
gitbook/features/skills.md
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
# Skills
|
||||
|
||||
Skills are specialized instruction sets that GSD loads when the task matches. They provide domain-specific guidance — coding patterns, framework idioms, testing strategies, and tool usage.
|
||||
|
||||
Skills follow the open [Agent Skills standard](https://agentskills.io/) and work across multiple AI agents, not just GSD.
|
||||
|
||||
## Skill Directories
|
||||
|
||||
| Location | Scope | Description |
|
||||
|----------|-------|------------|
|
||||
| `~/.agents/skills/` | Global | Shared across all projects |
|
||||
| `.agents/skills/` (project root) | Project | Project-specific, committable to git |
|
||||
|
||||
Global skills take precedence when names collide.
|
||||
|
||||
## Installing Skills
|
||||
|
||||
Skills are installed via the [skills.sh CLI](https://skills.sh):
|
||||
|
||||
```bash
|
||||
# Interactive — choose skills and target agents
|
||||
npx skills add dpearson2699/swift-ios-skills
|
||||
|
||||
# Install specific skills
|
||||
npx skills add dpearson2699/swift-ios-skills --skill swift-concurrency --skill swiftui-patterns -y
|
||||
|
||||
# Install all from a repo
|
||||
npx skills add dpearson2699/swift-ios-skills --all
|
||||
|
||||
# Check for updates
|
||||
npx skills check
|
||||
|
||||
# Update installed skills
|
||||
npx skills update
|
||||
```
|
||||
|
||||
## Onboarding Catalog
|
||||
|
||||
During `gsd init`, GSD detects your project's tech stack and recommends relevant skill packs:
|
||||
|
||||
- **Swift** — SwiftUI, Swift Core, concurrency, Charts, Testing
|
||||
- **iOS** — App Intents, Widgets, StoreKit, MapKit, Core ML, Vision, accessibility
|
||||
- **Web** — React, React Native, frontend design, accessibility
|
||||
- **Languages** — Rust, Python, Go patterns and best practices
|
||||
- **General** — Document handling (PDF, DOCX, XLSX)
|
||||
|
||||
## Skill Discovery
|
||||
|
||||
The `skill_discovery` preference controls how GSD finds skills during auto mode:
|
||||
|
||||
| Mode | Behavior |
|
||||
|------|----------|
|
||||
| `auto` | Skills found and applied automatically |
|
||||
| `suggest` | Skills identified but require confirmation (default) |
|
||||
| `off` | No skill discovery |
|
||||
|
||||
## Skill Preferences
|
||||
|
||||
Control which skills are used:
|
||||
|
||||
```yaml
|
||||
always_use_skills:
|
||||
- debug-like-expert
|
||||
prefer_skills:
|
||||
- frontend-design
|
||||
avoid_skills:
|
||||
- security-docker
|
||||
skill_rules:
|
||||
- when: task involves authentication
|
||||
use: [clerk]
|
||||
- when: frontend styling work
|
||||
prefer: [frontend-design]
|
||||
```
|
||||
|
||||
## Creating Custom Skills
|
||||
|
||||
Create your own skill by adding a directory with a `SKILL.md` file:
|
||||
|
||||
```
|
||||
~/.agents/skills/my-skill/
|
||||
SKILL.md — instructions for the AI
|
||||
references/ — optional reference files
|
||||
```
|
||||
|
||||
The `SKILL.md` contains instructions the AI follows when the skill is active.
|
||||
|
||||
### Project-Local Skills
|
||||
|
||||
Place skills in your project root for project-specific guidance:
|
||||
|
||||
```
|
||||
.agents/skills/my-project-skill/
|
||||
SKILL.md
|
||||
```
|
||||
|
||||
Project-local skills can be committed to git so team members share the same skill set.
|
||||
|
||||
## Skill Health Dashboard
|
||||
|
||||
Track skill performance:
|
||||
|
||||
```
|
||||
/gsd skill-health # overview table
|
||||
/gsd skill-health rust-core # detailed view for one skill
|
||||
/gsd skill-health --stale 30 # skills unused for 30+ days
|
||||
/gsd skill-health --declining # skills with falling success rates
|
||||
```
|
||||
|
||||
The dashboard flags:
|
||||
- Success rate below 70% over the last 10 uses
|
||||
- Token usage rising 20%+ compared to previous window
|
||||
- Skills unused beyond the configured threshold
|
||||
|
||||
### Staleness Detection
|
||||
|
||||
```yaml
|
||||
skill_staleness_days: 60 # flag skills unused for 60+ days (0 to disable)
|
||||
```
|
||||
|
||||
Stale skills are excluded from automatic matching but remain available for explicit use.
|
||||
91
gitbook/features/teams.md
Normal file
91
gitbook/features/teams.md
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
# Working in Teams
|
||||
|
||||
GSD supports multi-user workflows where several developers work on the same repository concurrently.
|
||||
|
||||
## Quick Setup
|
||||
|
||||
The simplest way: set team mode in your project preferences.
|
||||
|
||||
```yaml
|
||||
# .gsd/PREFERENCES.md (committed to git)
|
||||
---
|
||||
version: 1
|
||||
mode: team
|
||||
---
|
||||
```
|
||||
|
||||
This enables unique milestone IDs, push branches, pre-merge checks, and other team-appropriate defaults in one setting.
|
||||
|
||||
## What Team Mode Does
|
||||
|
||||
| Setting | Effect |
|
||||
|---------|--------|
|
||||
| `unique_milestone_ids` | IDs like `M001-eh88as` instead of `M001` — no collisions |
|
||||
| `git.push_branches` | Milestone branches are pushed to remote |
|
||||
| `git.pre_merge_check` | Validation runs before merging |
|
||||
|
||||
You can override individual settings on top of `mode: team`.
|
||||
|
||||
## Configure `.gitignore`
|
||||
|
||||
Share planning artifacts while keeping runtime files local:
|
||||
|
||||
```bash
|
||||
# Runtime files (per-developer, gitignore these)
|
||||
.gsd/auto.lock
|
||||
.gsd/completed-units.json
|
||||
.gsd/STATE.md
|
||||
.gsd/metrics.json
|
||||
.gsd/activity/
|
||||
.gsd/runtime/
|
||||
.gsd/worktrees/
|
||||
.gsd/milestones/**/continue.md
|
||||
.gsd/milestones/**/*-CONTINUE.md
|
||||
```
|
||||
|
||||
**What gets shared** (committed to git):
|
||||
- `.gsd/PREFERENCES.md` — project preferences
|
||||
- `.gsd/PROJECT.md` — living project description
|
||||
- `.gsd/REQUIREMENTS.md` — requirement contract
|
||||
- `.gsd/DECISIONS.md` — architectural decisions
|
||||
- `.gsd/milestones/` — roadmaps, plans, summaries, research
|
||||
|
||||
**What stays local** (gitignored):
|
||||
- Lock files, metrics, state, activity logs, worktrees
|
||||
|
||||
## Commit the Config
|
||||
|
||||
```bash
|
||||
git add .gsd/PREFERENCES.md
|
||||
git commit -m "chore: enable GSD team workflow"
|
||||
```
|
||||
|
||||
## Keeping `.gsd/` Local
|
||||
|
||||
For teams where only some members use GSD:
|
||||
|
||||
```yaml
|
||||
git:
|
||||
commit_docs: false
|
||||
```
|
||||
|
||||
This gitignores `.gsd/` entirely. You get structured planning without affecting teammates.
|
||||
|
||||
## Parallel Development
|
||||
|
||||
Multiple developers can run auto mode simultaneously on different milestones. Each developer:
|
||||
|
||||
- Gets their own worktree (`.gsd/worktrees/<MID>/`)
|
||||
- Works on a unique `milestone/<MID>` branch
|
||||
- Squash-merges to main independently
|
||||
|
||||
Milestone dependencies can be declared:
|
||||
|
||||
```yaml
|
||||
# In M00X-CONTEXT.md frontmatter
|
||||
---
|
||||
depends_on: [M001-eh88as]
|
||||
---
|
||||
```
|
||||
|
||||
GSD enforces that dependent milestones complete before starting downstream work.
|
||||
108
gitbook/features/token-optimization.md
Normal file
108
gitbook/features/token-optimization.md
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
# Token Optimization
|
||||
|
||||
GSD's token optimization system can reduce token usage by 40-60% without sacrificing output quality. It has three pillars: **token profiles**, **context compression**, and **complexity-based task routing**.
|
||||
|
||||
## Token Profiles
|
||||
|
||||
A token profile coordinates model selection, phase skipping, and context compression with a single setting:
|
||||
|
||||
```yaml
|
||||
token_profile: balanced
|
||||
```
|
||||
|
||||
### `budget` — Maximum Savings (40-60%)
|
||||
|
||||
| Setting | Value |
|
||||
|---------|-------|
|
||||
| Planning model | Sonnet |
|
||||
| Execution model | Sonnet |
|
||||
| Simple task model | Haiku |
|
||||
| Milestone research | Skipped |
|
||||
| Slice research | Skipped |
|
||||
| Roadmap reassessment | Skipped |
|
||||
| Context level | Minimal |
|
||||
|
||||
Best for: prototyping, small projects, well-understood codebases.
|
||||
|
||||
### `balanced` — Smart Defaults (default)
|
||||
|
||||
| Setting | Value |
|
||||
|---------|-------|
|
||||
| All models | User's default |
|
||||
| Milestone research | Runs |
|
||||
| Slice research | Skipped |
|
||||
| Roadmap reassessment | Runs |
|
||||
| Context level | Standard |
|
||||
|
||||
Best for: most projects, day-to-day development.
|
||||
|
||||
### `quality` — Full Context
|
||||
|
||||
| Setting | Value |
|
||||
|---------|-------|
|
||||
| All models | User's configured defaults |
|
||||
| All phases | Run |
|
||||
| Context level | Full |
|
||||
|
||||
Best for: complex architectures, greenfield projects, critical work.
|
||||
|
||||
## Context Compression
|
||||
|
||||
Each profile controls how much context is pre-loaded into AI prompts:
|
||||
|
||||
| Profile | What's Included |
|
||||
|---------|----------------|
|
||||
| `budget` | Task plan and essential prior summaries only |
|
||||
| `balanced` | Task plan, summaries, slice plan, roadmap excerpt |
|
||||
| `quality` | Everything — all plans, summaries, decisions, requirements |
|
||||
|
||||
## Complexity-Based Task Routing
|
||||
|
||||
GSD classifies each task by complexity and routes it to an appropriate model:
|
||||
|
||||
| Complexity | Indicators | Model Level |
|
||||
|-----------|------------|-------------|
|
||||
| Simple | ≤3 steps, ≤3 files, short description | Haiku-class |
|
||||
| Standard | 4-7 steps, 4-7 files | Sonnet-class |
|
||||
| Complex | ≥8 steps, ≥8 files, complexity keywords | Opus-class |
|
||||
|
||||
**Complexity keywords** that prevent simple classification: `refactor`, `migrate`, `integrate`, `architect`, `security`, `performance`, `concurrent`, `distributed`, and others.
|
||||
|
||||
{% hint style="info" %}
|
||||
Dynamic routing requires `models` configured in your preferences and `dynamic_routing.enabled: true`. See [Dynamic Model Routing](dynamic-model-routing.md).
|
||||
{% endhint %}
|
||||
|
||||
## Overriding Profile Defaults
|
||||
|
||||
The `token_profile` sets defaults, but explicit preferences always win:
|
||||
|
||||
```yaml
|
||||
token_profile: budget
|
||||
phases:
|
||||
skip_research: false # override: keep research
|
||||
models:
|
||||
planning: claude-opus-4-6 # override: use Opus for planning
|
||||
```
|
||||
|
||||
## Adaptive Learning
|
||||
|
||||
GSD tracks success and failure of tier assignments over time. If a model tier's failure rate exceeds 20% for a given task type, future tasks of that type are bumped to a higher tier.
|
||||
|
||||
Submit manual feedback with:
|
||||
|
||||
```
|
||||
/gsd rate over # model was overpowered — use cheaper next time
|
||||
/gsd rate ok # model was appropriate
|
||||
/gsd rate under # model was too weak — use stronger next time
|
||||
```
|
||||
|
||||
## Observation Masking
|
||||
|
||||
During auto mode, old tool results are replaced with lightweight placeholders before each AI call. This reduces token usage between compactions with zero overhead.
|
||||
|
||||
```yaml
|
||||
context_management:
|
||||
observation_masking: true # default: true
|
||||
observation_mask_turns: 8 # keep results from last 8 turns
|
||||
tool_result_max_chars: 800 # truncate large tool outputs
|
||||
```
|
||||
82
gitbook/features/visualizer.md
Normal file
82
gitbook/features/visualizer.md
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
# Workflow Visualizer
|
||||
|
||||
The workflow visualizer is a full-screen terminal overlay showing project progress, dependencies, cost metrics, and execution timeline.
|
||||
|
||||
## Opening
|
||||
|
||||
```
|
||||
/gsd visualize
|
||||
```
|
||||
|
||||
Or configure automatic display after milestone completion:
|
||||
|
||||
```yaml
|
||||
auto_visualize: true
|
||||
```
|
||||
|
||||
## Tabs
|
||||
|
||||
Switch tabs with `Tab`, `1`-`4`, or arrow keys.
|
||||
|
||||
### 1. Progress
|
||||
|
||||
A tree view of milestones, slices, and tasks with completion status:
|
||||
|
||||
```
|
||||
M001: User Management 3/6 tasks
|
||||
✅ S01: Auth module 3/3 tasks
|
||||
✅ T01: Core types
|
||||
✅ T02: JWT middleware
|
||||
✅ T03: Login flow
|
||||
⏳ S02: User dashboard 1/2 tasks
|
||||
✅ T01: Layout component
|
||||
⬜ T02: Profile page
|
||||
```
|
||||
|
||||
### 2. Dependencies
|
||||
|
||||
An ASCII dependency graph showing slice relationships:
|
||||
|
||||
```
|
||||
S01 ──→ S02 ──→ S04
|
||||
└───→ S03 ──↗
|
||||
```
|
||||
|
||||
### 3. Metrics
|
||||
|
||||
Bar charts showing cost and token usage:
|
||||
|
||||
- By phase (research, planning, execution, completion)
|
||||
- By slice (with running totals)
|
||||
- By model (which models consumed the most budget)
|
||||
|
||||
### 4. Timeline
|
||||
|
||||
Chronological execution history: unit type, timestamps, duration, model, and token counts.
|
||||
|
||||
## Controls
|
||||
|
||||
| Key | Action |
|
||||
|-----|--------|
|
||||
| `Tab` | Next tab |
|
||||
| `Shift+Tab` | Previous tab |
|
||||
| `1`-`4` | Jump to tab |
|
||||
| `↑`/`↓` | Scroll |
|
||||
| `Escape` / `q` | Close |
|
||||
|
||||
The visualizer auto-refreshes every 2 seconds, staying current alongside running auto mode.
|
||||
|
||||
## HTML Reports
|
||||
|
||||
For shareable reports outside the terminal:
|
||||
|
||||
```
|
||||
/gsd export --html # current milestone
|
||||
/gsd export --html --all # all milestones
|
||||
```
|
||||
|
||||
Generates self-contained HTML files in `.gsd/reports/` with progress tree, dependency graph, cost charts, timeline, and changelog. All CSS and JS are inlined — no external dependencies. Printable to PDF from any browser.
|
||||
|
||||
```yaml
|
||||
auto_report: true # auto-generate after milestone completion (default)
|
||||
```
|
||||
37
gitbook/features/web-interface.md
Normal file
37
gitbook/features/web-interface.md
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
# Web Interface
|
||||
|
||||
GSD includes a browser-based interface for project management and real-time progress monitoring.
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
gsd --web
|
||||
```
|
||||
|
||||
This starts a local web server and opens the dashboard in your default browser.
|
||||
|
||||
## CLI Flags
|
||||
|
||||
```bash
|
||||
gsd --web --host 0.0.0.0 --port 8080 --allowed-origins "https://example.com"
|
||||
```
|
||||
|
||||
| Flag | Default | Description |
|
||||
|------|---------|-------------|
|
||||
| `--host` | `localhost` | Bind address |
|
||||
| `--port` | `3000` | Port |
|
||||
| `--allowed-origins` | (none) | Comma-separated CORS origins |
|
||||
|
||||
## Features
|
||||
|
||||
- **Project management** — view milestones, slices, and tasks in a visual dashboard
|
||||
- **Real-time progress** — live updates as auto mode executes
|
||||
- **Multi-project support** — manage multiple projects from the same browser via the `?project=` URL parameter
|
||||
- **Change project root** — switch directories from the web UI without restarting
|
||||
- **Onboarding flow** — API key setup and provider configuration in the browser
|
||||
- **Model selection** — switch models and providers from the web UI
|
||||
|
||||
## Platform Notes
|
||||
|
||||
- **macOS/Linux** — Full support
|
||||
- **Windows** — Web build is skipped due to Next.js compatibility issues; CLI remains fully functional
|
||||
45
gitbook/features/workflow-templates.md
Normal file
45
gitbook/features/workflow-templates.md
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
# Workflow Templates
|
||||
|
||||
Workflow templates are pre-built patterns for common development tasks. Instead of setting up a full milestone for a quick bugfix or spike, use a template to get started immediately.
|
||||
|
||||
## Using Templates
|
||||
|
||||
```
|
||||
/gsd start # pick from available templates
|
||||
/gsd start resume # resume an in-progress workflow
|
||||
```
|
||||
|
||||
## Available Templates
|
||||
|
||||
| Template | Purpose |
|
||||
|----------|---------|
|
||||
| `bugfix` | Fix a specific bug with diagnosis and verification |
|
||||
| `spike` | Time-boxed investigation or prototype |
|
||||
| `feature` | Standard feature development |
|
||||
| `hotfix` | Urgent production fix |
|
||||
| `refactor` | Code restructuring and cleanup |
|
||||
| `security-audit` | Security review and remediation |
|
||||
| `dep-upgrade` | Dependency update and migration |
|
||||
| `full-project` | Complete project from scratch |
|
||||
|
||||
## Listing and Inspecting
|
||||
|
||||
```
|
||||
/gsd templates # list all available templates
|
||||
/gsd templates info <name> # show details for a template
|
||||
```
|
||||
|
||||
## Custom Workflows
|
||||
|
||||
Create your own workflow definitions:
|
||||
|
||||
```
|
||||
/gsd workflow new # create a new workflow YAML
|
||||
/gsd workflow run <name> # start a workflow run
|
||||
/gsd workflow list # list active runs
|
||||
/gsd workflow validate <name> # validate definition
|
||||
/gsd workflow pause # pause running workflow
|
||||
/gsd workflow resume # resume paused workflow
|
||||
```
|
||||
|
||||
Custom workflows are defined in YAML and can specify phases, dependencies, and configuration for each step.
|
||||
94
gitbook/getting-started/choosing-a-model.md
Normal file
94
gitbook/getting-started/choosing-a-model.md
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
# Choosing a Model
|
||||
|
||||
GSD auto-selects a default model after you log in to a provider. You can switch models at any time.
|
||||
|
||||
## Switch Models
|
||||
|
||||
Inside a GSD session, type:
|
||||
|
||||
```
|
||||
/model
|
||||
```
|
||||
|
||||
This opens an interactive picker showing all available models from your configured providers.
|
||||
|
||||
## Per-Phase Models
|
||||
|
||||
Different phases of work have different requirements. You can assign specific models to each phase in your preferences:
|
||||
|
||||
```yaml
|
||||
models:
|
||||
research: claude-sonnet-4-6 # scouting and research
|
||||
planning: claude-opus-4-6 # architectural decisions
|
||||
execution: claude-sonnet-4-6 # writing code
|
||||
execution_simple: claude-haiku-4-5 # simple tasks (docs, config)
|
||||
completion: claude-sonnet-4-6 # summaries and wrap-up
|
||||
subagent: claude-sonnet-4-6 # delegated sub-tasks
|
||||
```
|
||||
|
||||
Omit a key to use whatever model is currently active for that phase.
|
||||
|
||||
## Model Fallbacks
|
||||
|
||||
If a model is unavailable (provider down, rate limited, credits exhausted), GSD can automatically fall back to another:
|
||||
|
||||
```yaml
|
||||
models:
|
||||
planning:
|
||||
model: claude-opus-4-6
|
||||
fallbacks:
|
||||
- openrouter/z-ai/glm-5
|
||||
- openrouter/moonshotai/kimi-k2.5
|
||||
```
|
||||
|
||||
Fallbacks are tried in order until one works.
|
||||
|
||||
## Token Profiles
|
||||
|
||||
Token profiles coordinate model selection, phase skipping, and context compression with a single setting:
|
||||
|
||||
| Profile | Cost Savings | Best For |
|
||||
|---------|-------------|----------|
|
||||
| `budget` | 40-60% | Prototyping, small projects, well-understood codebases |
|
||||
| `balanced` | 10-20% | Most projects, day-to-day development (default) |
|
||||
| `quality` | 0% (baseline) | Complex architectures, greenfield projects, critical work |
|
||||
|
||||
```yaml
|
||||
token_profile: balanced
|
||||
```
|
||||
|
||||
See [Token Optimization](../features/token-optimization.md) for details.
|
||||
|
||||
## Dynamic Model Routing
|
||||
|
||||
When enabled, GSD automatically picks cheaper models for simple tasks and reserves expensive ones for complex work:
|
||||
|
||||
```yaml
|
||||
dynamic_routing:
|
||||
enabled: true
|
||||
```
|
||||
|
||||
A documentation fix gets Haiku. An architectural refactor gets Opus. Your configured model is always the ceiling — routing never upgrades beyond what you've set.
|
||||
|
||||
See [Dynamic Model Routing](../features/dynamic-model-routing.md) for the full guide.
|
||||
|
||||
## Supported Providers
|
||||
|
||||
GSD supports 20+ providers out of the box. See [Provider Setup](../configuration/providers.md) for setup instructions:
|
||||
|
||||
| Provider | Auth Method |
|
||||
|----------|-------------|
|
||||
| Anthropic (Claude) | OAuth or API key |
|
||||
| OpenAI | API key |
|
||||
| Google Gemini | API key |
|
||||
| OpenRouter | API key |
|
||||
| Groq | API key |
|
||||
| xAI (Grok) | API key |
|
||||
| Mistral | API key |
|
||||
| GitHub Copilot | OAuth |
|
||||
| Amazon Bedrock | IAM credentials |
|
||||
| Vertex AI | ADC |
|
||||
| Azure OpenAI | API key |
|
||||
| Ollama | Local (no auth) |
|
||||
| LM Studio | Local (no auth) |
|
||||
| vLLM / SGLang | Local (no auth) |
|
||||
128
gitbook/getting-started/first-project.md
Normal file
128
gitbook/getting-started/first-project.md
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
# Your First Project
|
||||
|
||||
## Launch GSD
|
||||
|
||||
Open a terminal in any project directory (or an empty one) and run:
|
||||
|
||||
```bash
|
||||
gsd
|
||||
```
|
||||
|
||||
GSD shows a welcome screen with your version, active model, and available tool keys.
|
||||
|
||||
## Start a Discussion
|
||||
|
||||
Type `/gsd` to enter step mode. GSD reads the state of your project directory and determines the next logical action:
|
||||
|
||||
- **No `.gsd/` directory** — starts a discussion flow to capture your project vision
|
||||
- **Milestone exists, no roadmap** — discuss or research the milestone
|
||||
- **Roadmap exists, slices pending** — plan the next slice or execute a task
|
||||
- **Mid-task** — resume where you left off
|
||||
|
||||
For a new project, GSD will ask you to describe what you want to build. Talk through your vision — GSD captures requirements, architectural decisions, and scope.
|
||||
|
||||
## The Project Hierarchy
|
||||
|
||||
After discussion, GSD organizes your work into:
|
||||
|
||||
```
|
||||
Milestone → a shippable version (4-10 slices)
|
||||
Slice → one demoable feature (1-7 tasks)
|
||||
Task → one context-window-sized unit of work
|
||||
```
|
||||
|
||||
The key rule: **a task must fit in one AI context window.** If it can't, it becomes two tasks.
|
||||
|
||||
## Run Auto Mode
|
||||
|
||||
Once you have a milestone and roadmap, let GSD take the wheel:
|
||||
|
||||
```
|
||||
/gsd auto
|
||||
```
|
||||
|
||||
GSD autonomously:
|
||||
1. **Plans** each slice — scouts the codebase, researches docs, decomposes into tasks
|
||||
2. **Executes** each task — writes code in a fresh AI session
|
||||
3. **Completes** the slice — writes summaries, commits with meaningful messages
|
||||
4. **Reassesses** the roadmap — checks if the plan still makes sense
|
||||
5. **Repeats** until the milestone is done
|
||||
|
||||
## The Two-Terminal Workflow
|
||||
|
||||
The recommended approach: auto mode in one terminal, steering from another.
|
||||
|
||||
**Terminal 1 — let it build:**
|
||||
|
||||
```bash
|
||||
gsd
|
||||
/gsd auto
|
||||
```
|
||||
|
||||
**Terminal 2 — steer while it works:**
|
||||
|
||||
```bash
|
||||
gsd
|
||||
/gsd discuss # talk through architecture decisions
|
||||
/gsd status # check progress
|
||||
/gsd queue # queue the next milestone
|
||||
/gsd capture "add rate limiting to the API" # fire-and-forget thought
|
||||
```
|
||||
|
||||
Both terminals read and write the same `.gsd/` files. Decisions in terminal 2 are picked up at the next phase boundary automatically.
|
||||
|
||||
## Check Progress
|
||||
|
||||
Press `Ctrl+Alt+G` or type `/gsd status` to see the dashboard:
|
||||
|
||||
- Current milestone, slice, and task
|
||||
- Elapsed time and phase
|
||||
- Per-unit cost and token breakdown
|
||||
- Completed and in-progress work
|
||||
|
||||
## Resume a Session
|
||||
|
||||
```bash
|
||||
gsd --continue # or gsd -c
|
||||
```
|
||||
|
||||
Resumes the most recent session for the current directory.
|
||||
|
||||
To browse and pick from all saved sessions:
|
||||
|
||||
```bash
|
||||
gsd sessions
|
||||
```
|
||||
|
||||
Shows each session's date, message count, and preview so you can choose which to resume.
|
||||
|
||||
## What's on Disk
|
||||
|
||||
All state lives in `.gsd/` inside your project:
|
||||
|
||||
```
|
||||
.gsd/
|
||||
PROJECT.md — what the project is
|
||||
REQUIREMENTS.md — requirement contract
|
||||
DECISIONS.md — architectural decisions
|
||||
KNOWLEDGE.md — cross-session rules and patterns
|
||||
STATE.md — quick-glance status
|
||||
milestones/
|
||||
M001/
|
||||
M001-ROADMAP.md — slice plan with dependencies
|
||||
M001-CONTEXT.md — scope and goals
|
||||
slices/
|
||||
S01/
|
||||
S01-PLAN.md — task decomposition
|
||||
S01-SUMMARY.md — what happened
|
||||
S01-UAT.md — test script
|
||||
tasks/
|
||||
T01-PLAN.md
|
||||
T01-SUMMARY.md
|
||||
```
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Auto Mode](../core-concepts/auto-mode.md) — deep dive into autonomous execution
|
||||
- [Preferences](../configuration/preferences.md) — model selection, timeouts, budgets
|
||||
- [Commands](../reference/commands.md) — all commands and shortcuts
|
||||
84
gitbook/getting-started/installation.md
Normal file
84
gitbook/getting-started/installation.md
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
# Installation
|
||||
|
||||
## Install GSD
|
||||
|
||||
```bash
|
||||
npm install -g gsd-pi
|
||||
```
|
||||
|
||||
Requires **Node.js 22.0.0 or later** (24 LTS recommended) and **Git**.
|
||||
|
||||
{% hint style="info" %}
|
||||
**`command not found: gsd`?** Your shell may not have npm's global bin directory in `$PATH`. Run `npm prefix -g` to find it, then add `$(npm prefix -g)/bin` to your PATH. See [Troubleshooting](../reference/troubleshooting.md) for details.
|
||||
{% endhint %}
|
||||
|
||||
GSD checks for updates once every 24 hours. When a new version is available, you'll see a prompt at startup with the option to update immediately or skip. You can also update from within a session with `/gsd update`.
|
||||
|
||||
## Set Up Your LLM Provider
|
||||
|
||||
Launch GSD for the first time:
|
||||
|
||||
```bash
|
||||
gsd
|
||||
```
|
||||
|
||||
The setup wizard walks you through:
|
||||
|
||||
1. **LLM Provider** — choose from 20+ providers (Anthropic, OpenAI, Google, OpenRouter, GitHub Copilot, Amazon Bedrock, Azure, and more). OAuth flows handle Claude Max and Copilot subscriptions automatically; otherwise paste an API key.
|
||||
2. **Tool API Keys** (optional) — Brave Search, Context7, Jina, Slack, Discord. Press Enter to skip any.
|
||||
|
||||
Re-run the wizard anytime with:
|
||||
|
||||
```bash
|
||||
gsd config
|
||||
```
|
||||
|
||||
For detailed provider setup, see [Provider Setup](../configuration/providers.md).
|
||||
|
||||
## Set Up API Keys for Tools
|
||||
|
||||
If you use a non-Anthropic model, you may need a search API key for web search. Run `/gsd config` inside any GSD session to set keys globally — they're saved to `~/.gsd/agent/auth.json` and apply to all projects.
|
||||
|
||||
| Tool | Purpose | Get a Key |
|
||||
|------|---------|-----------|
|
||||
| Tavily Search | Web search for non-Anthropic models | [tavily.com](https://tavily.com/app/api-keys) |
|
||||
| Brave Search | Web search for non-Anthropic models | [brave.com](https://brave.com/search/api) |
|
||||
| Context7 Docs | Library documentation lookup | [context7.com](https://context7.com/dashboard) |
|
||||
|
||||
Anthropic models have built-in web search and don't need these keys.
|
||||
|
||||
## VS Code Extension
|
||||
|
||||
GSD is also available as a VS Code extension. Install from the marketplace (publisher: FluxLabs) or search for "GSD" in VS Code extensions.
|
||||
|
||||
The extension provides:
|
||||
|
||||
- **`@gsd` chat participant** — talk to the agent in VS Code Chat
|
||||
- **Sidebar dashboard** — connection status, model info, token usage, quick actions
|
||||
- **Full command palette** — start/stop agent, switch models, export sessions
|
||||
|
||||
The CLI (`gsd-pi`) must be installed first — the extension connects to it via RPC.
|
||||
|
||||
## Web Interface
|
||||
|
||||
GSD also has a browser-based interface:
|
||||
|
||||
```bash
|
||||
gsd --web
|
||||
```
|
||||
|
||||
This starts a local web server with a visual dashboard, real-time progress, and multi-project support. See [Web Interface](../features/web-interface.md) for details.
|
||||
|
||||
## Alternative Binary Name
|
||||
|
||||
If the `gsd` command conflicts with another tool (e.g., the oh-my-zsh git plugin aliases `gsd` to `git svn dcommit`), use the alternative:
|
||||
|
||||
```bash
|
||||
gsd-cli
|
||||
```
|
||||
|
||||
Both `gsd` and `gsd-cli` point to the same binary. To remove the conflict permanently, add this to your `~/.zshrc`:
|
||||
|
||||
```bash
|
||||
unalias gsd 2>/dev/null
|
||||
```
|
||||
61
gitbook/reference/cli-flags.md
Normal file
61
gitbook/reference/cli-flags.md
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
# CLI Flags
|
||||
|
||||
## Starting GSD
|
||||
|
||||
| Flag | Description |
|
||||
|------|-------------|
|
||||
| `gsd` | Start a new interactive session |
|
||||
| `gsd --continue` (`-c`) | Resume the most recent session |
|
||||
| `gsd --model <id>` | Override the default model for this session |
|
||||
| `gsd --web [path]` | Start browser-based web interface |
|
||||
| `gsd --worktree [name]` (`-w`) | Start in a git worktree |
|
||||
| `gsd --no-session` | Disable session persistence |
|
||||
| `gsd --extension <path>` | Load an additional extension (repeatable) |
|
||||
| `gsd --append-system-prompt <text>` | Append text to the system prompt |
|
||||
| `gsd --tools <list>` | Comma-separated tools to enable |
|
||||
| `gsd --version` (`-v`) | Print version and exit |
|
||||
| `gsd --help` (`-h`) | Print help and exit |
|
||||
| `gsd --debug` | Enable diagnostic logging |
|
||||
|
||||
## Non-Interactive Modes
|
||||
|
||||
| Flag | Description |
|
||||
|------|-------------|
|
||||
| `gsd --print "msg"` (`-p`) | Single-shot prompt mode (no TUI) |
|
||||
| `gsd --mode <text\|json\|rpc\|mcp>` | Output mode for non-interactive use |
|
||||
|
||||
## Session Management
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `gsd sessions` | Interactive session picker — list and resume saved sessions |
|
||||
| `gsd --list-models [search]` | List available models and exit |
|
||||
|
||||
## Configuration
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `gsd config` | Set up global API keys |
|
||||
| `gsd update` | Update to the latest version |
|
||||
|
||||
## Headless Mode
|
||||
|
||||
| Flag | Description |
|
||||
|------|-------------|
|
||||
| `gsd headless` | Run without TUI |
|
||||
| `gsd headless --timeout N` | Timeout in ms (default: 300000) |
|
||||
| `gsd headless --max-restarts N` | Auto-restart on crash (default: 3) |
|
||||
| `gsd headless --json` | Stream events as JSONL |
|
||||
| `gsd headless --model ID` | Override model |
|
||||
| `gsd headless --context <file>` | Context file for `new-milestone` |
|
||||
| `gsd headless --context-text <text>` | Inline context for `new-milestone` |
|
||||
| `gsd headless --auto` | Chain into auto mode after milestone creation |
|
||||
| `gsd headless query` | Instant JSON state snapshot (~50ms) |
|
||||
|
||||
## Web Interface
|
||||
|
||||
| Flag | Default | Description |
|
||||
|------|---------|-------------|
|
||||
| `--host` | `localhost` | Bind address |
|
||||
| `--port` | `3000` | Port |
|
||||
| `--allowed-origins` | (none) | CORS origins |
|
||||
128
gitbook/reference/commands.md
Normal file
128
gitbook/reference/commands.md
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
# Commands
|
||||
|
||||
## Session Commands
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `/gsd` | Step mode — execute one unit at a time |
|
||||
| `/gsd auto` | Autonomous mode — research, plan, execute, commit, repeat |
|
||||
| `/gsd quick` | Quick task with GSD guarantees but no full planning |
|
||||
| `/gsd stop` | Stop auto mode gracefully |
|
||||
| `/gsd pause` | Pause auto mode (preserves state) |
|
||||
| `/gsd steer` | Modify plan documents during execution |
|
||||
| `/gsd discuss` | Discuss architecture and decisions |
|
||||
| `/gsd status` | Progress dashboard |
|
||||
| `/gsd widget` | Cycle dashboard widget: full / small / min / off |
|
||||
| `/gsd queue` | Queue and reorder future milestones |
|
||||
| `/gsd capture` | Fire-and-forget thought capture |
|
||||
| `/gsd triage` | Manually trigger capture triage |
|
||||
| `/gsd dispatch` | Dispatch a specific phase directly |
|
||||
| `/gsd history` | View execution history (supports `--cost`, `--phase`, `--model` filters) |
|
||||
| `/gsd forensics` | Full debugger for auto-mode failures |
|
||||
| `/gsd cleanup` | Clean up state files and stale worktrees |
|
||||
| `/gsd visualize` | Open workflow visualizer |
|
||||
| `/gsd export --html` | Generate HTML report for current milestone |
|
||||
| `/gsd export --html --all` | Generate reports for all milestones |
|
||||
| `/gsd update` | Update GSD to the latest version |
|
||||
| `/gsd knowledge` | Add persistent project knowledge |
|
||||
| `/gsd fast` | Toggle service tier for supported models |
|
||||
| `/gsd rate` | Rate last unit's model tier (over/ok/under) |
|
||||
| `/gsd changelog` | Show release notes |
|
||||
| `/gsd logs` | Browse activity and debug logs |
|
||||
| `/gsd remote` | Control remote auto-mode |
|
||||
| `/gsd help` | Show all available commands |
|
||||
|
||||
## Configuration & Diagnostics
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `/gsd prefs` | Preferences wizard |
|
||||
| `/gsd mode` | Switch workflow mode (solo/team) |
|
||||
| `/gsd config` | Re-run provider setup wizard |
|
||||
| `/gsd keys` | API key manager |
|
||||
| `/gsd doctor` | Runtime health checks with auto-fix |
|
||||
| `/gsd inspect` | Show database diagnostics |
|
||||
| `/gsd init` | Project init wizard |
|
||||
| `/gsd setup` | Global setup status |
|
||||
| `/gsd skill-health` | Skill lifecycle dashboard |
|
||||
| `/gsd hooks` | Show configured hooks |
|
||||
| `/gsd migrate` | Migrate v1 `.planning` to `.gsd` format |
|
||||
|
||||
## Milestone Management
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `/gsd new-milestone` | Create a new milestone |
|
||||
| `/gsd skip` | Prevent a unit from being dispatched in auto mode |
|
||||
| `/gsd undo` | Revert last completed unit |
|
||||
| `/gsd undo-task` | Reset a specific task's completion state |
|
||||
| `/gsd reset-slice` | Reset a slice and all its tasks |
|
||||
| `/gsd park` | Park a milestone (skip without deleting) |
|
||||
| `/gsd unpark` | Reactivate a parked milestone |
|
||||
|
||||
## Parallel Orchestration
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `/gsd parallel start` | Analyze and start parallel workers |
|
||||
| `/gsd parallel status` | Show worker state and progress |
|
||||
| `/gsd parallel stop [MID]` | Stop workers |
|
||||
| `/gsd parallel pause [MID]` | Pause workers |
|
||||
| `/gsd parallel resume [MID]` | Resume workers |
|
||||
| `/gsd parallel merge [MID]` | Merge completed milestones |
|
||||
|
||||
## Workflow Templates
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `/gsd start` | Start a workflow template |
|
||||
| `/gsd start resume` | Resume an in-progress workflow |
|
||||
| `/gsd templates` | List available templates |
|
||||
| `/gsd templates info <name>` | Show template details |
|
||||
|
||||
## Custom Workflows
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `/gsd workflow new` | Create a workflow definition |
|
||||
| `/gsd workflow run <name>` | Start a workflow run |
|
||||
| `/gsd workflow list` | List workflow runs |
|
||||
| `/gsd workflow validate <name>` | Validate a workflow YAML |
|
||||
| `/gsd workflow pause` | Pause workflow auto-mode |
|
||||
| `/gsd workflow resume` | Resume paused workflow |
|
||||
|
||||
## Extensions
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `/gsd extensions list` | List all extensions |
|
||||
| `/gsd extensions enable <id>` | Enable an extension |
|
||||
| `/gsd extensions disable <id>` | Disable an extension |
|
||||
| `/gsd extensions info <id>` | Show extension details |
|
||||
|
||||
## GitHub Sync
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `/github-sync bootstrap` | Initial GitHub sync setup |
|
||||
| `/github-sync status` | Show sync mapping counts |
|
||||
|
||||
## Session Management
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `/clear` | Start a new session |
|
||||
| `/exit` | Graceful shutdown |
|
||||
| `/model` | Switch the active model |
|
||||
| `/login` | Log in to an LLM provider |
|
||||
| `/thinking` | Toggle thinking level |
|
||||
| `/voice` | Toggle speech-to-text |
|
||||
| `/worktree` (`/wt`) | Git worktree management |
|
||||
|
||||
## In-Session Update
|
||||
|
||||
```
|
||||
/gsd update
|
||||
```
|
||||
|
||||
Checks npm for a newer version and installs it without leaving the session.
|
||||
56
gitbook/reference/environment-variables.md
Normal file
56
gitbook/reference/environment-variables.md
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
# Environment Variables
|
||||
|
||||
## GSD Configuration
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `GSD_HOME` | `~/.gsd` | Global GSD directory. All paths derive from this unless individually overridden. |
|
||||
| `GSD_PROJECT_ID` | (auto-hash) | Override automatic project identity hash. Useful for CI/CD or sharing state across repo clones. |
|
||||
| `GSD_STATE_DIR` | `$GSD_HOME` | Per-project state root. Controls where `projects/<repo-hash>/` directories are created. |
|
||||
| `GSD_CODING_AGENT_DIR` | `$GSD_HOME/agent` | Agent directory for extensions, auth, and managed resources. |
|
||||
| `GSD_FETCH_ALLOWED_URLS` | (none) | Comma-separated hostnames exempt from internal URL blocking. |
|
||||
| `GSD_ALLOWED_COMMAND_PREFIXES` | (built-in) | Comma-separated command prefixes allowed for value resolution. |
|
||||
| `GSD_WEB_PROJECT_CWD` | — | Default project path for `gsd --web` when `?project=` is not specified. |
|
||||
|
||||
## LLM Provider Keys
|
||||
|
||||
| Variable | Provider |
|
||||
|----------|----------|
|
||||
| `ANTHROPIC_API_KEY` | Anthropic (Claude) |
|
||||
| `OPENAI_API_KEY` | OpenAI |
|
||||
| `GEMINI_API_KEY` | Google Gemini |
|
||||
| `OPENROUTER_API_KEY` | OpenRouter |
|
||||
| `GROQ_API_KEY` | Groq |
|
||||
| `XAI_API_KEY` | xAI (Grok) |
|
||||
| `MISTRAL_API_KEY` | Mistral |
|
||||
| `GH_TOKEN` | GitHub Copilot |
|
||||
| `AWS_PROFILE` | Amazon Bedrock (named profile) |
|
||||
| `AWS_ACCESS_KEY_ID` | Amazon Bedrock (IAM keys) |
|
||||
| `AWS_SECRET_ACCESS_KEY` | Amazon Bedrock (IAM keys) |
|
||||
| `AWS_REGION` | Amazon Bedrock (region) |
|
||||
| `AWS_BEARER_TOKEN_BEDROCK` | Amazon Bedrock (bearer token) |
|
||||
| `ANTHROPIC_VERTEX_PROJECT_ID` | Vertex AI |
|
||||
| `GOOGLE_APPLICATION_CREDENTIALS` | Vertex AI (ADC) |
|
||||
| `AZURE_OPENAI_API_KEY` | Azure OpenAI |
|
||||
|
||||
## Tool API Keys
|
||||
|
||||
| Variable | Purpose |
|
||||
|----------|---------|
|
||||
| `TAVILY_API_KEY` | Tavily web search |
|
||||
| `BRAVE_API_KEY` | Brave web search |
|
||||
| `CONTEXT7_API_KEY` | Context7 documentation lookup |
|
||||
| `DISCORD_BOT_TOKEN` | Discord remote questions |
|
||||
| `TELEGRAM_BOT_TOKEN` | Telegram remote questions |
|
||||
|
||||
## URL Blocking
|
||||
|
||||
The `fetch_page` tool blocks requests to private/internal networks by default (SSRF protection). To allow specific internal hosts:
|
||||
|
||||
```bash
|
||||
export GSD_FETCH_ALLOWED_URLS="internal-docs.company.com,192.168.1.50"
|
||||
```
|
||||
|
||||
Or set `fetchAllowedUrls` in `~/.gsd/agent/settings.json`.
|
||||
|
||||
Blocked by default: private IP ranges, cloud metadata endpoints, localhost, non-HTTP protocols, IPv6 private ranges.
|
||||
33
gitbook/reference/keyboard-shortcuts.md
Normal file
33
gitbook/reference/keyboard-shortcuts.md
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
# Keyboard Shortcuts
|
||||
|
||||
| Shortcut | Action |
|
||||
|----------|--------|
|
||||
| `Ctrl+Alt+G` | Toggle dashboard overlay |
|
||||
| `Ctrl+Alt+V` | Toggle voice transcription |
|
||||
| `Ctrl+Alt+B` | Show background shell processes |
|
||||
| `Ctrl+V` / `Alt+V` | Paste image from clipboard (screenshot → vision input) |
|
||||
| `Escape` | Pause auto mode (preserves conversation) |
|
||||
|
||||
## Terminal Compatibility
|
||||
|
||||
In terminals without Kitty keyboard protocol support (macOS Terminal.app, JetBrains IDEs), slash-command fallbacks are shown instead of `Ctrl+Alt` shortcuts.
|
||||
|
||||
{% hint style="tip" %}
|
||||
If `Ctrl+V` is intercepted by your terminal (e.g. Warp), use `Alt+V` instead for clipboard image paste.
|
||||
{% endhint %}
|
||||
|
||||
## iTerm2 Note
|
||||
|
||||
If `Ctrl+Alt` shortcuts trigger the wrong action (e.g., `Ctrl+Alt+G` opens external editor instead of the dashboard), go to **Profiles → Keys → General** and set **Left Option Key** to **Esc+**. This makes Alt/Option work correctly with Ctrl combinations.
|
||||
|
||||
## cmux Integration
|
||||
|
||||
If you use cmux (terminal multiplexer), GSD can integrate with it:
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `/gsd cmux status` | Show cmux detection and capabilities |
|
||||
| `/gsd cmux on` / `off` | Enable/disable integration |
|
||||
| `/gsd cmux notifications on/off` | Toggle desktop notifications |
|
||||
| `/gsd cmux sidebar on/off` | Toggle sidebar metadata |
|
||||
| `/gsd cmux splits on/off` | Toggle visual subagent splits |
|
||||
48
gitbook/reference/migration.md
Normal file
48
gitbook/reference/migration.md
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
# Migration from v1
|
||||
|
||||
If you have projects with `.planning` directories from the original Get Shit Done (v1), you can migrate them to GSD-2's `.gsd` format.
|
||||
|
||||
## Running the Migration
|
||||
|
||||
```bash
|
||||
# From within the project directory
|
||||
/gsd migrate
|
||||
|
||||
# Or specify a path
|
||||
/gsd migrate ~/projects/my-old-project
|
||||
```
|
||||
|
||||
## What Gets Migrated
|
||||
|
||||
The migration tool:
|
||||
|
||||
- Parses your old `PROJECT.md`, `ROADMAP.md`, `REQUIREMENTS.md`, phase directories, plans, summaries, and research
|
||||
- Maps phases → slices, plans → tasks, milestones → milestones
|
||||
- Preserves completion state (`[x]` phases stay done, summaries carry over)
|
||||
- Consolidates research files into the new structure
|
||||
- Shows a preview before writing anything
|
||||
- Optionally runs an AI-driven review for quality assurance
|
||||
|
||||
## Supported Formats
|
||||
|
||||
The migration handles various v1 format variations:
|
||||
|
||||
- Milestone-sectioned roadmaps with `<details>` blocks
|
||||
- Bold phase entries
|
||||
- Bullet-format requirements
|
||||
- Decimal phase numbering
|
||||
- Duplicate phase numbers across milestones
|
||||
|
||||
## Requirements
|
||||
|
||||
Migration works best with a `ROADMAP.md` file for milestone structure. Without one, milestones are inferred from the `phases/` directory.
|
||||
|
||||
## Post-Migration
|
||||
|
||||
After migrating, verify the output:
|
||||
|
||||
```
|
||||
/gsd doctor
|
||||
```
|
||||
|
||||
This checks `.gsd/` integrity and flags any structural issues.
|
||||
151
gitbook/reference/troubleshooting.md
Normal file
151
gitbook/reference/troubleshooting.md
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
# Troubleshooting
|
||||
|
||||
## `/gsd doctor`
|
||||
|
||||
The built-in diagnostic tool validates `.gsd/` integrity:
|
||||
|
||||
```
|
||||
/gsd doctor
|
||||
```
|
||||
|
||||
It checks file structure, roadmap ↔ slice ↔ task consistency, completion state, git health, stale locks, and orphaned records.
|
||||
|
||||
## Common Issues
|
||||
|
||||
### Auto mode loops on the same unit
|
||||
|
||||
The same unit dispatches repeatedly.
|
||||
|
||||
**Fix:** Run `/gsd doctor` to repair state, then `/gsd auto`. If it persists, check that the expected artifact file exists on disk.
|
||||
|
||||
### Auto mode stops with "Loop detected"
|
||||
|
||||
A unit failed to produce its expected artifact twice.
|
||||
|
||||
**Fix:** Check the task plan for clarity. Refine it manually, then `/gsd auto`.
|
||||
|
||||
### `command not found: gsd` after install
|
||||
|
||||
npm's global bin directory isn't in `$PATH`.
|
||||
|
||||
**Fix:**
|
||||
```bash
|
||||
npm prefix -g
|
||||
# Add the bin dir to PATH:
|
||||
echo 'export PATH="$(npm prefix -g)/bin:$PATH"' >> ~/.zshrc
|
||||
source ~/.zshrc
|
||||
```
|
||||
|
||||
**Common causes:**
|
||||
- **Homebrew Node** — `/opt/homebrew/bin` missing from PATH
|
||||
- **Version manager (nvm, fnm, mise)** — global bin is version-specific
|
||||
- **oh-my-zsh** — `gitfast` plugin aliases `gsd` to `git svn dcommit`; check with `alias gsd`
|
||||
|
||||
### Provider errors during auto mode
|
||||
|
||||
| Error Type | Auto-Resume? | Delay |
|
||||
|-----------|-------------|-------|
|
||||
| Rate limit (429) | Yes | 60s or retry-after header |
|
||||
| Server error (500, 502, 503) | Yes | 30s |
|
||||
| Auth/billing ("unauthorized") | No | Manual resume required |
|
||||
|
||||
For permanent errors, configure fallback models:
|
||||
|
||||
```yaml
|
||||
models:
|
||||
execution:
|
||||
model: claude-sonnet-4-6
|
||||
fallbacks:
|
||||
- openrouter/minimax/minimax-m2.5
|
||||
```
|
||||
|
||||
### Budget ceiling reached
|
||||
|
||||
Auto mode pauses with "Budget ceiling reached."
|
||||
|
||||
**Fix:** Increase `budget_ceiling` in preferences, or switch to `budget` token profile, then `/gsd auto`.
|
||||
|
||||
### Stale lock file
|
||||
|
||||
Auto mode won't start, says another session is running.
|
||||
|
||||
**Fix:** GSD auto-detects stale locks (dead PID = auto cleanup). If automatic recovery fails:
|
||||
|
||||
```bash
|
||||
rm -f .gsd/auto.lock
|
||||
rm -rf "$(dirname .gsd)/.gsd.lock"
|
||||
```
|
||||
|
||||
### Git merge conflicts
|
||||
|
||||
Worktree merge fails on `.gsd/` files.
|
||||
|
||||
**Fix:** `.gsd/` conflicts are auto-resolved. Code conflicts get an AI fix attempt; if that fails, resolve manually.
|
||||
|
||||
### Notifications not appearing on macOS
|
||||
|
||||
**Fix:** Install `terminal-notifier`:
|
||||
|
||||
```bash
|
||||
brew install terminal-notifier
|
||||
```
|
||||
|
||||
See [Notifications](../configuration/notifications.md) for details.
|
||||
|
||||
## MCP Issues
|
||||
|
||||
### No servers configured
|
||||
|
||||
**Fix:** Add server to `.mcp.json` or `.gsd/mcp.json`, verify JSON is valid, run `mcp_servers(refresh=true)`.
|
||||
|
||||
### Server discovery times out
|
||||
|
||||
**Fix:** Run the configured command outside GSD to confirm it starts. Check that backend services are reachable.
|
||||
|
||||
### Server connection closed immediately
|
||||
|
||||
**Fix:** Verify `command` and `args` paths are correct and absolute. Run the command manually to catch errors.
|
||||
|
||||
## Recovery Procedures
|
||||
|
||||
### Reset auto mode state
|
||||
|
||||
```bash
|
||||
rm .gsd/auto.lock
|
||||
rm .gsd/completed-units.json
|
||||
```
|
||||
|
||||
Then `/gsd auto` to restart from current state.
|
||||
|
||||
### Reset routing history
|
||||
|
||||
```bash
|
||||
rm .gsd/routing-history.json
|
||||
```
|
||||
|
||||
### Full state rebuild
|
||||
|
||||
```
|
||||
/gsd doctor
|
||||
```
|
||||
|
||||
Rebuilds `STATE.md` from plan and roadmap files and fixes inconsistencies.
|
||||
|
||||
## Getting Help
|
||||
|
||||
- **GitHub Issues:** [github.com/gsd-build/GSD-2/issues](https://github.com/gsd-build/GSD-2/issues)
|
||||
- **Dashboard:** `Ctrl+Alt+G` or `/gsd status`
|
||||
- **Forensics:** `/gsd forensics` for post-mortem analysis
|
||||
- **Session logs:** `.gsd/activity/` contains JSONL session dumps
|
||||
|
||||
## Platform-Specific Issues
|
||||
|
||||
### iTerm2
|
||||
|
||||
`Ctrl+Alt` shortcuts trigger wrong actions → Set **Profiles → Keys → General → Left Option Key** to **Esc+**.
|
||||
|
||||
### Windows
|
||||
|
||||
- LSP ENOENT on MSYS2/Git Bash → Fixed in v2.29+, upgrade
|
||||
- EBUSY errors during builds → Close browser extension, or change output directory
|
||||
- Transient EBUSY/EPERM on `.gsd/` files → Retry; close file-locking tools if persistent
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@gsd-build/engine-darwin-arm64",
|
||||
"version": "2.68.0",
|
||||
"version": "2.71.0",
|
||||
"description": "GSD native engine binary for macOS ARM64",
|
||||
"os": [
|
||||
"darwin"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@gsd-build/engine-darwin-x64",
|
||||
"version": "2.68.0",
|
||||
"version": "2.71.0",
|
||||
"description": "GSD native engine binary for macOS Intel",
|
||||
"os": [
|
||||
"darwin"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@gsd-build/engine-linux-arm64-gnu",
|
||||
"version": "2.68.0",
|
||||
"version": "2.71.0",
|
||||
"description": "GSD native engine binary for Linux ARM64 (glibc)",
|
||||
"os": [
|
||||
"linux"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@gsd-build/engine-linux-x64-gnu",
|
||||
"version": "2.68.0",
|
||||
"version": "2.71.0",
|
||||
"description": "GSD native engine binary for Linux x64 (glibc)",
|
||||
"os": [
|
||||
"linux"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@gsd-build/engine-win32-x64-msvc",
|
||||
"version": "2.68.0",
|
||||
"version": "2.71.0",
|
||||
"description": "GSD native engine binary for Windows x64 (MSVC)",
|
||||
"os": [
|
||||
"win32"
|
||||
|
|
|
|||
22
package.json
22
package.json
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "gsd-pi",
|
||||
"version": "2.68.0",
|
||||
"version": "2.71.0",
|
||||
"description": "GSD — Get Shit Done coding agent",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
|
@ -56,22 +56,22 @@
|
|||
"copy-themes": "node scripts/copy-themes.cjs",
|
||||
"copy-export-html": "node scripts/copy-export-html.cjs",
|
||||
"test:compile": "node scripts/compile-tests.mjs",
|
||||
"test:unit": "npm run test:compile && node --import ./scripts/dist-test-resolve.mjs --experimental-test-isolation=process --test-reporter=./scripts/test-reporter-compact.mjs --test 'dist-test/src/tests/*.test.js' 'dist-test/src/resources/extensions/gsd/tests/*.test.js' 'dist-test/src/resources/extensions/gsd/tests/*.test.mjs' 'dist-test/src/resources/extensions/shared/tests/*.test.js' 'dist-test/src/resources/extensions/claude-code-cli/tests/*.test.js' 'dist-test/src/resources/extensions/github-sync/tests/*.test.js' 'dist-test/src/resources/extensions/universal-config/tests/*.test.js' 'dist-test/src/resources/extensions/voice/tests/*.test.js' 'dist-test/src/resources/extensions/mcp-client/tests/*.test.js'",
|
||||
"test:packages": "node --test packages/pi-coding-agent/dist/core/*.test.js",
|
||||
"test:marketplace": "GSD_TEST_CLONE_MARKETPLACES=1 node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/resources/extensions/gsd/tests/claude-import-tui.test.ts src/resources/extensions/gsd/tests/plugin-importer-live.test.ts src/tests/marketplace-discovery.test.ts",
|
||||
"test:coverage": "c8 --reporter=text --reporter=lcov --exclude='src/resources/extensions/gsd/tests/**' --exclude='src/tests/**' --exclude='scripts/**' --exclude='native/**' --exclude='node_modules/**' --check-coverage --statements=40 --lines=40 --branches=20 --functions=20 node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --experimental-test-isolation=process --test src/resources/extensions/gsd/tests/*.test.ts src/resources/extensions/gsd/tests/*.test.mjs src/tests/*.test.ts src/resources/extensions/shared/tests/*.test.ts",
|
||||
"test:integration": "node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test 'src/tests/integration/*.test.ts' 'src/resources/extensions/gsd/tests/integration/*.test.ts' 'src/resources/extensions/async-jobs/*.test.ts' 'src/resources/extensions/browser-tools/tests/*.test.mjs'",
|
||||
"test:unit": "npm run test:compile && node --import ./scripts/dist-test-resolve.mjs --experimental-test-isolation=process --test-reporter=./scripts/test-reporter-compact.mjs --test \"dist-test/src/tests/*.test.js\" \"dist-test/src/resources/extensions/gsd/tests/*.test.js\" \"dist-test/src/resources/extensions/gsd/tests/*.test.mjs\" \"dist-test/src/resources/extensions/shared/tests/*.test.js\" \"dist-test/src/resources/extensions/claude-code-cli/tests/*.test.js\" \"dist-test/src/resources/extensions/github-sync/tests/*.test.js\" \"dist-test/src/resources/extensions/universal-config/tests/*.test.js\" \"dist-test/src/resources/extensions/voice/tests/*.test.js\" \"dist-test/src/resources/extensions/mcp-client/tests/*.test.js\"",
|
||||
"test:packages": "node --test packages/pi-coding-agent/dist/core/*.test.js packages/pi-coding-agent/dist/core/tools/spawn-shell-windows.test.js",
|
||||
"test:marketplace": "node scripts/with-env.mjs GSD_TEST_CLONE_MARKETPLACES=1 -- node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/resources/extensions/gsd/tests/claude-import-tui.test.ts src/resources/extensions/gsd/tests/plugin-importer-live.test.ts src/tests/marketplace-discovery.test.ts",
|
||||
"test:coverage": "c8 --reporter=text --reporter=lcov --exclude=\"src/resources/extensions/gsd/tests/**\" --exclude=\"src/tests/**\" --exclude=\"scripts/**\" --exclude=\"native/**\" --exclude=\"node_modules/**\" --check-coverage --statements=40 --lines=40 --branches=20 --functions=20 node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --experimental-test-isolation=process --test src/resources/extensions/gsd/tests/*.test.ts src/resources/extensions/gsd/tests/*.test.mjs src/tests/*.test.ts src/resources/extensions/shared/tests/*.test.ts",
|
||||
"test:integration": "node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test \"src/tests/integration/*.test.ts\" \"src/resources/extensions/gsd/tests/integration/*.test.ts\" \"src/resources/extensions/async-jobs/*.test.ts\" \"src/resources/extensions/browser-tools/tests/*.test.mjs\"",
|
||||
"pretest": "npm run typecheck:extensions",
|
||||
"test": "npm run test:unit && npm run test:integration",
|
||||
"test:smoke": "node --experimental-strip-types tests/smoke/run.ts",
|
||||
"test:fixtures": "node --experimental-strip-types tests/fixtures/run.ts",
|
||||
"test:fixtures:record": "GSD_FIXTURE_MODE=record node --experimental-strip-types tests/fixtures/record.ts",
|
||||
"test:live": "GSD_LIVE_TESTS=1 node --experimental-strip-types tests/live/run.ts",
|
||||
"test:fixtures:record": "node scripts/with-env.mjs GSD_FIXTURE_MODE=record -- node --experimental-strip-types tests/fixtures/record.ts",
|
||||
"test:live": "node scripts/with-env.mjs GSD_LIVE_TESTS=1 -- node --experimental-strip-types tests/live/run.ts",
|
||||
"test:browser-tools": "node --test src/resources/extensions/browser-tools/tests/browser-tools-unit.test.cjs src/resources/extensions/browser-tools/tests/browser-tools-integration.test.mjs",
|
||||
"test:native": "node --test packages/native/src/__tests__/grep.test.mjs",
|
||||
"test:secret-scan": "node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/tests/secret-scan.test.ts",
|
||||
"secret-scan": "bash scripts/secret-scan.sh",
|
||||
"secret-scan:install-hook": "bash scripts/install-hooks.sh",
|
||||
"secret-scan": "node scripts/secret-scan.mjs",
|
||||
"secret-scan:install-hook": "node scripts/install-hooks.mjs",
|
||||
"build:native": "node native/scripts/build.js",
|
||||
"build:native:dev": "node native/scripts/build.js --dev",
|
||||
"dev": "node scripts/dev.js",
|
||||
|
|
@ -92,7 +92,7 @@
|
|||
"release:update-changelog": "node scripts/update-changelog.mjs",
|
||||
"docker:build-runtime": "docker build --target runtime -t ghcr.io/gsd-build/gsd-pi .",
|
||||
"docker:build-builder": "docker build --target builder -t ghcr.io/gsd-build/gsd-ci-builder .",
|
||||
"prepublishOnly": "npm run sync-pkg-version && npm run sync-platform-versions && ([ \"$CI\" = 'true' ] || git diff --exit-code || (echo 'ERROR: version sync changed files — commit them before publishing' && exit 1)) && npm run build && npm run typecheck:extensions && npm run validate-pack",
|
||||
"prepublishOnly": "npm run sync-pkg-version && npm run sync-platform-versions && node scripts/prepublish-check.mjs && npm run build && npm run typecheck:extensions && npm run validate-pack",
|
||||
"test:live-regression": "node --experimental-strip-types tests/live-regression/run.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
|
|
|
|||
|
|
@ -12,9 +12,6 @@
|
|||
*/
|
||||
|
||||
import { z } from 'zod';
|
||||
import { readFileSync, writeFileSync, chmodSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { homedir } from 'node:os';
|
||||
import type Anthropic from '@anthropic-ai/sdk';
|
||||
import type {
|
||||
MessageParam,
|
||||
|
|
@ -30,90 +27,18 @@ import type { ProjectInfo, ManagedSession } from './types.js';
|
|||
import type { Logger } from './logger.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// OAuth token resolution — reads GSD's auth.json, refreshes if expired
|
||||
// API key resolution — requires ANTHROPIC_API_KEY env var
|
||||
// Anthropic OAuth removed per TOS compliance (see docs/user-docs/claude-code-auth-compliance.md)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
interface OAuthCredentials {
|
||||
type: 'oauth';
|
||||
refresh: string;
|
||||
access: string;
|
||||
expires: number;
|
||||
}
|
||||
|
||||
const TOKEN_URL = 'https://platform.claude.com/v1/oauth/token';
|
||||
const CLIENT_ID = atob('OWQxYzI1MGEtZTYxYi00NGQ5LTg4ZWQtNTk0NGQxOTYyZjVl');
|
||||
|
||||
/**
|
||||
* Read the Anthropic OAuth access token from GSD's auth.json.
|
||||
* If expired, refresh it and write the new credentials back.
|
||||
* Falls back to ANTHROPIC_API_KEY env var if no OAuth credential exists.
|
||||
*/
|
||||
async function resolveAnthropicApiKey(logger?: Logger): Promise<string> {
|
||||
// Try env var first (explicit override)
|
||||
if (process.env.ANTHROPIC_API_KEY) {
|
||||
return process.env.ANTHROPIC_API_KEY;
|
||||
}
|
||||
|
||||
const authPath = join(homedir(), '.gsd', 'agent', 'auth.json');
|
||||
let authData: Record<string, unknown>;
|
||||
try {
|
||||
authData = JSON.parse(readFileSync(authPath, 'utf-8'));
|
||||
} catch {
|
||||
function resolveAnthropicApiKey(): string {
|
||||
const apiKey = process.env.ANTHROPIC_API_KEY;
|
||||
if (!apiKey) {
|
||||
throw new Error(
|
||||
'No Anthropic auth found. Run `gsd login` to authenticate, or set ANTHROPIC_API_KEY.',
|
||||
'ANTHROPIC_API_KEY is required. Set it in your environment or run `gsd config`.',
|
||||
);
|
||||
}
|
||||
|
||||
const cred = authData.anthropic as OAuthCredentials | undefined;
|
||||
if (!cred || cred.type !== 'oauth' || !cred.access) {
|
||||
throw new Error(
|
||||
'No Anthropic OAuth credential in auth.json. Run `gsd login` to authenticate.',
|
||||
);
|
||||
}
|
||||
|
||||
// If token is still valid, use it
|
||||
if (Date.now() < cred.expires) {
|
||||
return cred.access;
|
||||
}
|
||||
|
||||
// Token expired — refresh it
|
||||
logger?.info('orchestrator: refreshing Anthropic OAuth token');
|
||||
const response = await fetch(TOKEN_URL, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
grant_type: 'refresh_token',
|
||||
client_id: CLIENT_ID,
|
||||
refresh_token: cred.refresh,
|
||||
}),
|
||||
signal: AbortSignal.timeout(30_000),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const error = await response.text();
|
||||
throw new Error(`Anthropic token refresh failed: ${error}`);
|
||||
}
|
||||
|
||||
const data = (await response.json()) as {
|
||||
access_token: string;
|
||||
refresh_token: string;
|
||||
expires_in: number;
|
||||
};
|
||||
|
||||
const newCred: OAuthCredentials = {
|
||||
type: 'oauth',
|
||||
refresh: data.refresh_token,
|
||||
access: data.access_token,
|
||||
expires: Date.now() + data.expires_in * 1000 - 5 * 60 * 1000,
|
||||
};
|
||||
|
||||
// Write back to auth.json
|
||||
authData.anthropic = newCred;
|
||||
writeFileSync(authPath, JSON.stringify(authData, null, 2), 'utf-8');
|
||||
chmodSync(authPath, 0o600);
|
||||
logger?.info('orchestrator: Anthropic OAuth token refreshed');
|
||||
|
||||
return newCred.access;
|
||||
return apiKey;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -254,11 +179,11 @@ export class Orchestrator {
|
|||
|
||||
/**
|
||||
* Lazily initialise the Anthropic client. Dynamic import handles K007 module resolution.
|
||||
* Resolves auth from GSD's OAuth credentials (auth.json), refreshing if needed.
|
||||
* Requires ANTHROPIC_API_KEY environment variable.
|
||||
*/
|
||||
private async getClient(): Promise<Anthropic> {
|
||||
if (this.client) return this.client;
|
||||
const apiKey = await resolveAnthropicApiKey(this.deps.logger);
|
||||
const apiKey = resolveAnthropicApiKey();
|
||||
const { default: AnthropicSDK } = await import('@anthropic-ai/sdk');
|
||||
this.client = new AnthropicSDK({ apiKey });
|
||||
return this.client;
|
||||
|
|
|
|||
|
|
@ -7,7 +7,8 @@ Start GSD auto-mode sessions, poll progress, resolve blockers, and retrieve resu
|
|||
This package now exposes two tool surfaces:
|
||||
|
||||
- session/read tools for starting and inspecting GSD sessions
|
||||
- workflow mutation tools for planning, completion, validation, reassessment, and gate persistence
|
||||
- MCP-native interactive tools for structured user input
|
||||
- headless-safe workflow tools for planning, completion, validation, reassessment, metadata persistence, and journal reads
|
||||
|
||||
## Installation
|
||||
|
||||
|
|
@ -74,18 +75,29 @@ Add to `.cursor/mcp.json`:
|
|||
|
||||
## Tools
|
||||
|
||||
### Workflow mutation tools
|
||||
### Workflow tools
|
||||
|
||||
The workflow MCP surface includes:
|
||||
|
||||
- `gsd_decision_save`
|
||||
- `gsd_save_decision`
|
||||
- `gsd_requirement_update`
|
||||
- `gsd_update_requirement`
|
||||
- `gsd_requirement_save`
|
||||
- `gsd_save_requirement`
|
||||
- `gsd_milestone_generate_id`
|
||||
- `gsd_generate_milestone_id`
|
||||
- `gsd_plan_milestone`
|
||||
- `gsd_plan_slice`
|
||||
- `gsd_plan_task`
|
||||
- `gsd_task_plan`
|
||||
- `gsd_replan_slice`
|
||||
- `gsd_slice_replan`
|
||||
- `gsd_task_complete`
|
||||
- `gsd_complete_task`
|
||||
- `gsd_slice_complete`
|
||||
- `gsd_complete_slice`
|
||||
- `gsd_skip_slice`
|
||||
- `gsd_validate_milestone`
|
||||
- `gsd_milestone_validate`
|
||||
- `gsd_complete_milestone`
|
||||
|
|
@ -95,13 +107,21 @@ The workflow MCP surface includes:
|
|||
- `gsd_save_gate_result`
|
||||
- `gsd_summary_save`
|
||||
- `gsd_milestone_status`
|
||||
- `gsd_journal_query`
|
||||
|
||||
These mutation tools use the same GSD workflow handlers as the native in-process tool path.
|
||||
These tools use the same GSD workflow handlers as the native in-process tool path wherever a shared handler exists.
|
||||
|
||||
### Interactive tools
|
||||
|
||||
The packaged server now exposes `ask_user_questions` through MCP form elicitation. This keeps the existing GSD answer payload shape while allowing Claude Code CLI and other elicitation-capable clients to surface structured user choices.
|
||||
|
||||
`secure_env_collect` is still not exposed by this package. That path needs MCP URL elicitation or an equivalent secure bridge because secrets should not flow through form elicitation.
|
||||
|
||||
Current support boundary:
|
||||
|
||||
- when running inside the GSD monorepo checkout, the MCP server auto-discovers the shared workflow executor module
|
||||
- outside the monorepo, set `GSD_WORKFLOW_EXECUTORS_MODULE` to an importable `workflow-tool-executors` module path if you want the mutation tools enabled
|
||||
- `ask_user_questions` requires an MCP client that supports form elicitation
|
||||
- session/read tools do not depend on this bridge
|
||||
|
||||
If the executor bridge cannot be loaded, workflow mutation calls will fail with a precise configuration error instead of silently degrading.
|
||||
|
|
@ -214,6 +234,8 @@ Resolve a pending blocker in a session by sending a response to the blocked UI r
|
|||
| `GSD_CLI_PATH` | Absolute path to the GSD CLI binary. If not set, the server resolves `gsd` via `which`. |
|
||||
| `GSD_WORKFLOW_EXECUTORS_MODULE` | Optional absolute path or `file:` URL for the shared GSD workflow executor module used by workflow mutation tools. |
|
||||
|
||||
The server also hydrates supported model-provider and tool credentials from `~/.gsd/agent/auth.json` on startup. Keys saved through `/gsd config` or `/gsd keys` become available to the MCP server process automatically, and any explicitly-set environment variable still wins.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* @gsd-build/mcp-server CLI — stdio transport entry point.
|
||||
*
|
||||
|
|
@ -9,13 +7,17 @@
|
|||
|
||||
import { SessionManager } from './session-manager.js';
|
||||
import { createMcpServer } from './server.js';
|
||||
import { loadStoredCredentialEnvKeys } from './tool-credentials.js';
|
||||
|
||||
const MCP_PKG = '@modelcontextprotocol/sdk';
|
||||
|
||||
async function main(): Promise<void> {
|
||||
loadStoredCredentialEnvKeys();
|
||||
|
||||
const sessionManager = new SessionManager();
|
||||
|
||||
// Create the configured MCP server with all 12 tools (6 session + 6 read-only)
|
||||
// Create the configured MCP server with session, interactive, read-only,
|
||||
// and workflow tools.
|
||||
const { server } = await createMcpServer(sessionManager);
|
||||
|
||||
// Dynamic import for StdioServerTransport (same TS subpath workaround)
|
||||
|
|
|
|||
280
packages/mcp-server/src/env-writer.test.ts
Normal file
280
packages/mcp-server/src/env-writer.test.ts
Normal file
|
|
@ -0,0 +1,280 @@
|
|||
// @gsd-build/mcp-server — Tests for env-writer utilities
|
||||
// Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
|
||||
|
||||
import { describe, it, afterEach } from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import { mkdtempSync, mkdirSync, rmSync, writeFileSync, readFileSync } from 'node:fs';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
|
||||
import {
|
||||
checkExistingEnvKeys,
|
||||
detectDestination,
|
||||
writeEnvKey,
|
||||
applySecrets,
|
||||
isSafeEnvVarKey,
|
||||
isSupportedDeploymentEnvironment,
|
||||
shellEscapeSingle,
|
||||
} from './env-writer.js';
|
||||
|
||||
function makeTempDir(prefix: string): string {
|
||||
return mkdtempSync(join(tmpdir(), `${prefix}-`));
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// checkExistingEnvKeys
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('checkExistingEnvKeys', () => {
|
||||
it('finds key in .env file', async () => {
|
||||
const tmp = makeTempDir('env-check');
|
||||
try {
|
||||
const envPath = join(tmp, '.env');
|
||||
writeFileSync(envPath, 'API_KEY=secret123\nOTHER=val\n');
|
||||
const result = await checkExistingEnvKeys(['API_KEY'], envPath);
|
||||
assert.deepStrictEqual(result, ['API_KEY']);
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('finds key in process.env', async () => {
|
||||
const tmp = makeTempDir('env-check');
|
||||
const saved = process.env.GSD_MCP_TEST_KEY_1;
|
||||
try {
|
||||
process.env.GSD_MCP_TEST_KEY_1 = 'some-value';
|
||||
const envPath = join(tmp, '.env');
|
||||
const result = await checkExistingEnvKeys(['GSD_MCP_TEST_KEY_1'], envPath);
|
||||
assert.deepStrictEqual(result, ['GSD_MCP_TEST_KEY_1']);
|
||||
} finally {
|
||||
delete process.env.GSD_MCP_TEST_KEY_1;
|
||||
if (saved !== undefined) process.env.GSD_MCP_TEST_KEY_1 = saved;
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('returns empty for missing keys', async () => {
|
||||
const tmp = makeTempDir('env-check');
|
||||
try {
|
||||
const envPath = join(tmp, '.env');
|
||||
writeFileSync(envPath, 'OTHER=val\n');
|
||||
delete process.env.DEFINITELY_NOT_SET_MCP_XYZ;
|
||||
const result = await checkExistingEnvKeys(['DEFINITELY_NOT_SET_MCP_XYZ'], envPath);
|
||||
assert.deepStrictEqual(result, []);
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('handles missing .env file gracefully', async () => {
|
||||
const tmp = makeTempDir('env-check');
|
||||
try {
|
||||
const envPath = join(tmp, 'nonexistent.env');
|
||||
delete process.env.DEFINITELY_NOT_SET_MCP_XYZ;
|
||||
const result = await checkExistingEnvKeys(['DEFINITELY_NOT_SET_MCP_XYZ'], envPath);
|
||||
assert.deepStrictEqual(result, []);
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// detectDestination
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('detectDestination', () => {
|
||||
it('returns vercel when vercel.json exists', () => {
|
||||
const tmp = makeTempDir('dest');
|
||||
try {
|
||||
writeFileSync(join(tmp, 'vercel.json'), '{}');
|
||||
assert.equal(detectDestination(tmp), 'vercel');
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('returns convex when convex/ dir exists', () => {
|
||||
const tmp = makeTempDir('dest');
|
||||
try {
|
||||
mkdirSync(join(tmp, 'convex'));
|
||||
assert.equal(detectDestination(tmp), 'convex');
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('returns dotenv when neither exists', () => {
|
||||
const tmp = makeTempDir('dest');
|
||||
try {
|
||||
assert.equal(detectDestination(tmp), 'dotenv');
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('vercel takes priority over convex', () => {
|
||||
const tmp = makeTempDir('dest');
|
||||
try {
|
||||
writeFileSync(join(tmp, 'vercel.json'), '{}');
|
||||
mkdirSync(join(tmp, 'convex'));
|
||||
assert.equal(detectDestination(tmp), 'vercel');
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// writeEnvKey
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('writeEnvKey', () => {
|
||||
it('creates .env file with new key', async () => {
|
||||
const tmp = makeTempDir('write');
|
||||
try {
|
||||
const envPath = join(tmp, '.env');
|
||||
await writeEnvKey(envPath, 'NEW_KEY', 'new-value');
|
||||
const content = readFileSync(envPath, 'utf8');
|
||||
assert.ok(content.includes('NEW_KEY=new-value'));
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('updates existing key in-place', async () => {
|
||||
const tmp = makeTempDir('write');
|
||||
try {
|
||||
const envPath = join(tmp, '.env');
|
||||
writeFileSync(envPath, 'EXISTING=old\nOTHER=keep\n');
|
||||
await writeEnvKey(envPath, 'EXISTING', 'new');
|
||||
const content = readFileSync(envPath, 'utf8');
|
||||
assert.ok(content.includes('EXISTING=new'));
|
||||
assert.ok(content.includes('OTHER=keep'));
|
||||
assert.ok(!content.includes('old'));
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('escapes newlines in values', async () => {
|
||||
const tmp = makeTempDir('write');
|
||||
try {
|
||||
const envPath = join(tmp, '.env');
|
||||
await writeEnvKey(envPath, 'MULTI', 'line1\nline2');
|
||||
const content = readFileSync(envPath, 'utf8');
|
||||
assert.ok(content.includes('MULTI=line1\\nline2'));
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('rejects non-string values', async () => {
|
||||
const tmp = makeTempDir('write');
|
||||
try {
|
||||
const envPath = join(tmp, '.env');
|
||||
await assert.rejects(
|
||||
() => writeEnvKey(envPath, 'KEY', undefined as unknown as string),
|
||||
/expects a string value/,
|
||||
);
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// applySecrets (dotenv)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('applySecrets', () => {
|
||||
const savedKeys: Record<string, string | undefined> = {};
|
||||
|
||||
afterEach(() => {
|
||||
for (const [k, v] of Object.entries(savedKeys)) {
|
||||
if (v === undefined) delete process.env[k];
|
||||
else process.env[k] = v;
|
||||
}
|
||||
});
|
||||
|
||||
it('writes keys to .env and hydrates process.env', async () => {
|
||||
const tmp = makeTempDir('apply');
|
||||
const envPath = join(tmp, '.env');
|
||||
savedKeys.GSD_APPLY_TEST_A = process.env.GSD_APPLY_TEST_A;
|
||||
try {
|
||||
const { applied, errors } = await applySecrets(
|
||||
[{ key: 'GSD_APPLY_TEST_A', value: 'val-a' }],
|
||||
'dotenv',
|
||||
{ envFilePath: envPath },
|
||||
);
|
||||
assert.deepStrictEqual(applied, ['GSD_APPLY_TEST_A']);
|
||||
assert.deepStrictEqual(errors, []);
|
||||
assert.equal(process.env.GSD_APPLY_TEST_A, 'val-a');
|
||||
const content = readFileSync(envPath, 'utf8');
|
||||
assert.ok(content.includes('GSD_APPLY_TEST_A=val-a'));
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('returns errors for invalid vercel environment', async () => {
|
||||
const tmp = makeTempDir('apply');
|
||||
try {
|
||||
const { applied, errors } = await applySecrets(
|
||||
[{ key: 'KEY', value: 'val' }],
|
||||
'vercel',
|
||||
{
|
||||
envFilePath: join(tmp, '.env'),
|
||||
environment: 'staging' as 'development',
|
||||
execFn: async () => ({ code: 0, stderr: '' }),
|
||||
},
|
||||
);
|
||||
assert.deepStrictEqual(applied, []);
|
||||
assert.ok(errors[0]?.includes('unsupported'));
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Validation helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('isSafeEnvVarKey', () => {
|
||||
it('accepts valid keys', () => {
|
||||
assert.ok(isSafeEnvVarKey('API_KEY'));
|
||||
assert.ok(isSafeEnvVarKey('_PRIVATE'));
|
||||
assert.ok(isSafeEnvVarKey('key123'));
|
||||
});
|
||||
|
||||
it('rejects invalid keys', () => {
|
||||
assert.ok(!isSafeEnvVarKey('123BAD'));
|
||||
assert.ok(!isSafeEnvVarKey('has-dash'));
|
||||
assert.ok(!isSafeEnvVarKey('has space'));
|
||||
assert.ok(!isSafeEnvVarKey(''));
|
||||
});
|
||||
});
|
||||
|
||||
describe('isSupportedDeploymentEnvironment', () => {
|
||||
it('accepts valid environments', () => {
|
||||
assert.ok(isSupportedDeploymentEnvironment('development'));
|
||||
assert.ok(isSupportedDeploymentEnvironment('preview'));
|
||||
assert.ok(isSupportedDeploymentEnvironment('production'));
|
||||
});
|
||||
|
||||
it('rejects invalid environments', () => {
|
||||
assert.ok(!isSupportedDeploymentEnvironment('staging'));
|
||||
assert.ok(!isSupportedDeploymentEnvironment('test'));
|
||||
});
|
||||
});
|
||||
|
||||
describe('shellEscapeSingle', () => {
|
||||
it('wraps in single quotes', () => {
|
||||
assert.equal(shellEscapeSingle('hello'), "'hello'");
|
||||
});
|
||||
|
||||
it('escapes embedded single quotes', () => {
|
||||
assert.equal(shellEscapeSingle("it's"), "'it'\\''s'");
|
||||
});
|
||||
});
|
||||
183
packages/mcp-server/src/env-writer.ts
Normal file
183
packages/mcp-server/src/env-writer.ts
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
// @gsd-build/mcp-server — Environment variable write utilities
|
||||
// Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
|
||||
//
|
||||
// Shared helpers for writing env vars to .env files, detecting project
|
||||
// destinations, and checking existing keys. Used by secure_env_collect
|
||||
// MCP tool. No TUI dependencies — pure filesystem + process.env operations.
|
||||
|
||||
import { readFile, writeFile } from "node:fs/promises";
|
||||
import { existsSync, statSync } from "node:fs";
|
||||
import { resolve } from "node:path";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// checkExistingEnvKeys
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Check which keys already exist in a .env file or process.env.
|
||||
* Returns the subset of `keys` that are already set.
|
||||
*/
|
||||
export async function checkExistingEnvKeys(keys: string[], envFilePath: string): Promise<string[]> {
|
||||
let fileContent = "";
|
||||
try {
|
||||
fileContent = await readFile(envFilePath, "utf8");
|
||||
} catch {
|
||||
// ENOENT or other read error — proceed with empty content
|
||||
}
|
||||
|
||||
const existing: string[] = [];
|
||||
for (const key of keys) {
|
||||
const escaped = key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
||||
const regex = new RegExp(`^${escaped}\\s*=`, "m");
|
||||
if (regex.test(fileContent) || key in process.env) {
|
||||
existing.push(key);
|
||||
}
|
||||
}
|
||||
return existing;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// detectDestination
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Detect the write destination based on project files in basePath.
|
||||
* Priority: vercel.json → convex/ dir → fallback "dotenv".
|
||||
*/
|
||||
export function detectDestination(basePath: string): "dotenv" | "vercel" | "convex" {
|
||||
if (existsSync(resolve(basePath, "vercel.json"))) {
|
||||
return "vercel";
|
||||
}
|
||||
const convexPath = resolve(basePath, "convex");
|
||||
try {
|
||||
if (existsSync(convexPath) && statSync(convexPath).isDirectory()) {
|
||||
return "convex";
|
||||
}
|
||||
} catch {
|
||||
// stat error — treat as not found
|
||||
}
|
||||
return "dotenv";
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// writeEnvKey
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Write a single key=value pair to a .env file.
|
||||
* Updates existing keys in-place, appends new ones at the end.
|
||||
*/
|
||||
export async function writeEnvKey(filePath: string, key: string, value: string): Promise<void> {
|
||||
if (typeof value !== "string") {
|
||||
throw new TypeError(`writeEnvKey expects a string value for key "${key}", got ${typeof value}`);
|
||||
}
|
||||
let content = "";
|
||||
try {
|
||||
content = await readFile(filePath, "utf8");
|
||||
} catch {
|
||||
content = "";
|
||||
}
|
||||
const escaped = value.replace(/\\/g, "\\\\").replace(/\n/g, "\\n").replace(/\r/g, "");
|
||||
const line = `${key}=${escaped}`;
|
||||
const regex = new RegExp(`^${key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\s*=.*$`, "m");
|
||||
if (regex.test(content)) {
|
||||
content = content.replace(regex, line);
|
||||
} else {
|
||||
if (content.length > 0 && !content.endsWith("\n")) content += "\n";
|
||||
content += `${line}\n`;
|
||||
}
|
||||
await writeFile(filePath, content, "utf8");
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Validation helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export function isSafeEnvVarKey(key: string): boolean {
|
||||
return /^[A-Za-z_][A-Za-z0-9_]*$/.test(key);
|
||||
}
|
||||
|
||||
export function isSupportedDeploymentEnvironment(env: string): boolean {
|
||||
return env === "development" || env === "preview" || env === "production";
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Shell helpers (for vercel/convex CLI)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export function shellEscapeSingle(value: string): string {
|
||||
return `'${value.replace(/'/g, `'\\''`)}'`;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// applySecrets
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
interface ApplyResult {
|
||||
applied: string[];
|
||||
errors: string[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply collected secrets to the target destination.
|
||||
* Dotenv writes are handled directly; vercel/convex shell out via execFn.
|
||||
*/
|
||||
export async function applySecrets(
|
||||
provided: Array<{ key: string; value: string }>,
|
||||
destination: "dotenv" | "vercel" | "convex",
|
||||
opts: {
|
||||
envFilePath: string;
|
||||
environment?: string;
|
||||
execFn?: (cmd: string, args: string[]) => Promise<{ code: number; stderr: string }>;
|
||||
},
|
||||
): Promise<ApplyResult> {
|
||||
const applied: string[] = [];
|
||||
const errors: string[] = [];
|
||||
|
||||
if (destination === "dotenv") {
|
||||
for (const { key, value } of provided) {
|
||||
try {
|
||||
await writeEnvKey(opts.envFilePath, key, value);
|
||||
applied.push(key);
|
||||
// Hydrate process.env so the current session sees the new value
|
||||
process.env[key] = value;
|
||||
} catch (err: unknown) {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
errors.push(`${key}: ${msg}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ((destination === "vercel" || destination === "convex") && opts.execFn) {
|
||||
const env = opts.environment ?? "development";
|
||||
if (!isSupportedDeploymentEnvironment(env)) {
|
||||
errors.push(`environment: unsupported target environment "${env}"`);
|
||||
return { applied, errors };
|
||||
}
|
||||
for (const { key, value } of provided) {
|
||||
if (!isSafeEnvVarKey(key)) {
|
||||
errors.push(`${key}: invalid environment variable name`);
|
||||
continue;
|
||||
}
|
||||
const cmd = destination === "vercel"
|
||||
? `printf %s ${shellEscapeSingle(value)} | vercel env add ${key} ${env}`
|
||||
: "";
|
||||
try {
|
||||
const result = destination === "vercel"
|
||||
? await opts.execFn("sh", ["-c", cmd])
|
||||
: await opts.execFn("npx", ["convex", "env", "set", key, value]);
|
||||
if (result.code !== 0) {
|
||||
errors.push(`${key}: ${result.stderr.slice(0, 200)}`);
|
||||
} else {
|
||||
applied.push(key);
|
||||
process.env[key] = value;
|
||||
}
|
||||
} catch (err: unknown) {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
errors.push(`${key}: ${msg}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return { applied, errors };
|
||||
}
|
||||
48
packages/mcp-server/src/import-candidates.test.ts
Normal file
48
packages/mcp-server/src/import-candidates.test.ts
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
// GSD-2 — Regression tests for importLocalModule candidate resolution (#3954)
|
||||
import { describe, it } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
|
||||
import { _buildImportCandidates } from "./workflow-tools.js";
|
||||
|
||||
describe("_buildImportCandidates", () => {
|
||||
it("includes dist/ fallback for src/ paths", () => {
|
||||
const candidates = _buildImportCandidates("../../../src/resources/extensions/gsd/db-writer.js");
|
||||
assert.ok(
|
||||
candidates.some((c) => c.includes("/dist/resources/extensions/gsd/db-writer.js")),
|
||||
"should include dist/ swapped candidate",
|
||||
);
|
||||
});
|
||||
|
||||
it("includes src/ fallback for dist/ paths", () => {
|
||||
const candidates = _buildImportCandidates("../../../dist/resources/extensions/gsd/db-writer.js");
|
||||
assert.ok(
|
||||
candidates.some((c) => c.includes("/src/resources/extensions/gsd/db-writer.js")),
|
||||
"should include src/ swapped candidate",
|
||||
);
|
||||
});
|
||||
|
||||
it("includes .ts variants for .js paths", () => {
|
||||
const candidates = _buildImportCandidates("../../../src/resources/extensions/gsd/db-writer.js");
|
||||
assert.ok(
|
||||
candidates.some((c) => c.endsWith("db-writer.ts") && c.includes("/src/")),
|
||||
"should include .ts variant for original src/ path",
|
||||
);
|
||||
assert.ok(
|
||||
candidates.some((c) => c.endsWith("db-writer.ts") && c.includes("/dist/")),
|
||||
"should include .ts variant for swapped dist/ path",
|
||||
);
|
||||
});
|
||||
|
||||
it("returns original path first", () => {
|
||||
const input = "../../../src/resources/extensions/gsd/db-writer.js";
|
||||
const candidates = _buildImportCandidates(input);
|
||||
assert.equal(candidates[0], input, "first candidate should be the original path");
|
||||
});
|
||||
|
||||
it("handles paths without src/ or dist/ gracefully", () => {
|
||||
const candidates = _buildImportCandidates("./local-module.js");
|
||||
assert.equal(candidates.length, 2, "should have original + .ts variant only");
|
||||
assert.equal(candidates[0], "./local-module.js");
|
||||
assert.equal(candidates[1], "./local-module.ts");
|
||||
});
|
||||
});
|
||||
|
|
@ -16,7 +16,11 @@ import { resolve } from 'node:path';
|
|||
import { EventEmitter } from 'node:events';
|
||||
|
||||
import { SessionManager } from './session-manager.js';
|
||||
import { createMcpServer } from './server.js';
|
||||
import {
|
||||
buildAskUserQuestionsElicitRequest,
|
||||
createMcpServer,
|
||||
formatAskUserQuestionsElicitResult,
|
||||
} from './server.js';
|
||||
import { MAX_EVENTS } from './types.js';
|
||||
import type { ManagedSession, CostAccumulator, PendingBlocker } from './types.js';
|
||||
|
||||
|
|
@ -574,6 +578,8 @@ describe('createMcpServer tool registration', () => {
|
|||
it('creates server successfully with all required methods', async () => {
|
||||
const { server } = await createMcpServer(sm);
|
||||
assert.ok(server);
|
||||
assert.ok(server.server);
|
||||
assert.equal(typeof server.server.elicitInput, 'function');
|
||||
assert.ok(typeof server.connect === 'function');
|
||||
assert.ok(typeof server.close === 'function');
|
||||
});
|
||||
|
|
@ -625,4 +631,82 @@ describe('createMcpServer tool registration', () => {
|
|||
const session = sm.getSession(sessionId)!;
|
||||
assert.equal(session.status, 'cancelled');
|
||||
});
|
||||
|
||||
it('buildAskUserQuestionsElicitRequest adds None of the above note field for single-select questions', () => {
|
||||
const request = buildAskUserQuestionsElicitRequest([
|
||||
{
|
||||
id: 'depth_verification_M001',
|
||||
header: 'Depth Check',
|
||||
question: 'Did I capture the depth right?',
|
||||
options: [
|
||||
{ label: 'Yes, you got it (Recommended)', description: 'Continue with the current summary.' },
|
||||
{ label: 'Not quite', description: 'I need to clarify the depth further.' },
|
||||
],
|
||||
},
|
||||
{
|
||||
id: 'focus_areas',
|
||||
header: 'Focus',
|
||||
question: 'Which areas matter most?',
|
||||
allowMultiple: true,
|
||||
options: [
|
||||
{ label: 'Frontend', description: 'Prioritize the UI.' },
|
||||
{ label: 'Backend', description: 'Prioritize server logic.' },
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
||||
assert.equal(request.mode, 'form');
|
||||
assert.deepEqual(request.requestedSchema.required, ['depth_verification_M001', 'focus_areas']);
|
||||
assert.ok(request.requestedSchema.properties['depth_verification_M001']);
|
||||
assert.ok(request.requestedSchema.properties['depth_verification_M001__note']);
|
||||
assert.ok(!request.requestedSchema.properties['focus_areas__note']);
|
||||
});
|
||||
|
||||
it('formatAskUserQuestionsElicitResult preserves the existing answers JSON shape', () => {
|
||||
const result = formatAskUserQuestionsElicitResult(
|
||||
[
|
||||
{
|
||||
id: 'depth_verification_M001',
|
||||
header: 'Depth Check',
|
||||
question: 'Did I capture the depth right?',
|
||||
options: [
|
||||
{ label: 'Yes, you got it (Recommended)', description: 'Continue with the current summary.' },
|
||||
{ label: 'Not quite', description: 'I need to clarify the depth further.' },
|
||||
],
|
||||
},
|
||||
{
|
||||
id: 'focus_areas',
|
||||
header: 'Focus',
|
||||
question: 'Which areas matter most?',
|
||||
allowMultiple: true,
|
||||
options: [
|
||||
{ label: 'Frontend', description: 'Prioritize the UI.' },
|
||||
{ label: 'Backend', description: 'Prioritize server logic.' },
|
||||
],
|
||||
},
|
||||
],
|
||||
{
|
||||
action: 'accept',
|
||||
content: {
|
||||
depth_verification_M001: 'None of the above',
|
||||
depth_verification_M001__note: 'Need more implementation detail.',
|
||||
focus_areas: ['Frontend', 'Backend'],
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
assert.equal(
|
||||
result,
|
||||
JSON.stringify({
|
||||
answers: {
|
||||
depth_verification_M001: {
|
||||
answers: ['None of the above', 'user_note: Need more implementation detail.'],
|
||||
},
|
||||
focus_areas: {
|
||||
answers: ['Frontend', 'Backend'],
|
||||
},
|
||||
},
|
||||
}),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
265
packages/mcp-server/src/secure-env-collect.test.ts
Normal file
265
packages/mcp-server/src/secure-env-collect.test.ts
Normal file
|
|
@ -0,0 +1,265 @@
|
|||
// @gsd-build/mcp-server — Tests for secure_env_collect MCP tool
|
||||
// Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
|
||||
//
|
||||
// Tests the secure_env_collect tool registered in createMcpServer.
|
||||
// Uses a mock MCP server to intercept tool registration and elicitInput calls.
|
||||
|
||||
import { describe, it, beforeEach } from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import { mkdtempSync, mkdirSync, rmSync, writeFileSync, readFileSync } from 'node:fs';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
|
||||
import { createMcpServer } from './server.js';
|
||||
import { SessionManager } from './session-manager.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock infrastructure
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* We intercept McpServer construction by monkey-patching the dynamic import.
|
||||
* Instead, we'll test the tool handler indirectly through the exported
|
||||
* createMcpServer function — capturing the registered tool handlers.
|
||||
*
|
||||
* Since createMcpServer dynamically imports McpServer, we need to test at
|
||||
* a level that exercises the tool handler logic. We do this by extracting
|
||||
* the tool handler through the server.tool() calls.
|
||||
*/
|
||||
|
||||
interface RegisteredTool {
|
||||
name: string;
|
||||
description: string;
|
||||
params: Record<string, unknown>;
|
||||
handler: (args: Record<string, unknown>) => Promise<unknown>;
|
||||
}
|
||||
|
||||
interface ToolResult {
|
||||
content?: Array<{ type: string; text: string }>;
|
||||
isError?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Mock McpServer that captures tool registrations and provides
|
||||
* a controllable elicitInput response.
|
||||
*/
|
||||
class MockMcpServer {
|
||||
registeredTools: RegisteredTool[] = [];
|
||||
elicitResponse: { action: string; content?: Record<string, unknown> } = { action: 'accept', content: {} };
|
||||
|
||||
server = {
|
||||
elicitInput: async (_params: unknown) => {
|
||||
return this.elicitResponse;
|
||||
},
|
||||
};
|
||||
|
||||
tool(name: string, description: string, params: Record<string, unknown>, handler: (args: Record<string, unknown>) => Promise<unknown>) {
|
||||
this.registeredTools.push({ name, description, params, handler });
|
||||
}
|
||||
|
||||
async connect(_transport: unknown) { /* no-op */ }
|
||||
async close() { /* no-op */ }
|
||||
|
||||
getToolHandler(name: string): ((args: Record<string, unknown>) => Promise<unknown>) | undefined {
|
||||
return this.registeredTools.find((t) => t.name === name)?.handler;
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helper to create a mock MCP server with secure_env_collect registered
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Since createMcpServer uses dynamic import for McpServer, we can't easily
|
||||
* mock it. Instead, we test the env-writer utilities directly (in env-writer.test.ts)
|
||||
* and test the tool integration by verifying:
|
||||
* 1. The tool exists in the registered tools list
|
||||
* 2. The handler produces correct results with mock data
|
||||
*
|
||||
* For handler-level testing, we create a standalone test that replicates
|
||||
* the tool handler logic with a controllable mock.
|
||||
*/
|
||||
|
||||
function makeTempDir(prefix: string): string {
|
||||
return mkdtempSync(join(tmpdir(), `${prefix}-`));
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Integration test — verify tool is registered
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('secure_env_collect tool registration', () => {
|
||||
it('createMcpServer registers secure_env_collect tool', async () => {
|
||||
// This test verifies the tool exists — createMcpServer internally calls
|
||||
// server.tool('secure_env_collect', ...) which we can't intercept without
|
||||
// module mocking, but we can verify the server creates successfully
|
||||
const sm = new SessionManager();
|
||||
try {
|
||||
const { server } = await createMcpServer(sm);
|
||||
assert.ok(server, 'server should be created');
|
||||
// The McpServer internally tracks registered tools — we verify no error
|
||||
} finally {
|
||||
await sm.cleanup();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Handler logic tests — using env-writer directly to test the flow
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('secure_env_collect handler logic', () => {
|
||||
it('skips keys that already exist in .env', async () => {
|
||||
const tmp = makeTempDir('sec-collect');
|
||||
try {
|
||||
const envPath = join(tmp, '.env');
|
||||
writeFileSync(envPath, 'ALREADY_SET=existing-value\n');
|
||||
|
||||
// Import the utility directly to test the pre-check logic
|
||||
const { checkExistingEnvKeys } = await import('./env-writer.js');
|
||||
const existing = await checkExistingEnvKeys(['ALREADY_SET', 'NEW_KEY'], envPath);
|
||||
assert.deepStrictEqual(existing, ['ALREADY_SET']);
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('writes collected values to .env without returning secret values', async () => {
|
||||
const tmp = makeTempDir('sec-collect');
|
||||
try {
|
||||
const envPath = join(tmp, '.env');
|
||||
const savedKey = process.env.SEC_COLLECT_TEST_KEY;
|
||||
|
||||
const { applySecrets } = await import('./env-writer.js');
|
||||
const { applied, errors } = await applySecrets(
|
||||
[{ key: 'SEC_COLLECT_TEST_KEY', value: 'super-secret-value' }],
|
||||
'dotenv',
|
||||
{ envFilePath: envPath },
|
||||
);
|
||||
|
||||
assert.deepStrictEqual(applied, ['SEC_COLLECT_TEST_KEY']);
|
||||
assert.deepStrictEqual(errors, []);
|
||||
|
||||
// Verify the value was written
|
||||
const content = readFileSync(envPath, 'utf8');
|
||||
assert.ok(content.includes('SEC_COLLECT_TEST_KEY=super-secret-value'));
|
||||
|
||||
// Verify process.env was hydrated
|
||||
assert.equal(process.env.SEC_COLLECT_TEST_KEY, 'super-secret-value');
|
||||
|
||||
// Cleanup
|
||||
if (savedKey === undefined) delete process.env.SEC_COLLECT_TEST_KEY;
|
||||
else process.env.SEC_COLLECT_TEST_KEY = savedKey;
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('auto-detects vercel destination from vercel.json', async () => {
|
||||
const tmp = makeTempDir('sec-collect');
|
||||
try {
|
||||
writeFileSync(join(tmp, 'vercel.json'), '{}');
|
||||
const { detectDestination } = await import('./env-writer.js');
|
||||
assert.equal(detectDestination(tmp), 'vercel');
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('handles empty form values as skipped', async () => {
|
||||
// Simulate what happens when user leaves a field empty in the form
|
||||
const formContent: Record<string, string> = {
|
||||
'API_KEY': 'provided-value',
|
||||
'OPTIONAL_KEY': '', // empty = skip
|
||||
};
|
||||
|
||||
const provided: Array<{ key: string; value: string }> = [];
|
||||
const skipped: string[] = [];
|
||||
|
||||
for (const [key, raw] of Object.entries(formContent)) {
|
||||
const value = typeof raw === 'string' ? raw.trim() : '';
|
||||
if (value.length > 0) {
|
||||
provided.push({ key, value });
|
||||
} else {
|
||||
skipped.push(key);
|
||||
}
|
||||
}
|
||||
|
||||
assert.deepStrictEqual(provided, [{ key: 'API_KEY', value: 'provided-value' }]);
|
||||
assert.deepStrictEqual(skipped, ['OPTIONAL_KEY']);
|
||||
});
|
||||
|
||||
it('result text never contains secret values', async () => {
|
||||
const tmp = makeTempDir('sec-collect');
|
||||
try {
|
||||
const envPath = join(tmp, '.env');
|
||||
const savedKey = process.env.RESULT_TEXT_TEST;
|
||||
|
||||
const { applySecrets } = await import('./env-writer.js');
|
||||
const { applied } = await applySecrets(
|
||||
[{ key: 'RESULT_TEXT_TEST', value: 'sk-super-secret-abc123' }],
|
||||
'dotenv',
|
||||
{ envFilePath: envPath },
|
||||
);
|
||||
|
||||
// Simulate building result text (same logic as the tool handler)
|
||||
const lines: string[] = [
|
||||
'destination: dotenv (auto-detected)',
|
||||
...applied.map((k) => `✓ ${k}: applied`),
|
||||
];
|
||||
const resultText = lines.join('\n');
|
||||
|
||||
// The result MUST NOT contain the secret value
|
||||
assert.ok(!resultText.includes('sk-super-secret-abc123'), 'result text must not contain secret value');
|
||||
assert.ok(resultText.includes('RESULT_TEXT_TEST'), 'result text should contain key name');
|
||||
|
||||
// Cleanup
|
||||
if (savedKey === undefined) delete process.env.RESULT_TEXT_TEST;
|
||||
else process.env.RESULT_TEXT_TEST = savedKey;
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('handles multiple keys with mixed existing/new/skipped', async () => {
|
||||
const tmp = makeTempDir('sec-collect');
|
||||
try {
|
||||
const envPath = join(tmp, '.env');
|
||||
writeFileSync(envPath, 'EXISTING_A=already-here\n');
|
||||
const savedB = process.env.NEW_B;
|
||||
const savedC = process.env.SKIP_C;
|
||||
|
||||
const { checkExistingEnvKeys, applySecrets } = await import('./env-writer.js');
|
||||
|
||||
const allKeys = ['EXISTING_A', 'NEW_B', 'SKIP_C'];
|
||||
const existing = await checkExistingEnvKeys(allKeys, envPath);
|
||||
assert.deepStrictEqual(existing, ['EXISTING_A']);
|
||||
|
||||
// Simulate form response: NEW_B has value, SKIP_C is empty
|
||||
const formContent = { NEW_B: 'new-value', SKIP_C: '' };
|
||||
const provided: Array<{ key: string; value: string }> = [];
|
||||
const skipped: string[] = [];
|
||||
|
||||
for (const key of allKeys.filter((k) => !existing.includes(k))) {
|
||||
const raw = formContent[key as keyof typeof formContent] ?? '';
|
||||
if (raw.trim().length > 0) provided.push({ key, value: raw.trim() });
|
||||
else skipped.push(key);
|
||||
}
|
||||
|
||||
const { applied, errors } = await applySecrets(provided, 'dotenv', { envFilePath: envPath });
|
||||
|
||||
assert.deepStrictEqual(applied, ['NEW_B']);
|
||||
assert.deepStrictEqual(skipped, ['SKIP_C']);
|
||||
assert.deepStrictEqual(errors, []);
|
||||
assert.deepStrictEqual(existing, ['EXISTING_A']);
|
||||
|
||||
// Cleanup
|
||||
if (savedB === undefined) delete process.env.NEW_B;
|
||||
else process.env.NEW_B = savedB;
|
||||
if (savedC === undefined) delete process.env.SKIP_C;
|
||||
else process.env.SKIP_C = savedC;
|
||||
} finally {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
});
|
||||
|
|
@ -2,8 +2,9 @@
|
|||
* MCP Server — registers GSD orchestration, project-state, and workflow tools.
|
||||
*
|
||||
* Session tools (6): gsd_execute, gsd_status, gsd_result, gsd_cancel, gsd_query, gsd_resolve_blocker
|
||||
* Interactive tools (2): ask_user_questions, secure_env_collect via MCP form elicitation
|
||||
* Read-only tools (6): gsd_progress, gsd_roadmap, gsd_history, gsd_doctor, gsd_captures, gsd_knowledge
|
||||
* Workflow tools (17): planning, replanning, completion, validation, reassessment, gate result, and milestone status tools
|
||||
* Workflow tools (29): headless-safe planning, metadata persistence, replanning, completion, validation, reassessment, gate result, status, and journal tools
|
||||
*
|
||||
* Uses dynamic imports for @modelcontextprotocol/sdk because TS Node16
|
||||
* cannot resolve the SDK's subpath exports statically (same pattern as
|
||||
|
|
@ -21,6 +22,7 @@ import { readCaptures } from './readers/captures.js';
|
|||
import { readKnowledge } from './readers/knowledge.js';
|
||||
import { runDoctorLite } from './readers/doctor-lite.js';
|
||||
import { registerWorkflowTools } from './workflow-tools.js';
|
||||
import { applySecrets, checkExistingEnvKeys, detectDestination } from './env-writer.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
|
|
@ -44,6 +46,11 @@ function errorContent(message: string): { isError: true; content: Array<{ type:
|
|||
return { isError: true, content: [{ type: 'text' as const, text: message }] };
|
||||
}
|
||||
|
||||
/** Return raw text content without JSON wrapping. */
|
||||
function textContent(text: string): { content: Array<{ type: 'text'; text: string }> } {
|
||||
return { content: [{ type: 'text' as const, text }] };
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// gsd_query filesystem reader
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -106,12 +113,172 @@ async function fileExists(path: string): Promise<boolean> {
|
|||
// MCP Server type — minimal interface for the dynamically-imported McpServer
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
interface ElicitResult {
|
||||
action: 'accept' | 'decline' | 'cancel';
|
||||
content?: Record<string, string | number | boolean | string[]>;
|
||||
}
|
||||
|
||||
interface ElicitRequestFormParams {
|
||||
mode?: 'form';
|
||||
message: string;
|
||||
requestedSchema: {
|
||||
type: 'object';
|
||||
properties: Record<string, Record<string, unknown>>;
|
||||
required?: string[];
|
||||
};
|
||||
}
|
||||
|
||||
interface McpServerInstance {
|
||||
tool(name: string, description: string, params: Record<string, unknown>, handler: (args: Record<string, unknown>) => Promise<unknown>): unknown;
|
||||
server: {
|
||||
elicitInput(
|
||||
params: AskUserQuestionsElicitRequest | ElicitRequestFormParams,
|
||||
options?: unknown,
|
||||
): Promise<AskUserQuestionsElicitResult>;
|
||||
};
|
||||
connect(transport: unknown): Promise<void>;
|
||||
close(): Promise<void>;
|
||||
}
|
||||
|
||||
interface AskUserQuestionOption {
|
||||
label: string;
|
||||
description: string;
|
||||
}
|
||||
|
||||
interface AskUserQuestion {
|
||||
id: string;
|
||||
header: string;
|
||||
question: string;
|
||||
options: AskUserQuestionOption[];
|
||||
allowMultiple?: boolean;
|
||||
}
|
||||
|
||||
interface AskUserQuestionsParams {
|
||||
questions: AskUserQuestion[];
|
||||
}
|
||||
|
||||
type AskUserQuestionsContentValue = string | number | boolean | string[];
|
||||
|
||||
interface AskUserQuestionsElicitResult {
|
||||
action: 'accept' | 'decline' | 'cancel';
|
||||
content?: Record<string, AskUserQuestionsContentValue>;
|
||||
}
|
||||
|
||||
interface AskUserQuestionsElicitRequest {
|
||||
mode: 'form';
|
||||
message: string;
|
||||
requestedSchema: {
|
||||
type: 'object';
|
||||
properties: Record<string, Record<string, unknown>>;
|
||||
required?: string[];
|
||||
};
|
||||
}
|
||||
|
||||
const OTHER_OPTION_LABEL = 'None of the above';
|
||||
|
||||
function normalizeAskUserQuestionsNote(value: AskUserQuestionsContentValue | undefined): string {
|
||||
return typeof value === 'string' ? value.trim() : '';
|
||||
}
|
||||
|
||||
function normalizeAskUserQuestionsAnswers(
|
||||
value: AskUserQuestionsContentValue | undefined,
|
||||
allowMultiple: boolean,
|
||||
): string[] {
|
||||
if (allowMultiple) {
|
||||
return Array.isArray(value) ? value.filter((item): item is string => typeof item === 'string') : [];
|
||||
}
|
||||
|
||||
return typeof value === 'string' && value.length > 0 ? [value] : [];
|
||||
}
|
||||
|
||||
function validateAskUserQuestionsPayload(questions: AskUserQuestion[]): string | null {
|
||||
if (questions.length === 0 || questions.length > 3) {
|
||||
return 'Error: questions must contain 1-3 items';
|
||||
}
|
||||
|
||||
for (const question of questions) {
|
||||
if (!question.options || question.options.length === 0) {
|
||||
return `Error: ask_user_questions requires non-empty options for every question (question "${question.id}" has none)`;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
export function buildAskUserQuestionsElicitRequest(questions: AskUserQuestion[]): AskUserQuestionsElicitRequest {
|
||||
const properties: Record<string, Record<string, unknown>> = {};
|
||||
const required = questions.map((question) => question.id);
|
||||
|
||||
for (const question of questions) {
|
||||
if (question.allowMultiple) {
|
||||
properties[question.id] = {
|
||||
type: 'array',
|
||||
title: question.header,
|
||||
description: question.question,
|
||||
minItems: 1,
|
||||
maxItems: question.options.length,
|
||||
items: {
|
||||
anyOf: question.options.map((option) => ({
|
||||
const: option.label,
|
||||
title: option.label,
|
||||
})),
|
||||
},
|
||||
};
|
||||
continue;
|
||||
}
|
||||
|
||||
properties[question.id] = {
|
||||
type: 'string',
|
||||
title: question.header,
|
||||
description: question.question,
|
||||
oneOf: [...question.options, { label: OTHER_OPTION_LABEL, description: 'Choose this when the listed options do not fit.' }].map((option) => ({
|
||||
const: option.label,
|
||||
title: option.label,
|
||||
})),
|
||||
};
|
||||
|
||||
properties[`${question.id}__note`] = {
|
||||
type: 'string',
|
||||
title: `${question.header} Note`,
|
||||
description: `Optional note for "${OTHER_OPTION_LABEL}".`,
|
||||
maxLength: 500,
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
mode: 'form',
|
||||
message: 'Please answer the following question(s). For single-select questions, choose "None of the above" and add a note if the provided options do not fit.',
|
||||
requestedSchema: {
|
||||
type: 'object',
|
||||
properties,
|
||||
required,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
export function formatAskUserQuestionsElicitResult(
|
||||
questions: AskUserQuestion[],
|
||||
result: AskUserQuestionsElicitResult,
|
||||
): string {
|
||||
const answers: Record<string, { answers: string[] }> = {};
|
||||
const content = result.content ?? {};
|
||||
|
||||
for (const question of questions) {
|
||||
const answerList = normalizeAskUserQuestionsAnswers(content[question.id], !!question.allowMultiple);
|
||||
|
||||
if (!question.allowMultiple && answerList[0] === OTHER_OPTION_LABEL) {
|
||||
const note = normalizeAskUserQuestionsNote(content[`${question.id}__note`]);
|
||||
if (note) {
|
||||
answerList.push(`user_note: ${note}`);
|
||||
}
|
||||
}
|
||||
|
||||
answers[question.id] = { answers: answerList };
|
||||
}
|
||||
|
||||
return JSON.stringify({ answers });
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// createMcpServer
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -131,7 +298,7 @@ export async function createMcpServer(sessionManager: SessionManager): Promise<{
|
|||
|
||||
const server: McpServerInstance = new McpServer(
|
||||
{ name: SERVER_NAME, version: SERVER_VERSION },
|
||||
{ capabilities: { tools: {} } },
|
||||
{ capabilities: { tools: {}, elicitation: {} } },
|
||||
);
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
|
|
@ -285,6 +452,160 @@ export async function createMcpServer(sessionManager: SessionManager): Promise<{
|
|||
},
|
||||
);
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// ask_user_questions — structured user input via MCP form elicitation
|
||||
// -----------------------------------------------------------------------
|
||||
server.tool(
|
||||
'ask_user_questions',
|
||||
'Request user input for one to three short questions and wait for the response. Single-select questions include a free-form "None of the above" path. Multi-select questions allow multiple choices.',
|
||||
{
|
||||
questions: z.array(z.object({
|
||||
id: z.string().describe('Stable identifier for mapping answers (snake_case)'),
|
||||
header: z.string().describe('Short header label shown in the UI (12 or fewer chars)'),
|
||||
question: z.string().describe('Single-sentence prompt shown to the user'),
|
||||
options: z.array(z.object({
|
||||
label: z.string().describe('User-facing label (1-5 words)'),
|
||||
description: z.string().describe('One short sentence explaining impact/tradeoff if selected'),
|
||||
})).describe('Provide 2-3 mutually exclusive choices. Put the recommended option first and suffix its label with "(Recommended)". Do not include an "Other" option for single-select questions.'),
|
||||
allowMultiple: z.boolean().optional().describe('If true, the user can select multiple options. No "None of the above" option is added.'),
|
||||
})).describe('Questions to show the user. Prefer 1 and do not exceed 3.'),
|
||||
},
|
||||
async (args: Record<string, unknown>) => {
|
||||
const { questions } = args as unknown as AskUserQuestionsParams;
|
||||
try {
|
||||
const validationError = validateAskUserQuestionsPayload(questions);
|
||||
if (validationError) return errorContent(validationError);
|
||||
|
||||
const elicitation = await server.server.elicitInput(buildAskUserQuestionsElicitRequest(questions));
|
||||
if (elicitation.action !== 'accept' || !elicitation.content) {
|
||||
return textContent('ask_user_questions was cancelled before receiving a response');
|
||||
}
|
||||
|
||||
return textContent(formatAskUserQuestionsElicitResult(questions, elicitation));
|
||||
} catch (err) {
|
||||
return errorContent(err instanceof Error ? err.message : String(err));
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// secure_env_collect — collect secrets via MCP form elicitation
|
||||
// -----------------------------------------------------------------------
|
||||
server.tool(
|
||||
'secure_env_collect',
|
||||
'Collect environment variables securely via form input. Values are written directly to .env (or Vercel/Convex) and NEVER appear in tool output — only key names and applied/skipped status are returned. Use this instead of asking users to manually edit .env files or paste secrets into chat.',
|
||||
{
|
||||
projectDir: z.string().describe('Absolute path to the project directory'),
|
||||
keys: z.array(z.object({
|
||||
key: z.string().describe('Env var name, e.g. OPENAI_API_KEY'),
|
||||
hint: z.string().optional().describe('Format hint shown to user, e.g. "starts with sk-"'),
|
||||
guidance: z.array(z.string()).optional().describe('Step-by-step instructions for obtaining this key'),
|
||||
})).min(1).describe('Environment variables to collect'),
|
||||
destination: z.enum(['dotenv', 'vercel', 'convex']).optional().describe('Where to write secrets. Auto-detected from project files if omitted.'),
|
||||
envFilePath: z.string().optional().describe('Path to .env file (dotenv only). Defaults to .env in projectDir.'),
|
||||
environment: z.enum(['development', 'preview', 'production']).optional().describe('Target environment (vercel/convex only)'),
|
||||
},
|
||||
async (args: Record<string, unknown>) => {
|
||||
const { projectDir, keys, destination, envFilePath, environment } = args as {
|
||||
projectDir: string;
|
||||
keys: Array<{ key: string; hint?: string; guidance?: string[] }>;
|
||||
destination?: 'dotenv' | 'vercel' | 'convex';
|
||||
envFilePath?: string;
|
||||
environment?: 'development' | 'preview' | 'production';
|
||||
};
|
||||
|
||||
try {
|
||||
const resolvedProjectDir = resolve(projectDir);
|
||||
const resolvedEnvPath = resolve(resolvedProjectDir, envFilePath ?? '.env');
|
||||
|
||||
// (1) Check which keys already exist
|
||||
const allKeyNames = keys.map((k) => k.key);
|
||||
const existingKeys = await checkExistingEnvKeys(allKeyNames, resolvedEnvPath);
|
||||
const existingSet = new Set(existingKeys);
|
||||
const pendingKeys = keys.filter((k) => !existingSet.has(k.key));
|
||||
|
||||
// If all keys already exist, return immediately
|
||||
if (pendingKeys.length === 0) {
|
||||
const lines = existingKeys.map((k) => `• ${k}: already set`);
|
||||
return textContent(`All ${existingKeys.length} key(s) already set.\n${lines.join('\n')}`);
|
||||
}
|
||||
|
||||
// (2) Build elicitation form — one string field per pending key
|
||||
const properties: Record<string, Record<string, unknown>> = {};
|
||||
const required: string[] = [];
|
||||
|
||||
for (const item of pendingKeys) {
|
||||
const descParts: string[] = [];
|
||||
if (item.hint) descParts.push(`Format: ${item.hint}`);
|
||||
if (item.guidance && item.guidance.length > 0) {
|
||||
descParts.push('How to get this:');
|
||||
item.guidance.forEach((step, i) => descParts.push(`${i + 1}. ${step}`));
|
||||
}
|
||||
descParts.push('Leave empty to skip.');
|
||||
|
||||
properties[item.key] = {
|
||||
type: 'string',
|
||||
title: item.key,
|
||||
description: descParts.join('\n'),
|
||||
};
|
||||
// Don't mark as required — empty string = skip
|
||||
}
|
||||
|
||||
// (3) Elicit input from the MCP client
|
||||
const elicitation = await server.server.elicitInput({
|
||||
message: `Enter values for ${pendingKeys.length} environment variable(s). Values are written directly to the project and never shown to the AI.`,
|
||||
requestedSchema: {
|
||||
type: 'object',
|
||||
properties,
|
||||
required,
|
||||
},
|
||||
});
|
||||
|
||||
if (elicitation.action !== 'accept' || !elicitation.content) {
|
||||
return textContent('secure_env_collect was cancelled by user.');
|
||||
}
|
||||
|
||||
// (4) Separate provided vs skipped from form response
|
||||
const provided: Array<{ key: string; value: string }> = [];
|
||||
const skipped: string[] = [];
|
||||
|
||||
for (const item of pendingKeys) {
|
||||
const raw = elicitation.content[item.key];
|
||||
const value = typeof raw === 'string' ? raw.trim() : '';
|
||||
if (value.length > 0) {
|
||||
provided.push({ key: item.key, value });
|
||||
} else {
|
||||
skipped.push(item.key);
|
||||
}
|
||||
}
|
||||
|
||||
// (5) Auto-detect destination if not specified
|
||||
const resolvedDestination = destination ?? detectDestination(resolvedProjectDir);
|
||||
|
||||
// (6) Write secrets to destination
|
||||
const { applied, errors } = await applySecrets(provided, resolvedDestination, {
|
||||
envFilePath: resolvedEnvPath,
|
||||
environment,
|
||||
});
|
||||
|
||||
// (7) Build result — NEVER include secret values
|
||||
const lines: string[] = [
|
||||
`destination: ${resolvedDestination}${!destination ? ' (auto-detected)' : ''}${environment ? ` (${environment})` : ''}`,
|
||||
];
|
||||
for (const k of applied) lines.push(`✓ ${k}: applied`);
|
||||
for (const k of skipped) lines.push(`• ${k}: skipped`);
|
||||
for (const k of existingKeys) lines.push(`• ${k}: already set`);
|
||||
for (const e of errors) lines.push(`✗ ${e}`);
|
||||
|
||||
return errors.length > 0 && applied.length === 0
|
||||
? errorContent(lines.join('\n'))
|
||||
: textContent(lines.join('\n'));
|
||||
} catch (err) {
|
||||
return errorContent(err instanceof Error ? err.message : String(err));
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
// =======================================================================
|
||||
// READ-ONLY TOOLS — no session required, pure filesystem reads
|
||||
// =======================================================================
|
||||
|
|
|
|||
95
packages/mcp-server/src/tool-credentials.test.ts
Normal file
95
packages/mcp-server/src/tool-credentials.test.ts
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
import { describe, it } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
|
||||
import { loadStoredCredentialEnvKeys, resolveAuthPath } from "./tool-credentials.js";
|
||||
|
||||
describe("tool credentials", () => {
|
||||
it("hydrates supported model and tool keys from auth.json", () => {
|
||||
const tempRoot = mkdtempSync(join(tmpdir(), "gsd-mcp-auth-"));
|
||||
const authPath = join(tempRoot, "auth.json");
|
||||
const env: NodeJS.ProcessEnv = {};
|
||||
|
||||
try {
|
||||
writeFileSync(authPath, JSON.stringify({
|
||||
anthropic: { type: "api_key", key: "sk-ant-secret" },
|
||||
openai: { type: "api_key", key: "sk-openai-secret" },
|
||||
tavily: { type: "api_key", key: "tvly-secret" },
|
||||
context7: [{ type: "api_key", key: "ctx7-secret" }],
|
||||
}));
|
||||
|
||||
const loaded = loadStoredCredentialEnvKeys({ authPath, env });
|
||||
assert.deepEqual(loaded.sort(), [
|
||||
"ANTHROPIC_API_KEY",
|
||||
"CONTEXT7_API_KEY",
|
||||
"OPENAI_API_KEY",
|
||||
"TAVILY_API_KEY",
|
||||
]);
|
||||
assert.equal(env.ANTHROPIC_API_KEY, "sk-ant-secret");
|
||||
assert.equal(env.OPENAI_API_KEY, "sk-openai-secret");
|
||||
assert.equal(env.TAVILY_API_KEY, "tvly-secret");
|
||||
assert.equal(env.CONTEXT7_API_KEY, "ctx7-secret");
|
||||
} finally {
|
||||
rmSync(tempRoot, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it("does not overwrite explicit environment variables", () => {
|
||||
const tempRoot = mkdtempSync(join(tmpdir(), "gsd-mcp-auth-"));
|
||||
const authPath = join(tempRoot, "auth.json");
|
||||
const env: NodeJS.ProcessEnv = {
|
||||
BRAVE_API_KEY: "already-set",
|
||||
};
|
||||
|
||||
try {
|
||||
writeFileSync(authPath, JSON.stringify({
|
||||
brave: { type: "api_key", key: "from-auth-json" },
|
||||
anthropic: { type: "api_key", key: "sk-ant-from-auth-json" },
|
||||
}));
|
||||
|
||||
const loaded = loadStoredCredentialEnvKeys({ authPath, env });
|
||||
assert.deepEqual(loaded, ["ANTHROPIC_API_KEY"]);
|
||||
assert.equal(env.BRAVE_API_KEY, "already-set");
|
||||
assert.equal(env.ANTHROPIC_API_KEY, "sk-ant-from-auth-json");
|
||||
} finally {
|
||||
rmSync(tempRoot, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it("ignores oauth credentials because they are resolved through auth storage, not env hydration", () => {
|
||||
const tempRoot = mkdtempSync(join(tmpdir(), "gsd-mcp-auth-"));
|
||||
const authPath = join(tempRoot, "auth.json");
|
||||
const env: NodeJS.ProcessEnv = {};
|
||||
|
||||
try {
|
||||
writeFileSync(authPath, JSON.stringify({
|
||||
openai: { type: "oauth", access: "oauth-access-token" },
|
||||
"google-gemini-cli": { type: "oauth", token: "ya29.oauth-token" },
|
||||
}));
|
||||
|
||||
const loaded = loadStoredCredentialEnvKeys({ authPath, env });
|
||||
assert.deepEqual(loaded, []);
|
||||
assert.equal(env.OPENAI_API_KEY, undefined);
|
||||
assert.equal(env.GEMINI_API_KEY, undefined);
|
||||
} finally {
|
||||
rmSync(tempRoot, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it("resolves auth.json from GSD_CODING_AGENT_DIR", () => {
|
||||
const tempRoot = mkdtempSync(join(tmpdir(), "gsd-mcp-agent-dir-"));
|
||||
const agentDir = join(tempRoot, "agent");
|
||||
mkdirSync(agentDir, { recursive: true });
|
||||
|
||||
try {
|
||||
assert.equal(
|
||||
resolveAuthPath({ GSD_CODING_AGENT_DIR: agentDir }),
|
||||
join(agentDir, "auth.json"),
|
||||
);
|
||||
} finally {
|
||||
rmSync(tempRoot, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
});
|
||||
97
packages/mcp-server/src/tool-credentials.ts
Normal file
97
packages/mcp-server/src/tool-credentials.ts
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
import { existsSync, readFileSync } from "node:fs";
|
||||
import { homedir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
|
||||
type AuthCredential =
|
||||
| { type?: unknown; key?: unknown }
|
||||
| Array<{ type?: unknown; key?: unknown }>;
|
||||
|
||||
type AuthStorageData = Record<string, AuthCredential>;
|
||||
|
||||
const AUTH_ENV_KEYS = [
|
||||
["anthropic", "ANTHROPIC_API_KEY"],
|
||||
["openai", "OPENAI_API_KEY"],
|
||||
["github-copilot", "GITHUB_TOKEN"],
|
||||
["google", "GEMINI_API_KEY"],
|
||||
["groq", "GROQ_API_KEY"],
|
||||
["xai", "XAI_API_KEY"],
|
||||
["openrouter", "OPENROUTER_API_KEY"],
|
||||
["mistral", "MISTRAL_API_KEY"],
|
||||
["ollama-cloud", "OLLAMA_API_KEY"],
|
||||
["custom-openai", "CUSTOM_OPENAI_API_KEY"],
|
||||
["cerebras", "CEREBRAS_API_KEY"],
|
||||
["azure-openai-responses", "AZURE_OPENAI_API_KEY"],
|
||||
["vercel-ai-gateway", "AI_GATEWAY_API_KEY"],
|
||||
["zai", "ZAI_API_KEY"],
|
||||
["minimax", "MINIMAX_API_KEY"],
|
||||
["minimax-cn", "MINIMAX_CN_API_KEY"],
|
||||
["huggingface", "HF_TOKEN"],
|
||||
["opencode", "OPENCODE_API_KEY"],
|
||||
["opencode-go", "OPENCODE_API_KEY"],
|
||||
["kimi-coding", "KIMI_API_KEY"],
|
||||
["alibaba-coding-plan", "ALIBABA_API_KEY"],
|
||||
["brave", "BRAVE_API_KEY"],
|
||||
["brave_answers", "BRAVE_ANSWERS_KEY"],
|
||||
["context7", "CONTEXT7_API_KEY"],
|
||||
["jina", "JINA_API_KEY"],
|
||||
["tavily", "TAVILY_API_KEY"],
|
||||
["slack_bot", "SLACK_BOT_TOKEN"],
|
||||
["discord_bot", "DISCORD_BOT_TOKEN"],
|
||||
["telegram_bot", "TELEGRAM_BOT_TOKEN"],
|
||||
] as const;
|
||||
|
||||
function expandHome(pathValue: string): string {
|
||||
if (pathValue === "~") return homedir();
|
||||
if (pathValue.startsWith("~/")) return join(homedir(), pathValue.slice(2));
|
||||
return pathValue;
|
||||
}
|
||||
|
||||
function getStoredApiKey(data: AuthStorageData, providerId: string): string | undefined {
|
||||
const raw = data[providerId];
|
||||
const credentials = Array.isArray(raw) ? raw : raw ? [raw] : [];
|
||||
|
||||
for (const credential of credentials) {
|
||||
if (credential?.type !== "api_key") continue;
|
||||
if (typeof credential.key !== "string") continue;
|
||||
if (credential.key.trim().length === 0) continue;
|
||||
return credential.key;
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
export function resolveAuthPath(env: NodeJS.ProcessEnv = process.env): string {
|
||||
const agentDir = env.GSD_CODING_AGENT_DIR?.trim();
|
||||
if (agentDir) return join(expandHome(agentDir), "auth.json");
|
||||
return join(homedir(), ".gsd", "agent", "auth.json");
|
||||
}
|
||||
|
||||
export function loadStoredCredentialEnvKeys(options: {
|
||||
env?: NodeJS.ProcessEnv;
|
||||
authPath?: string;
|
||||
} = {}): string[] {
|
||||
const env = options.env ?? process.env;
|
||||
const authPath = options.authPath ?? resolveAuthPath(env);
|
||||
if (!existsSync(authPath)) return [];
|
||||
|
||||
let parsed: AuthStorageData;
|
||||
try {
|
||||
const raw = readFileSync(authPath, "utf-8");
|
||||
const data = JSON.parse(raw) as unknown;
|
||||
if (!data || typeof data !== "object" || Array.isArray(data)) return [];
|
||||
parsed = data as AuthStorageData;
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
|
||||
const loaded: string[] = [];
|
||||
for (const [providerId, envVar] of AUTH_ENV_KEYS) {
|
||||
if (env[envVar]) continue;
|
||||
const key = getStoredApiKey(parsed, providerId);
|
||||
if (!key) continue;
|
||||
env[envVar] = key;
|
||||
loaded.push(envVar);
|
||||
}
|
||||
|
||||
return loaded;
|
||||
}
|
||||
|
|
@ -6,7 +6,7 @@ import { tmpdir } from "node:os";
|
|||
import { randomUUID } from "node:crypto";
|
||||
|
||||
import { _getAdapter, closeDatabase } from "../../../src/resources/extensions/gsd/gsd-db.ts";
|
||||
import { registerWorkflowTools } from "./workflow-tools.ts";
|
||||
import { registerWorkflowTools, WORKFLOW_TOOL_NAMES } from "./workflow-tools.ts";
|
||||
|
||||
function makeTmpBase(): string {
|
||||
const base = join(tmpdir(), `gsd-mcp-workflow-${randomUUID()}`);
|
||||
|
|
@ -68,33 +68,12 @@ function makeMockServer() {
|
|||
}
|
||||
|
||||
describe("workflow MCP tools", () => {
|
||||
it("registers the seventeen workflow tools", () => {
|
||||
it("registers the full headless-safe workflow tool surface", () => {
|
||||
const server = makeMockServer();
|
||||
registerWorkflowTools(server as any);
|
||||
|
||||
assert.equal(server.tools.length, 17);
|
||||
assert.deepEqual(
|
||||
server.tools.map((t) => t.name),
|
||||
[
|
||||
"gsd_plan_milestone",
|
||||
"gsd_plan_slice",
|
||||
"gsd_replan_slice",
|
||||
"gsd_slice_replan",
|
||||
"gsd_slice_complete",
|
||||
"gsd_complete_slice",
|
||||
"gsd_complete_milestone",
|
||||
"gsd_milestone_complete",
|
||||
"gsd_validate_milestone",
|
||||
"gsd_milestone_validate",
|
||||
"gsd_reassess_roadmap",
|
||||
"gsd_roadmap_reassess",
|
||||
"gsd_save_gate_result",
|
||||
"gsd_summary_save",
|
||||
"gsd_task_complete",
|
||||
"gsd_complete_task",
|
||||
"gsd_milestone_status",
|
||||
],
|
||||
);
|
||||
assert.equal(server.tools.length, WORKFLOW_TOOL_NAMES.length);
|
||||
assert.deepEqual(server.tools.map((t) => t.name), [...WORKFLOW_TOOL_NAMES]);
|
||||
});
|
||||
|
||||
it("gsd_summary_save writes artifact through the shared executor", async () => {
|
||||
|
|
@ -405,6 +384,116 @@ describe("workflow MCP tools", () => {
|
|||
}
|
||||
});
|
||||
|
||||
it("gsd_requirement_save opens the DB before inline requirement writes", async () => {
|
||||
const base = makeTmpBase();
|
||||
try {
|
||||
const server = makeMockServer();
|
||||
registerWorkflowTools(server as any);
|
||||
const requirementTool = server.tools.find((t) => t.name === "gsd_requirement_save");
|
||||
assert.ok(requirementTool, "requirement tool should be registered");
|
||||
|
||||
closeDatabase();
|
||||
|
||||
const result = await requirementTool!.handler({
|
||||
projectDir: base,
|
||||
class: "operability",
|
||||
description: "Inline MCP requirement save regression",
|
||||
why: "Reproduce missing ensureDbOpen in workflow-tools",
|
||||
source: "user",
|
||||
status: "active",
|
||||
primary_owner: "M010/S10",
|
||||
validation: "n/a",
|
||||
});
|
||||
|
||||
assert.match((result as any).content[0].text as string, /Saved requirement R\d+/);
|
||||
assert.ok(existsSync(join(base, ".gsd", "REQUIREMENTS.md")), "REQUIREMENTS.md should be written to disk");
|
||||
const row = _getAdapter()!
|
||||
.prepare("SELECT id, class, description FROM requirements WHERE description = ?")
|
||||
.get("Inline MCP requirement save regression") as Record<string, unknown> | undefined;
|
||||
assert.ok(row, "requirement should be written to the database");
|
||||
assert.equal(row["class"], "operability");
|
||||
} finally {
|
||||
cleanup(base);
|
||||
}
|
||||
});
|
||||
|
||||
it("gsd_plan_task reopens the DB before inline task planning writes", async () => {
|
||||
const base = makeTmpBase();
|
||||
try {
|
||||
const server = makeMockServer();
|
||||
registerWorkflowTools(server as any);
|
||||
const milestoneTool = server.tools.find((t) => t.name === "gsd_plan_milestone");
|
||||
const sliceTool = server.tools.find((t) => t.name === "gsd_plan_slice");
|
||||
const taskTool = server.tools.find((t) => t.name === "gsd_plan_task");
|
||||
assert.ok(milestoneTool, "milestone planning tool should be registered");
|
||||
assert.ok(sliceTool, "slice planning tool should be registered");
|
||||
assert.ok(taskTool, "task planning tool should be registered");
|
||||
|
||||
await milestoneTool!.handler({
|
||||
projectDir: base,
|
||||
milestoneId: "M010",
|
||||
title: "Inline task planning DB reopen",
|
||||
vision: "Seed a slice, close the DB, then plan another task inline.",
|
||||
slices: [
|
||||
{
|
||||
sliceId: "S10",
|
||||
title: "Inline task planning",
|
||||
risk: "medium",
|
||||
depends: [],
|
||||
demo: "Inline gsd_plan_task reopens the DB after it was closed.",
|
||||
goal: "Preserve MCP task planning after the DB adapter is closed.",
|
||||
successCriteria: "The second task plan persists after a closed DB is reopened.",
|
||||
proofLevel: "integration",
|
||||
integrationClosure: "The inline MCP handler reopens the DB before planning.",
|
||||
observabilityImpact: "workflow-tools MCP tests cover the inline reopen path.",
|
||||
},
|
||||
],
|
||||
});
|
||||
await sliceTool!.handler({
|
||||
projectDir: base,
|
||||
milestoneId: "M010",
|
||||
sliceId: "S10",
|
||||
goal: "Create the initial slice plan before closing the DB.",
|
||||
tasks: [
|
||||
{
|
||||
taskId: "T10",
|
||||
title: "Seed existing task",
|
||||
description: "Create the initial task plan before closing the DB.",
|
||||
estimate: "5m",
|
||||
files: ["packages/mcp-server/src/workflow-tools.ts"],
|
||||
verify: "node --test",
|
||||
inputs: ["M010-ROADMAP.md"],
|
||||
expectedOutput: ["T10-PLAN.md"],
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
closeDatabase();
|
||||
|
||||
const result = await taskTool!.handler({
|
||||
projectDir: base,
|
||||
milestoneId: "M010",
|
||||
sliceId: "S10",
|
||||
taskId: "T11",
|
||||
title: "Reopen and plan",
|
||||
description: "Exercise the inline plan-task path after the DB was closed.",
|
||||
estimate: "5m",
|
||||
files: ["packages/mcp-server/src/workflow-tools.ts"],
|
||||
verify: "node --test",
|
||||
inputs: ["M010-ROADMAP.md", "S10-PLAN.md"],
|
||||
expectedOutput: ["T11-PLAN.md"],
|
||||
});
|
||||
|
||||
assert.match((result as any).content[0].text as string, /Planned task T11/);
|
||||
assert.ok(
|
||||
existsSync(join(base, ".gsd", "milestones", "M010", "slices", "S10", "tasks", "T11-PLAN.md")),
|
||||
"T11 plan should be written after reopening the DB",
|
||||
);
|
||||
} finally {
|
||||
cleanup(base);
|
||||
}
|
||||
});
|
||||
|
||||
it("gsd_replan_slice and gsd_slice_replan work end-to-end", async () => {
|
||||
const base = makeTmpBase();
|
||||
try {
|
||||
|
|
@ -974,3 +1063,31 @@ describe("workflow MCP tools", () => {
|
|||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("URL scheme regex — Windows drive letter safety", () => {
|
||||
// This is the regex used in getWriteGateModuleCandidates() and
|
||||
// getWorkflowExecutorModuleCandidates() to reject non-file URL schemes.
|
||||
// It must NOT match single-letter Windows drive prefixes (C:, D:, etc.).
|
||||
const urlSchemeRegex = /^[a-z]{2,}:/i;
|
||||
|
||||
it("rejects multi-letter URL schemes", () => {
|
||||
assert.ok(urlSchemeRegex.test("http://example.com"), "http: should match");
|
||||
assert.ok(urlSchemeRegex.test("https://example.com"), "https: should match");
|
||||
assert.ok(urlSchemeRegex.test("ftp://files.example.com"), "ftp: should match");
|
||||
assert.ok(urlSchemeRegex.test("file:///C:/Users"), "file: should match");
|
||||
assert.ok(urlSchemeRegex.test("node:fs"), "node: should match");
|
||||
});
|
||||
|
||||
it("allows single-letter Windows drive prefixes", () => {
|
||||
assert.ok(!urlSchemeRegex.test("C:\\Users\\user\\project"), "C:\\ should not match");
|
||||
assert.ok(!urlSchemeRegex.test("D:\\other\\path"), "D:\\ should not match");
|
||||
assert.ok(!urlSchemeRegex.test("c:\\lowercase\\drive"), "c:\\ should not match");
|
||||
assert.ok(!urlSchemeRegex.test("E:/forward/slash/path"), "E:/ should not match");
|
||||
});
|
||||
|
||||
it("allows bare filesystem paths", () => {
|
||||
assert.ok(!urlSchemeRegex.test("/usr/local/lib/module.js"), "unix absolute path should not match");
|
||||
assert.ok(!urlSchemeRegex.test("./relative/path.js"), "relative path should not match");
|
||||
assert.ok(!urlSchemeRegex.test("../parent/path.js"), "parent relative path should not match");
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -244,6 +244,10 @@ type WorkflowWriteGateModule = {
|
|||
) => { block: boolean; reason?: string };
|
||||
};
|
||||
|
||||
type WorkflowDbBootstrapModule = {
|
||||
ensureDbOpen: (basePath?: string) => Promise<boolean>;
|
||||
};
|
||||
|
||||
let workflowToolExecutorsPromise: Promise<WorkflowToolExecutors> | null = null;
|
||||
let workflowExecutionQueue: Promise<void> = Promise.resolve();
|
||||
let workflowWriteGatePromise: Promise<WorkflowWriteGateModule> | null = null;
|
||||
|
|
@ -318,7 +322,7 @@ function getWriteGateModuleCandidates(): string[] {
|
|||
const candidates: string[] = [];
|
||||
const explicitModule = process.env.GSD_WORKFLOW_WRITE_GATE_MODULE?.trim();
|
||||
if (explicitModule) {
|
||||
if (/^[a-z]+:/i.test(explicitModule) && !explicitModule.startsWith("file:")) {
|
||||
if (/^[a-z]{2,}:/i.test(explicitModule) && !explicitModule.startsWith("file:")) {
|
||||
throw new Error("GSD_WORKFLOW_WRITE_GATE_MODULE only supports file: URLs or filesystem paths.");
|
||||
}
|
||||
candidates.push(explicitModule.startsWith("file:") ? explicitModule : toFileUrl(explicitModule));
|
||||
|
|
@ -326,6 +330,7 @@ function getWriteGateModuleCandidates(): string[] {
|
|||
|
||||
candidates.push(
|
||||
new URL("../../../src/resources/extensions/gsd/bootstrap/write-gate.js", import.meta.url).href,
|
||||
new URL("../../../dist/resources/extensions/gsd/bootstrap/write-gate.js", import.meta.url).href,
|
||||
new URL("../../../src/resources/extensions/gsd/bootstrap/write-gate.ts", import.meta.url).href,
|
||||
);
|
||||
|
||||
|
|
@ -336,11 +341,46 @@ function toFileUrl(modulePath: string): string {
|
|||
return pathToFileURL(resolve(modulePath)).href;
|
||||
}
|
||||
|
||||
/** @internal — exported for testing only */
|
||||
export function _buildImportCandidates(relativePath: string): string[] {
|
||||
// Build candidate paths: try the given path first, then swap src/<->dist/
|
||||
// and try .ts extension. This handles both dev (tsx from src/) and prod
|
||||
// (compiled from dist/) execution contexts.
|
||||
const candidates: string[] = [relativePath];
|
||||
const swapped = relativePath.includes("/src/")
|
||||
? relativePath.replace("/src/", "/dist/")
|
||||
: relativePath.includes("/dist/")
|
||||
? relativePath.replace("/dist/", "/src/")
|
||||
: null;
|
||||
if (swapped) candidates.push(swapped);
|
||||
// Also try .ts variants for dev-mode tsx execution
|
||||
if (relativePath.endsWith(".js")) {
|
||||
candidates.push(relativePath.replace(/\.js$/, ".ts"));
|
||||
if (swapped) candidates.push(swapped.replace(/\.js$/, ".ts"));
|
||||
}
|
||||
return candidates;
|
||||
}
|
||||
|
||||
async function importLocalModule<T>(relativePath: string): Promise<T> {
|
||||
const candidates = _buildImportCandidates(relativePath)
|
||||
.map((p) => new URL(p, import.meta.url).href);
|
||||
|
||||
let lastErr: unknown;
|
||||
for (const candidate of candidates) {
|
||||
try {
|
||||
return await import(candidate) as T;
|
||||
} catch (err) {
|
||||
lastErr = err;
|
||||
}
|
||||
}
|
||||
throw lastErr;
|
||||
}
|
||||
|
||||
function getWorkflowExecutorModuleCandidates(env: NodeJS.ProcessEnv = process.env): string[] {
|
||||
const candidates: string[] = [];
|
||||
const explicitModule = env.GSD_WORKFLOW_EXECUTORS_MODULE?.trim();
|
||||
if (explicitModule) {
|
||||
if (/^[a-z]+:/i.test(explicitModule) && !explicitModule.startsWith("file:")) {
|
||||
if (/^[a-z]{2,}:/i.test(explicitModule) && !explicitModule.startsWith("file:")) {
|
||||
throw new Error("GSD_WORKFLOW_EXECUTORS_MODULE only supports file: URLs or filesystem paths.");
|
||||
}
|
||||
candidates.push(explicitModule.startsWith("file:") ? explicitModule : toFileUrl(explicitModule));
|
||||
|
|
@ -348,6 +388,7 @@ function getWorkflowExecutorModuleCandidates(env: NodeJS.ProcessEnv = process.en
|
|||
|
||||
candidates.push(
|
||||
new URL("../../../src/resources/extensions/gsd/tools/workflow-tool-executors.js", import.meta.url).href,
|
||||
new URL("../../../dist/resources/extensions/gsd/tools/workflow-tool-executors.js", import.meta.url).href,
|
||||
new URL("../../../src/resources/extensions/gsd/tools/workflow-tool-executors.ts", import.meta.url).href,
|
||||
);
|
||||
|
||||
|
|
@ -420,6 +461,38 @@ interface McpToolServer {
|
|||
): unknown;
|
||||
}
|
||||
|
||||
export const WORKFLOW_TOOL_NAMES = [
|
||||
"gsd_decision_save",
|
||||
"gsd_save_decision",
|
||||
"gsd_requirement_update",
|
||||
"gsd_update_requirement",
|
||||
"gsd_requirement_save",
|
||||
"gsd_save_requirement",
|
||||
"gsd_milestone_generate_id",
|
||||
"gsd_generate_milestone_id",
|
||||
"gsd_plan_milestone",
|
||||
"gsd_plan_slice",
|
||||
"gsd_plan_task",
|
||||
"gsd_task_plan",
|
||||
"gsd_replan_slice",
|
||||
"gsd_slice_replan",
|
||||
"gsd_slice_complete",
|
||||
"gsd_complete_slice",
|
||||
"gsd_skip_slice",
|
||||
"gsd_complete_milestone",
|
||||
"gsd_milestone_complete",
|
||||
"gsd_validate_milestone",
|
||||
"gsd_milestone_validate",
|
||||
"gsd_reassess_roadmap",
|
||||
"gsd_roadmap_reassess",
|
||||
"gsd_save_gate_result",
|
||||
"gsd_summary_save",
|
||||
"gsd_task_complete",
|
||||
"gsd_complete_task",
|
||||
"gsd_milestone_status",
|
||||
"gsd_journal_query",
|
||||
] as const;
|
||||
|
||||
async function runSerializedWorkflowOperation<T>(fn: () => Promise<T>): Promise<T> {
|
||||
// The shared DB adapter and workflow log base path are process-global, so
|
||||
// workflow MCP mutations must not overlap within a single server process.
|
||||
|
|
@ -437,6 +510,22 @@ async function runSerializedWorkflowOperation<T>(fn: () => Promise<T>): Promise<
|
|||
}
|
||||
}
|
||||
|
||||
async function runSerializedWorkflowDbOperation<T>(
|
||||
projectDir: string,
|
||||
fn: () => Promise<T>,
|
||||
): Promise<T> {
|
||||
return runSerializedWorkflowOperation(async () => {
|
||||
const { ensureDbOpen } = await importLocalModule<WorkflowDbBootstrapModule>(
|
||||
"../../../src/resources/extensions/gsd/bootstrap/dynamic-tools.js",
|
||||
);
|
||||
const dbAvailable = await ensureDbOpen(projectDir);
|
||||
if (!dbAvailable) {
|
||||
throw new Error("GSD database is not available");
|
||||
}
|
||||
return fn();
|
||||
});
|
||||
}
|
||||
|
||||
async function enforceWorkflowWriteGate(
|
||||
toolName: string,
|
||||
projectDir: string,
|
||||
|
|
@ -566,6 +655,15 @@ async function handleSaveGateResult(
|
|||
return runSerializedWorkflowOperation(() => executeSaveGateResult(params, projectDir));
|
||||
}
|
||||
|
||||
async function ensureMilestoneDbRow(milestoneId: string): Promise<void> {
|
||||
try {
|
||||
const { insertMilestone } = await importLocalModule<any>("../../../src/resources/extensions/gsd/gsd-db.js");
|
||||
insertMilestone({ id: milestoneId, status: "queued" });
|
||||
} catch {
|
||||
// Ignore pre-existing rows or transient DB availability issues.
|
||||
}
|
||||
}
|
||||
|
||||
const projectDirParam = z.string().describe("Absolute path to the project directory within the configured workflow root");
|
||||
|
||||
const planMilestoneParams = {
|
||||
|
|
@ -772,6 +870,73 @@ const summarySaveParams = {
|
|||
};
|
||||
const summarySaveSchema = z.object(summarySaveParams);
|
||||
|
||||
const decisionSaveParams = {
|
||||
projectDir: projectDirParam,
|
||||
scope: z.string().describe("Scope of the decision (e.g. architecture, library, observability)"),
|
||||
decision: z.string().describe("What is being decided"),
|
||||
choice: z.string().describe("The choice made"),
|
||||
rationale: z.string().describe("Why this choice was made"),
|
||||
revisable: z.string().optional().describe("Whether this can be revisited"),
|
||||
when_context: z.string().optional().describe("When/context for the decision"),
|
||||
made_by: z.enum(["human", "agent", "collaborative"]).optional().describe("Who made the decision"),
|
||||
};
|
||||
const decisionSaveSchema = z.object(decisionSaveParams);
|
||||
|
||||
const requirementUpdateParams = {
|
||||
projectDir: projectDirParam,
|
||||
id: z.string().describe("Requirement ID (e.g. R001)"),
|
||||
status: z.string().optional().describe("New status"),
|
||||
validation: z.string().optional().describe("Validation criteria or proof"),
|
||||
notes: z.string().optional().describe("Additional notes"),
|
||||
description: z.string().optional().describe("Updated description"),
|
||||
primary_owner: z.string().optional().describe("Primary owning slice"),
|
||||
supporting_slices: z.string().optional().describe("Supporting slices"),
|
||||
};
|
||||
const requirementUpdateSchema = z.object(requirementUpdateParams);
|
||||
|
||||
const requirementSaveParams = {
|
||||
projectDir: projectDirParam,
|
||||
class: z.string().describe("Requirement class"),
|
||||
description: z.string().describe("Short description of the requirement"),
|
||||
why: z.string().describe("Why this requirement matters"),
|
||||
source: z.string().describe("Origin of the requirement"),
|
||||
status: z.string().optional().describe("Requirement status"),
|
||||
primary_owner: z.string().optional().describe("Primary owning slice"),
|
||||
supporting_slices: z.string().optional().describe("Supporting slices"),
|
||||
validation: z.string().optional().describe("Validation criteria"),
|
||||
notes: z.string().optional().describe("Additional notes"),
|
||||
};
|
||||
const requirementSaveSchema = z.object(requirementSaveParams);
|
||||
|
||||
const milestoneGenerateIdParams = {
|
||||
projectDir: projectDirParam,
|
||||
};
|
||||
const milestoneGenerateIdSchema = z.object(milestoneGenerateIdParams);
|
||||
|
||||
const planTaskParams = {
|
||||
projectDir: projectDirParam,
|
||||
milestoneId: z.string().describe("Milestone ID (e.g. M001)"),
|
||||
sliceId: z.string().describe("Slice ID (e.g. S01)"),
|
||||
taskId: z.string().describe("Task ID (e.g. T01)"),
|
||||
title: z.string().describe("Task title"),
|
||||
description: z.string().describe("Task description / steps block"),
|
||||
estimate: z.string().describe("Task estimate"),
|
||||
files: z.array(z.string()).describe("Files likely touched"),
|
||||
verify: z.string().describe("Verification command or block"),
|
||||
inputs: z.array(z.string()).describe("Input files or references"),
|
||||
expectedOutput: z.array(z.string()).describe("Expected output files or artifacts"),
|
||||
observabilityImpact: z.string().optional().describe("Task observability impact"),
|
||||
};
|
||||
const planTaskSchema = z.object(planTaskParams);
|
||||
|
||||
const skipSliceParams = {
|
||||
projectDir: projectDirParam,
|
||||
sliceId: z.string().describe("Slice ID (e.g. S02)"),
|
||||
milestoneId: z.string().describe("Milestone ID (e.g. M003)"),
|
||||
reason: z.string().optional().describe("Reason for skipping this slice"),
|
||||
};
|
||||
const skipSliceSchema = z.object(skipSliceParams);
|
||||
|
||||
const taskCompleteParams = {
|
||||
projectDir: projectDirParam,
|
||||
taskId: z.string().describe("Task ID (e.g. T01)"),
|
||||
|
|
@ -803,7 +968,171 @@ const milestoneStatusParams = {
|
|||
};
|
||||
const milestoneStatusSchema = z.object(milestoneStatusParams);
|
||||
|
||||
const journalQueryParams = {
|
||||
projectDir: projectDirParam,
|
||||
flowId: z.string().optional().describe("Filter by flow ID"),
|
||||
unitId: z.string().optional().describe("Filter by unit ID"),
|
||||
rule: z.string().optional().describe("Filter by rule name"),
|
||||
eventType: z.string().optional().describe("Filter by event type"),
|
||||
after: z.string().optional().describe("ISO-8601 lower bound (inclusive)"),
|
||||
before: z.string().optional().describe("ISO-8601 upper bound (inclusive)"),
|
||||
limit: z.number().optional().describe("Maximum entries to return"),
|
||||
};
|
||||
const journalQuerySchema = z.object(journalQueryParams);
|
||||
|
||||
export function registerWorkflowTools(server: McpToolServer): void {
|
||||
server.tool(
|
||||
"gsd_decision_save",
|
||||
"Record a project decision to the GSD database and regenerate DECISIONS.md.",
|
||||
decisionSaveParams,
|
||||
async (args: Record<string, unknown>) => {
|
||||
const parsed = parseWorkflowArgs(decisionSaveSchema, args);
|
||||
const { projectDir, ...params } = parsed;
|
||||
await enforceWorkflowWriteGate("gsd_decision_save", projectDir);
|
||||
const result = await runSerializedWorkflowDbOperation(projectDir, async () => {
|
||||
const { saveDecisionToDb } = await importLocalModule<any>("../../../src/resources/extensions/gsd/db-writer.js");
|
||||
return saveDecisionToDb(params, projectDir);
|
||||
});
|
||||
return { content: [{ type: "text" as const, text: `Saved decision ${result.id}` }] };
|
||||
},
|
||||
);
|
||||
|
||||
server.tool(
|
||||
"gsd_save_decision",
|
||||
"Alias for gsd_decision_save. Record a project decision to the GSD database and regenerate DECISIONS.md.",
|
||||
decisionSaveParams,
|
||||
async (args: Record<string, unknown>) => {
|
||||
const parsed = parseWorkflowArgs(decisionSaveSchema, args);
|
||||
const { projectDir, ...params } = parsed;
|
||||
await enforceWorkflowWriteGate("gsd_decision_save", projectDir);
|
||||
const result = await runSerializedWorkflowDbOperation(projectDir, async () => {
|
||||
const { saveDecisionToDb } = await importLocalModule<any>("../../../src/resources/extensions/gsd/db-writer.js");
|
||||
return saveDecisionToDb(params, projectDir);
|
||||
});
|
||||
return { content: [{ type: "text" as const, text: `Saved decision ${result.id}` }] };
|
||||
},
|
||||
);
|
||||
|
||||
server.tool(
|
||||
"gsd_requirement_update",
|
||||
"Update an existing requirement in the GSD database and regenerate REQUIREMENTS.md.",
|
||||
requirementUpdateParams,
|
||||
async (args: Record<string, unknown>) => {
|
||||
const parsed = parseWorkflowArgs(requirementUpdateSchema, args);
|
||||
const { projectDir, id, ...updates } = parsed;
|
||||
await enforceWorkflowWriteGate("gsd_requirement_update", projectDir);
|
||||
await runSerializedWorkflowDbOperation(projectDir, async () => {
|
||||
const { updateRequirementInDb } = await importLocalModule<any>("../../../src/resources/extensions/gsd/db-writer.js");
|
||||
return updateRequirementInDb(id, updates, projectDir);
|
||||
});
|
||||
return { content: [{ type: "text" as const, text: `Updated requirement ${id}` }] };
|
||||
},
|
||||
);
|
||||
|
||||
server.tool(
|
||||
"gsd_update_requirement",
|
||||
"Alias for gsd_requirement_update. Update an existing requirement in the GSD database and regenerate REQUIREMENTS.md.",
|
||||
requirementUpdateParams,
|
||||
async (args: Record<string, unknown>) => {
|
||||
const parsed = parseWorkflowArgs(requirementUpdateSchema, args);
|
||||
const { projectDir, id, ...updates } = parsed;
|
||||
await enforceWorkflowWriteGate("gsd_requirement_update", projectDir);
|
||||
await runSerializedWorkflowDbOperation(projectDir, async () => {
|
||||
const { updateRequirementInDb } = await importLocalModule<any>("../../../src/resources/extensions/gsd/db-writer.js");
|
||||
return updateRequirementInDb(id, updates, projectDir);
|
||||
});
|
||||
return { content: [{ type: "text" as const, text: `Updated requirement ${id}` }] };
|
||||
},
|
||||
);
|
||||
|
||||
server.tool(
|
||||
"gsd_requirement_save",
|
||||
"Record a new requirement to the GSD database and regenerate REQUIREMENTS.md.",
|
||||
requirementSaveParams,
|
||||
async (args: Record<string, unknown>) => {
|
||||
const parsed = parseWorkflowArgs(requirementSaveSchema, args);
|
||||
const { projectDir, ...params } = parsed;
|
||||
await enforceWorkflowWriteGate("gsd_requirement_save", projectDir);
|
||||
const result = await runSerializedWorkflowDbOperation(projectDir, async () => {
|
||||
const { saveRequirementToDb } = await importLocalModule<any>("../../../src/resources/extensions/gsd/db-writer.js");
|
||||
return saveRequirementToDb(params, projectDir);
|
||||
});
|
||||
return { content: [{ type: "text" as const, text: `Saved requirement ${result.id}` }] };
|
||||
},
|
||||
);
|
||||
|
||||
server.tool(
|
||||
"gsd_save_requirement",
|
||||
"Alias for gsd_requirement_save. Record a new requirement to the GSD database and regenerate REQUIREMENTS.md.",
|
||||
requirementSaveParams,
|
||||
async (args: Record<string, unknown>) => {
|
||||
const parsed = parseWorkflowArgs(requirementSaveSchema, args);
|
||||
const { projectDir, ...params } = parsed;
|
||||
await enforceWorkflowWriteGate("gsd_requirement_save", projectDir);
|
||||
const result = await runSerializedWorkflowDbOperation(projectDir, async () => {
|
||||
const { saveRequirementToDb } = await importLocalModule<any>("../../../src/resources/extensions/gsd/db-writer.js");
|
||||
return saveRequirementToDb(params, projectDir);
|
||||
});
|
||||
return { content: [{ type: "text" as const, text: `Saved requirement ${result.id}` }] };
|
||||
},
|
||||
);
|
||||
|
||||
server.tool(
|
||||
"gsd_milestone_generate_id",
|
||||
"Generate the next milestone ID for a new GSD milestone.",
|
||||
milestoneGenerateIdParams,
|
||||
async (args: Record<string, unknown>) => {
|
||||
const { projectDir } = parseWorkflowArgs(milestoneGenerateIdSchema, args);
|
||||
await enforceWorkflowWriteGate("gsd_milestone_generate_id", projectDir);
|
||||
const id = await runSerializedWorkflowDbOperation(projectDir, async () => {
|
||||
const {
|
||||
claimReservedId,
|
||||
findMilestoneIds,
|
||||
getReservedMilestoneIds,
|
||||
nextMilestoneId,
|
||||
} = await importLocalModule<any>("../../../src/resources/extensions/gsd/milestone-ids.js");
|
||||
const reserved = claimReservedId();
|
||||
if (reserved) {
|
||||
await ensureMilestoneDbRow(reserved);
|
||||
return reserved;
|
||||
}
|
||||
const allIds = [...new Set([...findMilestoneIds(projectDir), ...getReservedMilestoneIds()])];
|
||||
const nextId = nextMilestoneId(allIds);
|
||||
await ensureMilestoneDbRow(nextId);
|
||||
return nextId;
|
||||
});
|
||||
return { content: [{ type: "text" as const, text: id }] };
|
||||
},
|
||||
);
|
||||
|
||||
server.tool(
|
||||
"gsd_generate_milestone_id",
|
||||
"Alias for gsd_milestone_generate_id. Generate the next milestone ID for a new GSD milestone.",
|
||||
milestoneGenerateIdParams,
|
||||
async (args: Record<string, unknown>) => {
|
||||
const { projectDir } = parseWorkflowArgs(milestoneGenerateIdSchema, args);
|
||||
await enforceWorkflowWriteGate("gsd_milestone_generate_id", projectDir);
|
||||
const id = await runSerializedWorkflowDbOperation(projectDir, async () => {
|
||||
const {
|
||||
claimReservedId,
|
||||
findMilestoneIds,
|
||||
getReservedMilestoneIds,
|
||||
nextMilestoneId,
|
||||
} = await importLocalModule<any>("../../../src/resources/extensions/gsd/milestone-ids.js");
|
||||
const reserved = claimReservedId();
|
||||
if (reserved) {
|
||||
await ensureMilestoneDbRow(reserved);
|
||||
return reserved;
|
||||
}
|
||||
const allIds = [...new Set([...findMilestoneIds(projectDir), ...getReservedMilestoneIds()])];
|
||||
const nextId = nextMilestoneId(allIds);
|
||||
await ensureMilestoneDbRow(nextId);
|
||||
return nextId;
|
||||
});
|
||||
return { content: [{ type: "text" as const, text: id }] };
|
||||
},
|
||||
);
|
||||
|
||||
server.tool(
|
||||
"gsd_plan_milestone",
|
||||
"Write milestone planning state to the GSD database and render ROADMAP.md from DB.",
|
||||
|
|
@ -830,6 +1159,48 @@ export function registerWorkflowTools(server: McpToolServer): void {
|
|||
},
|
||||
);
|
||||
|
||||
server.tool(
|
||||
"gsd_plan_task",
|
||||
"Write task planning state to the GSD database and render tasks/T##-PLAN.md from DB.",
|
||||
planTaskParams,
|
||||
async (args: Record<string, unknown>) => {
|
||||
const parsed = parseWorkflowArgs(planTaskSchema, args);
|
||||
const { projectDir, ...params } = parsed;
|
||||
await enforceWorkflowWriteGate("gsd_plan_task", projectDir, params.milestoneId);
|
||||
const result = await runSerializedWorkflowDbOperation(projectDir, async () => {
|
||||
const { handlePlanTask } = await importLocalModule<any>("../../../src/resources/extensions/gsd/tools/plan-task.js");
|
||||
return handlePlanTask(params, projectDir);
|
||||
});
|
||||
if ("error" in result) {
|
||||
throw new Error(result.error);
|
||||
}
|
||||
return {
|
||||
content: [{ type: "text" as const, text: `Planned task ${result.taskId} (${result.sliceId}/${result.milestoneId})` }],
|
||||
};
|
||||
},
|
||||
);
|
||||
|
||||
server.tool(
|
||||
"gsd_task_plan",
|
||||
"Alias for gsd_plan_task. Write task planning state to the GSD database and render tasks/T##-PLAN.md from DB.",
|
||||
planTaskParams,
|
||||
async (args: Record<string, unknown>) => {
|
||||
const parsed = parseWorkflowArgs(planTaskSchema, args);
|
||||
const { projectDir, ...params } = parsed;
|
||||
await enforceWorkflowWriteGate("gsd_plan_task", projectDir, params.milestoneId);
|
||||
const result = await runSerializedWorkflowDbOperation(projectDir, async () => {
|
||||
const { handlePlanTask } = await importLocalModule<any>("../../../src/resources/extensions/gsd/tools/plan-task.js");
|
||||
return handlePlanTask(params, projectDir);
|
||||
});
|
||||
if ("error" in result) {
|
||||
throw new Error(result.error);
|
||||
}
|
||||
return {
|
||||
content: [{ type: "text" as const, text: `Planned task ${result.taskId} (${result.sliceId}/${result.milestoneId})` }],
|
||||
};
|
||||
},
|
||||
);
|
||||
|
||||
server.tool(
|
||||
"gsd_replan_slice",
|
||||
"Replan a slice after a blocker is discovered, preserving completed tasks and re-rendering PLAN.md + REPLAN.md.",
|
||||
|
|
@ -870,6 +1241,36 @@ export function registerWorkflowTools(server: McpToolServer): void {
|
|||
},
|
||||
);
|
||||
|
||||
server.tool(
|
||||
"gsd_skip_slice",
|
||||
"Mark a slice as skipped so auto-mode advances past it without executing.",
|
||||
skipSliceParams,
|
||||
async (args: Record<string, unknown>) => {
|
||||
const { projectDir, milestoneId, sliceId, reason } = parseWorkflowArgs(skipSliceSchema, args);
|
||||
await enforceWorkflowWriteGate("gsd_skip_slice", projectDir, milestoneId);
|
||||
await runSerializedWorkflowDbOperation(projectDir, async () => {
|
||||
const { getSlice, updateSliceStatus } = await importLocalModule<any>("../../../src/resources/extensions/gsd/gsd-db.js");
|
||||
const { invalidateStateCache } = await importLocalModule<any>("../../../src/resources/extensions/gsd/state.js");
|
||||
const { rebuildState } = await importLocalModule<any>("../../../src/resources/extensions/gsd/doctor.js");
|
||||
const slice = getSlice(milestoneId, sliceId);
|
||||
if (!slice) {
|
||||
throw new Error(`Slice ${sliceId} not found in milestone ${milestoneId}`);
|
||||
}
|
||||
if (slice.status === "complete" || slice.status === "done") {
|
||||
throw new Error(`Slice ${sliceId} is already complete and cannot be skipped`);
|
||||
}
|
||||
if (slice.status !== "skipped") {
|
||||
updateSliceStatus(milestoneId, sliceId, "skipped");
|
||||
invalidateStateCache();
|
||||
await rebuildState(projectDir);
|
||||
}
|
||||
});
|
||||
return {
|
||||
content: [{ type: "text" as const, text: `Skipped slice ${sliceId} (${milestoneId}). Reason: ${reason ?? "User-directed skip"}.` }],
|
||||
};
|
||||
},
|
||||
);
|
||||
|
||||
server.tool(
|
||||
"gsd_complete_milestone",
|
||||
"Record a completed milestone to the GSD database and render its SUMMARY.md.",
|
||||
|
|
@ -994,4 +1395,19 @@ export function registerWorkflowTools(server: McpToolServer): void {
|
|||
return runSerializedWorkflowOperation(() => executeMilestoneStatus({ milestoneId }, projectDir));
|
||||
},
|
||||
);
|
||||
|
||||
server.tool(
|
||||
"gsd_journal_query",
|
||||
"Query the structured event journal for auto-mode iterations.",
|
||||
journalQueryParams,
|
||||
async (args: Record<string, unknown>) => {
|
||||
const { projectDir, limit, ...filters } = parseWorkflowArgs(journalQuerySchema, args);
|
||||
const { queryJournal } = await importLocalModule<any>("../../../src/resources/extensions/gsd/journal.js");
|
||||
const entries = queryJournal(projectDir, filters).slice(0, limit ?? 100);
|
||||
if (entries.length === 0) {
|
||||
return { content: [{ type: "text" as const, text: "No matching journal entries found." }] };
|
||||
}
|
||||
return { content: [{ type: "text" as const, text: JSON.stringify(entries, null, 2) }] };
|
||||
},
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,8 @@ import assert from "node:assert/strict";
|
|||
import { readFileSync } from "node:fs";
|
||||
import { join, dirname } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { Agent } from "./agent.ts";
|
||||
import { getModel, type AssistantMessageEventStream } from "@gsd/pi-ai";
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
|
|
@ -50,4 +52,84 @@ describe("Agent — activeInferenceModel (#1844 Bug 2)", () => {
|
|||
assert.ok(setLine < abortLine,
|
||||
"activeInferenceModel must be set before streaming infrastructure is created");
|
||||
});
|
||||
|
||||
it("getProviderOptions are forwarded into the provider stream call", async () => {
|
||||
let capturedOptions: Record<string, unknown> | undefined;
|
||||
const agent = new Agent({
|
||||
initialState: {
|
||||
model: getModel("anthropic", "claude-3-5-sonnet-20241022"),
|
||||
systemPrompt: "test",
|
||||
tools: [],
|
||||
},
|
||||
getProviderOptions: async () => ({ customRuntimeOption: "present" }),
|
||||
streamFn: (_model, _context, options): AssistantMessageEventStream => {
|
||||
capturedOptions = options as Record<string, unknown> | undefined;
|
||||
return {
|
||||
async *[Symbol.asyncIterator]() {
|
||||
yield {
|
||||
type: "start",
|
||||
partial: {
|
||||
role: "assistant",
|
||||
content: [],
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
model: "claude-3-5-sonnet-20241022",
|
||||
usage: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
};
|
||||
yield {
|
||||
type: "done",
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "ok" }],
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
model: "claude-3-5-sonnet-20241022",
|
||||
usage: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
};
|
||||
},
|
||||
result: async () => ({
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "ok" }],
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
model: "claude-3-5-sonnet-20241022",
|
||||
usage: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
timestamp: Date.now(),
|
||||
}),
|
||||
[Symbol.asyncDispose]: async () => {},
|
||||
} as AssistantMessageEventStream;
|
||||
},
|
||||
});
|
||||
|
||||
await agent.prompt("hello");
|
||||
assert.equal(capturedOptions?.customRuntimeOption, "present");
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -108,6 +108,14 @@ export interface AgentOptions {
|
|||
* switches mid-session are handled correctly.
|
||||
*/
|
||||
externalToolExecution?: (model: Model<any>) => boolean;
|
||||
|
||||
/**
|
||||
* Optional provider-specific options to merge into the next stream call.
|
||||
*
|
||||
* Use this for runtime-only callbacks or handles that should not live in
|
||||
* shared agent state, such as UI bridges for external CLI providers.
|
||||
*/
|
||||
getProviderOptions?: (model: Model<any>) => Record<string, unknown> | undefined | Promise<Record<string, unknown> | undefined>;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -152,6 +160,7 @@ export class Agent {
|
|||
private _beforeToolCall?: AgentLoopConfig["beforeToolCall"];
|
||||
private _afterToolCall?: AgentLoopConfig["afterToolCall"];
|
||||
private _externalToolExecution?: (model: Model<any>) => boolean;
|
||||
private _getProviderOptions?: AgentOptions["getProviderOptions"];
|
||||
|
||||
constructor(opts: AgentOptions = {}) {
|
||||
this._state = { ...this._state, ...opts.initialState };
|
||||
|
|
@ -167,6 +176,7 @@ export class Agent {
|
|||
this._transport = opts.transport ?? "sse";
|
||||
this._maxRetryDelayMs = opts.maxRetryDelayMs;
|
||||
this._externalToolExecution = opts.externalToolExecution;
|
||||
this._getProviderOptions = opts.getProviderOptions;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -486,8 +496,10 @@ export class Agent {
|
|||
};
|
||||
|
||||
let skipInitialSteeringPoll = options?.skipInitialSteeringPoll === true;
|
||||
const providerOptions = await this._getProviderOptions?.(model);
|
||||
|
||||
const config: AgentLoopConfig = {
|
||||
...(providerOptions ?? {}),
|
||||
model,
|
||||
reasoning,
|
||||
sessionId: this._sessionId,
|
||||
|
|
|
|||
|
|
@ -12,7 +12,10 @@ export * from "./providers/google-vertex.js";
|
|||
export * from "./providers/mistral.js";
|
||||
export * from "./providers/openai-completions.js";
|
||||
export * from "./providers/openai-responses.js";
|
||||
export * from "./providers/provider-capabilities.js";
|
||||
export * from "./providers/register-builtins.js";
|
||||
export type { ProviderSwitchReport } from "./providers/transform-messages.js";
|
||||
export { createEmptyReport, hasTransformations, transformMessagesWithReport } from "./providers/transform-messages.js";
|
||||
export * from "./stream.js";
|
||||
export * from "./types.js";
|
||||
export * from "./utils/event-stream.js";
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
|||
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import { adjustMaxTokensForThinking, buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
import { transformMessages } from "./transform-messages.js";
|
||||
import { transformMessagesWithReport } from "./transform-messages.js";
|
||||
|
||||
export interface BedrockOptions extends StreamOptions {
|
||||
region?: string;
|
||||
|
|
@ -151,7 +151,7 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOpt
|
|||
messages: convertMessages(context, model, cacheRetention),
|
||||
system: buildSystemPrompt(context.systemPrompt, model, cacheRetention),
|
||||
inferenceConfig: { maxTokens: options.maxTokens, temperature: options.temperature },
|
||||
toolConfig: convertToolConfig(context.tools, options.toolChoice),
|
||||
toolConfig: convertToolConfig(context.tools, options.toolChoice, model, cacheRetention),
|
||||
additionalModelRequestFields: buildAdditionalModelRequestFields(model, options),
|
||||
};
|
||||
const nextCommandInput = await options?.onPayload?.(commandInput, model);
|
||||
|
|
@ -487,7 +487,7 @@ function convertMessages(
|
|||
cacheRetention: CacheRetention,
|
||||
): Message[] {
|
||||
const result: Message[] = [];
|
||||
const transformedMessages = transformMessages(context.messages, model, normalizeToolCallId);
|
||||
const transformedMessages = transformMessagesWithReport(context.messages, model, normalizeToolCallId, "bedrock-converse-stream");
|
||||
|
||||
for (let i = 0; i < transformedMessages.length; i++) {
|
||||
const m = transformedMessages[i];
|
||||
|
|
@ -633,6 +633,8 @@ function convertMessages(
|
|||
function convertToolConfig(
|
||||
tools: Tool[] | undefined,
|
||||
toolChoice: BedrockOptions["toolChoice"],
|
||||
model: Model<"bedrock-converse-stream">,
|
||||
cacheRetention: CacheRetention,
|
||||
): ToolConfiguration | undefined {
|
||||
if (!tools?.length || toolChoice === "none") return undefined;
|
||||
|
||||
|
|
@ -644,6 +646,16 @@ function convertToolConfig(
|
|||
},
|
||||
}));
|
||||
|
||||
// Add cachePoint after last tool for supported models
|
||||
if (cacheRetention !== "none" && supportsPromptCaching(model)) {
|
||||
bedrockTools.push({
|
||||
cachePoint: {
|
||||
type: CachePointType.DEFAULT,
|
||||
...(cacheRetention === "long" ? { ttl: CacheTTL.ONE_HOUR } : {}),
|
||||
},
|
||||
} as any);
|
||||
}
|
||||
|
||||
let bedrockToolChoice: ToolChoice | undefined;
|
||||
switch (toolChoice) {
|
||||
case "auto":
|
||||
|
|
|
|||
|
|
@ -1,6 +1,60 @@
|
|||
import { describe, it } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { mapStopReason } from "./anthropic-shared.js";
|
||||
import { convertTools, mapStopReason } from "./anthropic-shared.js";
|
||||
|
||||
const makeTool = (name: string) =>
|
||||
({
|
||||
name,
|
||||
description: `desc for ${name}`,
|
||||
parameters: {
|
||||
type: "object" as const,
|
||||
properties: { arg: { type: "string" } },
|
||||
required: ["arg"],
|
||||
},
|
||||
}) as any;
|
||||
|
||||
describe("convertTools cache_control", () => {
|
||||
it("adds cache_control to the last tool when cacheControl is provided", () => {
|
||||
const tools = [makeTool("Read"), makeTool("Write"), makeTool("Edit")];
|
||||
const cacheControl = { type: "ephemeral" as const };
|
||||
const result = convertTools(tools, false, cacheControl);
|
||||
|
||||
assert.equal(result.length, 3);
|
||||
assert.equal((result[0] as any).cache_control, undefined);
|
||||
assert.equal((result[1] as any).cache_control, undefined);
|
||||
assert.deepEqual((result[2] as any).cache_control, { type: "ephemeral" });
|
||||
});
|
||||
|
||||
it("does not add cache_control when cacheControl is undefined", () => {
|
||||
const tools = [makeTool("Read"), makeTool("Write")];
|
||||
const result = convertTools(tools, false);
|
||||
|
||||
for (const tool of result) {
|
||||
assert.equal((tool as any).cache_control, undefined);
|
||||
}
|
||||
});
|
||||
|
||||
it("handles empty tools array without error", () => {
|
||||
const result = convertTools([], false, { type: "ephemeral" });
|
||||
assert.equal(result.length, 0);
|
||||
});
|
||||
|
||||
it("passes through ttl when provided", () => {
|
||||
const tools = [makeTool("Read")];
|
||||
const cacheControl = { type: "ephemeral" as const, ttl: "1h" as const };
|
||||
const result = convertTools(tools, false, cacheControl);
|
||||
|
||||
assert.deepEqual((result[0] as any).cache_control, { type: "ephemeral", ttl: "1h" });
|
||||
});
|
||||
|
||||
it("single tool gets cache_control", () => {
|
||||
const tools = [makeTool("Read")];
|
||||
const result = convertTools(tools, false, { type: "ephemeral" });
|
||||
|
||||
assert.equal(result.length, 1);
|
||||
assert.deepEqual((result[0] as any).cache_control, { type: "ephemeral" });
|
||||
});
|
||||
});
|
||||
|
||||
describe("mapStopReason", () => {
|
||||
it("maps end_turn to stop", () => {
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import type { AssistantMessageEventStream } from "../utils/event-stream.js";
|
|||
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||
import { hasXmlParameterTags, repairToolJson } from "../utils/repair-tool-json.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import { transformMessages } from "./transform-messages.js";
|
||||
import { transformMessagesWithReport } from "./transform-messages.js";
|
||||
|
||||
export type AnthropicEffort = "low" | "medium" | "high" | "max";
|
||||
|
||||
|
|
@ -235,7 +235,7 @@ export function convertMessages(
|
|||
): MessageParam[] {
|
||||
const params: MessageParam[] = [];
|
||||
|
||||
const transformedMessages = transformMessages(messages, model, normalizeToolCallId);
|
||||
const transformedMessages = transformMessagesWithReport(messages, model, normalizeToolCallId, "anthropic-messages");
|
||||
|
||||
for (let i = 0; i < transformedMessages.length; i++) {
|
||||
const msg = transformedMessages[i];
|
||||
|
|
@ -394,10 +394,14 @@ export function convertMessages(
|
|||
return params;
|
||||
}
|
||||
|
||||
export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.Tool[] {
|
||||
export function convertTools(
|
||||
tools: Tool[],
|
||||
isOAuthToken: boolean,
|
||||
cacheControl?: { type: "ephemeral"; ttl?: "1h" },
|
||||
): Anthropic.Messages.Tool[] {
|
||||
if (!tools) return [];
|
||||
|
||||
return tools.map((tool) => {
|
||||
const result = tools.map((tool) => {
|
||||
const jsonSchema = tool.parameters as any;
|
||||
|
||||
return {
|
||||
|
|
@ -410,6 +414,13 @@ export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Me
|
|||
},
|
||||
};
|
||||
});
|
||||
|
||||
// Add cache breakpoint to last tool — covers entire tool block
|
||||
if (cacheControl && result.length > 0) {
|
||||
(result[result.length - 1] as any).cache_control = cacheControl;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
export function buildParams(
|
||||
|
|
@ -457,7 +468,7 @@ export function buildParams(
|
|||
}
|
||||
|
||||
if (context.tools) {
|
||||
params.tools = convertTools(context.tools, isOAuthToken);
|
||||
params.tools = convertTools(context.tools, isOAuthToken, cacheControl);
|
||||
}
|
||||
|
||||
if (options?.thinkingEnabled && model.reasoning) {
|
||||
|
|
|
|||
|
|
@ -34,9 +34,6 @@ async function getAnthropicClass(): Promise<typeof Anthropic> {
|
|||
return _AnthropicClass;
|
||||
}
|
||||
|
||||
// Stealth mode: Mimic Claude Code's tool naming exactly
|
||||
const claudeCodeVersion = "2.1.62";
|
||||
|
||||
function mergeHeaders(...headerSources: (Record<string, string> | undefined)[]): Record<string, string> {
|
||||
const merged: Record<string, string> = {};
|
||||
for (const headers of headerSources) {
|
||||
|
|
@ -47,10 +44,6 @@ function mergeHeaders(...headerSources: (Record<string, string> | undefined)[]):
|
|||
return merged;
|
||||
}
|
||||
|
||||
function isOAuthToken(apiKey: string): boolean {
|
||||
return apiKey.includes("sk-ant-oat");
|
||||
}
|
||||
|
||||
async function createClient(
|
||||
model: Model<"anthropic-messages">,
|
||||
apiKey: string,
|
||||
|
|
@ -97,30 +90,7 @@ async function createClient(
|
|||
betaFeatures.push("interleaved-thinking-2025-05-14");
|
||||
}
|
||||
|
||||
// OAuth: Bearer auth, Claude Code identity headers
|
||||
if (isOAuthToken(apiKey)) {
|
||||
const client = new AnthropicClass({
|
||||
apiKey: null,
|
||||
authToken: apiKey,
|
||||
baseURL: model.baseUrl,
|
||||
dangerouslyAllowBrowser: true,
|
||||
defaultHeaders: mergeHeaders(
|
||||
{
|
||||
accept: "application/json",
|
||||
"anthropic-dangerous-direct-browser-access": "true",
|
||||
...(betaFeatures.length > 0 ? { "anthropic-beta": `claude-code-20250219,oauth-2025-04-20,${betaFeatures.join(",")}` } : {}),
|
||||
"user-agent": `claude-cli/${claudeCodeVersion}`,
|
||||
"x-app": "cli",
|
||||
},
|
||||
model.headers,
|
||||
optionsHeaders,
|
||||
),
|
||||
});
|
||||
|
||||
return { client, isOAuthToken: true };
|
||||
}
|
||||
|
||||
// API key auth
|
||||
// API key auth (Anthropic OAuth removed per TOS compliance — use API keys or Claude CLI)
|
||||
// Alibaba Coding Plan uses Bearer token auth instead of x-api-key
|
||||
const isAlibabaProvider = model.provider === "alibaba-coding-plan";
|
||||
const client = new AnthropicClass({
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
import { type Content, FinishReason, FunctionCallingConfigMode, type Part } from "@google/genai";
|
||||
import type { Context, ImageContent, Model, StopReason, TextContent, Tool } from "../types.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import { transformMessages } from "./transform-messages.js";
|
||||
import { transformMessagesWithReport } from "./transform-messages.js";
|
||||
|
||||
type GoogleApiType = "google-generative-ai" | "google-gemini-cli" | "google-vertex";
|
||||
|
||||
|
|
@ -80,7 +80,7 @@ export function convertMessages<T extends GoogleApiType>(model: Model<T>, contex
|
|||
return id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64);
|
||||
};
|
||||
|
||||
const transformedMessages = transformMessages(context.messages, model, normalizeToolCallId);
|
||||
const transformedMessages = transformMessagesWithReport(context.messages, model, normalizeToolCallId, "google-generative-ai");
|
||||
|
||||
for (const msg of transformedMessages) {
|
||||
if (msg.role === "user") {
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ import { shortHash } from "../utils/hash.js";
|
|||
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import { buildBaseOptions, clampReasoning } from "./simple-options.js";
|
||||
import { transformMessages } from "./transform-messages.js";
|
||||
import { transformMessagesWithReport } from "./transform-messages.js";
|
||||
|
||||
const MISTRAL_TOOL_CALL_ID_LENGTH = 9;
|
||||
const MAX_MISTRAL_ERROR_BODY_CHARS = 4000;
|
||||
|
|
@ -79,7 +79,7 @@ export const streamMistral: StreamFunction<"mistral-conversations", MistralOptio
|
|||
});
|
||||
|
||||
const normalizeMistralToolCallId = createMistralToolCallIdNormalizer();
|
||||
const transformedMessages = transformMessages(context.messages, model, (id) => normalizeMistralToolCallId(id));
|
||||
const transformedMessages = transformMessagesWithReport(context.messages, model, (id) => normalizeMistralToolCallId(id), "mistral-conversations");
|
||||
|
||||
let payload = buildChatPayload(model, context, transformedMessages, options);
|
||||
const nextPayload = await options?.onPayload?.(payload, model);
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ import {
|
|||
finalizeStream,
|
||||
handleStreamError,
|
||||
} from "./openai-shared.js";
|
||||
import { transformMessages } from "./transform-messages.js";
|
||||
import { transformMessagesWithReport } from "./transform-messages.js";
|
||||
|
||||
/**
|
||||
* Check if conversation messages contain tool calls or tool results.
|
||||
|
|
@ -343,6 +343,7 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
|
|||
|
||||
if (context.tools) {
|
||||
params.tools = convertTools(context.tools, compat);
|
||||
maybeAddOpenRouterAnthropicToolCacheControl(model, params.tools);
|
||||
} else if (hasToolHistory(context.messages)) {
|
||||
// Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results
|
||||
params.tools = [];
|
||||
|
|
@ -379,6 +380,19 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
|
|||
return params;
|
||||
}
|
||||
|
||||
function maybeAddOpenRouterAnthropicToolCacheControl(
|
||||
model: Model<"openai-completions">,
|
||||
tools: OpenAI.Chat.Completions.ChatCompletionTool[] | undefined,
|
||||
): void {
|
||||
if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/")) return;
|
||||
if (!tools?.length) return;
|
||||
|
||||
const lastTool = tools[tools.length - 1];
|
||||
if ("function" in lastTool) {
|
||||
Object.assign(lastTool.function, { cache_control: { type: "ephemeral" } });
|
||||
}
|
||||
}
|
||||
|
||||
function mapReasoningEffort(
|
||||
effort: NonNullable<OpenAICompletionsOptions["reasoningEffort"]>,
|
||||
reasoningEffortMap: Partial<Record<NonNullable<OpenAICompletionsOptions["reasoningEffort"]>, string>>,
|
||||
|
|
@ -441,7 +455,7 @@ export function convertMessages(
|
|||
return id;
|
||||
};
|
||||
|
||||
const transformedMessages = transformMessages(context.messages, model, (id) => normalizeToolCallId(id));
|
||||
const transformedMessages = transformMessagesWithReport(context.messages, model, (id) => normalizeToolCallId(id), "openai-completions");
|
||||
|
||||
if (context.systemPrompt) {
|
||||
const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ import type { AssistantMessageEventStream } from "../utils/event-stream.js";
|
|||
import { shortHash } from "../utils/hash.js";
|
||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
||||
import { transformMessages } from "./transform-messages.js";
|
||||
import { transformMessagesWithReport } from "./transform-messages.js";
|
||||
|
||||
// =============================================================================
|
||||
// Utilities
|
||||
|
|
@ -108,7 +108,7 @@ export function convertResponsesMessages<TApi extends Api>(
|
|||
return `${normalizedCallId}|${normalizedItemId}`;
|
||||
};
|
||||
|
||||
const transformedMessages = transformMessages(context.messages, model, normalizeToolCallId);
|
||||
const transformedMessages = transformMessagesWithReport(context.messages, model, normalizeToolCallId, "openai-responses");
|
||||
|
||||
const includeSystemPrompt = options?.includeSystemPrompt ?? true;
|
||||
if (includeSystemPrompt && context.systemPrompt) {
|
||||
|
|
|
|||
174
packages/pi-ai/src/providers/provider-capabilities.test.ts
Normal file
174
packages/pi-ai/src/providers/provider-capabilities.test.ts
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
// GSD-2 — Provider Capabilities Registry Tests (ADR-005 Phase 1)
|
||||
import { describe, test } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
|
||||
import {
|
||||
PROVIDER_CAPABILITIES,
|
||||
getProviderCapabilities,
|
||||
getUnsupportedFeatures,
|
||||
mergeCapabilityOverrides,
|
||||
getRegisteredApis,
|
||||
} from "./provider-capabilities.js";
|
||||
|
||||
// ─── Registry Completeness ──────────────────────────────────────────────────
|
||||
|
||||
describe("PROVIDER_CAPABILITIES registry", () => {
|
||||
const EXPECTED_APIS = [
|
||||
"anthropic-messages",
|
||||
"anthropic-vertex",
|
||||
"openai-responses",
|
||||
"azure-openai-responses",
|
||||
"openai-codex-responses",
|
||||
"openai-completions",
|
||||
"google-generative-ai",
|
||||
"google-gemini-cli",
|
||||
"google-vertex",
|
||||
"mistral-conversations",
|
||||
"bedrock-converse-stream",
|
||||
"ollama-chat",
|
||||
];
|
||||
|
||||
test("covers all expected API providers", () => {
|
||||
for (const api of EXPECTED_APIS) {
|
||||
assert.ok(
|
||||
PROVIDER_CAPABILITIES[api],
|
||||
`Missing capability entry for API: ${api}`,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
test("getRegisteredApis returns all entries", () => {
|
||||
const registered = getRegisteredApis();
|
||||
for (const api of EXPECTED_APIS) {
|
||||
assert.ok(registered.includes(api), `getRegisteredApis missing: ${api}`);
|
||||
}
|
||||
});
|
||||
|
||||
test("all entries have required fields", () => {
|
||||
for (const [api, caps] of Object.entries(PROVIDER_CAPABILITIES)) {
|
||||
assert.equal(typeof caps.toolCalling, "boolean", `${api}.toolCalling`);
|
||||
assert.equal(typeof caps.maxTools, "number", `${api}.maxTools`);
|
||||
assert.equal(typeof caps.imageToolResults, "boolean", `${api}.imageToolResults`);
|
||||
assert.equal(typeof caps.structuredOutput, "boolean", `${api}.structuredOutput`);
|
||||
assert.ok(caps.toolCallIdFormat, `${api}.toolCallIdFormat`);
|
||||
assert.equal(typeof caps.toolCallIdFormat.maxLength, "number", `${api}.toolCallIdFormat.maxLength`);
|
||||
assert.ok(caps.toolCallIdFormat.allowedChars instanceof RegExp, `${api}.toolCallIdFormat.allowedChars`);
|
||||
assert.ok(
|
||||
["full", "text-only", "none"].includes(caps.thinkingPersistence),
|
||||
`${api}.thinkingPersistence is "${caps.thinkingPersistence}"`,
|
||||
);
|
||||
assert.ok(Array.isArray(caps.unsupportedSchemaFeatures), `${api}.unsupportedSchemaFeatures`);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Provider-specific Values ───────────────────────────────────────────────
|
||||
|
||||
describe("provider-specific capabilities", () => {
|
||||
test("Anthropic supports full thinking persistence", () => {
|
||||
assert.equal(PROVIDER_CAPABILITIES["anthropic-messages"].thinkingPersistence, "full");
|
||||
});
|
||||
|
||||
test("Anthropic supports image tool results", () => {
|
||||
assert.equal(PROVIDER_CAPABILITIES["anthropic-messages"].imageToolResults, true);
|
||||
});
|
||||
|
||||
test("Anthropic tool call ID is 64 chars max", () => {
|
||||
assert.equal(PROVIDER_CAPABILITIES["anthropic-messages"].toolCallIdFormat.maxLength, 64);
|
||||
});
|
||||
|
||||
test("Mistral tool call ID is 9 chars max", () => {
|
||||
assert.equal(PROVIDER_CAPABILITIES["mistral-conversations"].toolCallIdFormat.maxLength, 9);
|
||||
});
|
||||
|
||||
test("Mistral has no thinking persistence", () => {
|
||||
assert.equal(PROVIDER_CAPABILITIES["mistral-conversations"].thinkingPersistence, "none");
|
||||
});
|
||||
|
||||
test("Google does not support patternProperties", () => {
|
||||
assert.ok(
|
||||
PROVIDER_CAPABILITIES["google-generative-ai"].unsupportedSchemaFeatures.includes("patternProperties"),
|
||||
);
|
||||
});
|
||||
|
||||
test("Google does not support const", () => {
|
||||
assert.ok(
|
||||
PROVIDER_CAPABILITIES["google-generative-ai"].unsupportedSchemaFeatures.includes("const"),
|
||||
);
|
||||
});
|
||||
|
||||
test("OpenAI Responses does not support image tool results", () => {
|
||||
assert.equal(PROVIDER_CAPABILITIES["openai-responses"].imageToolResults, false);
|
||||
});
|
||||
|
||||
test("OpenAI Responses has text-only thinking persistence", () => {
|
||||
assert.equal(PROVIDER_CAPABILITIES["openai-responses"].thinkingPersistence, "text-only");
|
||||
});
|
||||
});
|
||||
|
||||
// ─── getProviderCapabilities ────────────────────────────────────────────────
|
||||
|
||||
describe("getProviderCapabilities", () => {
|
||||
test("returns known provider capabilities", () => {
|
||||
const caps = getProviderCapabilities("anthropic-messages");
|
||||
assert.equal(caps.toolCalling, true);
|
||||
assert.equal(caps.thinkingPersistence, "full");
|
||||
});
|
||||
|
||||
test("returns permissive defaults for unknown providers", () => {
|
||||
const caps = getProviderCapabilities("unknown-provider-xyz");
|
||||
assert.equal(caps.toolCalling, true);
|
||||
assert.equal(caps.imageToolResults, true);
|
||||
assert.deepEqual(caps.unsupportedSchemaFeatures, []);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── getUnsupportedFeatures ─────────────────────────────────────────────────
|
||||
|
||||
describe("getUnsupportedFeatures", () => {
|
||||
test("returns unsupported features for Google", () => {
|
||||
const unsupported = getUnsupportedFeatures("google-generative-ai", ["patternProperties", "const"]);
|
||||
assert.deepEqual(unsupported, ["patternProperties", "const"]);
|
||||
});
|
||||
|
||||
test("returns empty for Anthropic with any features", () => {
|
||||
const unsupported = getUnsupportedFeatures("anthropic-messages", ["patternProperties", "const"]);
|
||||
assert.deepEqual(unsupported, []);
|
||||
});
|
||||
|
||||
test("returns empty for unknown provider", () => {
|
||||
const unsupported = getUnsupportedFeatures("unknown-xyz", ["patternProperties"]);
|
||||
assert.deepEqual(unsupported, []);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── mergeCapabilityOverrides ───────────────────────────────────────────────
|
||||
|
||||
describe("mergeCapabilityOverrides", () => {
|
||||
test("overrides individual fields", () => {
|
||||
const merged = mergeCapabilityOverrides("openai-responses", {
|
||||
imageToolResults: true,
|
||||
});
|
||||
assert.equal(merged.imageToolResults, true);
|
||||
// Non-overridden fields preserved
|
||||
assert.equal(merged.toolCalling, true);
|
||||
assert.equal(merged.thinkingPersistence, "text-only");
|
||||
});
|
||||
|
||||
test("deep-merges toolCallIdFormat", () => {
|
||||
const merged = mergeCapabilityOverrides("anthropic-messages", {
|
||||
toolCallIdFormat: { maxLength: 128 },
|
||||
});
|
||||
assert.equal(merged.toolCallIdFormat.maxLength, 128);
|
||||
// allowedChars preserved from base
|
||||
assert.ok(merged.toolCallIdFormat.allowedChars instanceof RegExp);
|
||||
});
|
||||
|
||||
test("uses permissive defaults for unknown provider", () => {
|
||||
const merged = mergeCapabilityOverrides("unknown-xyz", {
|
||||
imageToolResults: false,
|
||||
});
|
||||
assert.equal(merged.imageToolResults, false);
|
||||
assert.equal(merged.toolCalling, true); // from default
|
||||
});
|
||||
});
|
||||
215
packages/pi-ai/src/providers/provider-capabilities.ts
Normal file
215
packages/pi-ai/src/providers/provider-capabilities.ts
Normal file
|
|
@ -0,0 +1,215 @@
|
|||
// GSD-2 — Provider Capabilities Registry (ADR-005 Phase 1)
|
||||
// Declarative registry of what each API provider supports, consolidating
|
||||
// scattered knowledge from *-shared.ts files into a queryable data structure.
|
||||
|
||||
import type { Api } from "../types.js";
|
||||
|
||||
// ─── Types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Declarative capability profile for an API provider.
 * Used by the model router to filter incompatible models and by the tool
 * system to adjust tool sets per provider.
 */
export interface ProviderCapabilities {
  /** Whether models from this provider support tool/function calling */
  toolCalling: boolean;
  /** Maximum number of tools the provider handles well (0 = unlimited) */
  maxTools: number;
  /** Whether tool results can contain images */
  imageToolResults: boolean;
  /** Whether the provider supports structured JSON output */
  structuredOutput: boolean;
  /** Tool call ID format constraints */
  toolCallIdFormat: {
    /** Maximum accepted ID length, in characters */
    maxLength: number;
    /** Pattern an acceptable ID must match in full */
    allowedChars: RegExp;
  };
  /** Whether thinking/reasoning blocks are preserved cross-turn */
  thinkingPersistence: "full" | "text-only" | "none";
  /** Schema features NOT supported (tools using these get filtered) */
  unsupportedSchemaFeatures: string[];
}
|
||||
|
||||
// ─── Registry ───────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Built-in provider capability profiles.
|
||||
*
|
||||
* Sources (consolidated from scattered *-shared.ts files):
|
||||
* - anthropic-shared.ts: normalizeToolCallId (64-char, [a-zA-Z0-9_-])
|
||||
* - openai-responses-shared.ts: ID normalization (64-char, fc_ prefix), image-in-tool-result workaround
|
||||
* - google-shared.ts: sanitizeSchemaForGoogle (patternProperties, const), requiresToolCallId
|
||||
* - mistral.ts: MISTRAL_TOOL_CALL_ID_LENGTH = 9
|
||||
* - amazon-bedrock.ts: normalizeToolCallId (64-char, [a-zA-Z0-9_-])
|
||||
*/
|
||||
export const PROVIDER_CAPABILITIES: Record<string, ProviderCapabilities> = {
|
||||
"anthropic-messages": {
|
||||
toolCalling: true,
|
||||
maxTools: 0,
|
||||
imageToolResults: true,
|
||||
structuredOutput: true,
|
||||
toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ },
|
||||
thinkingPersistence: "full",
|
||||
unsupportedSchemaFeatures: [],
|
||||
},
|
||||
"anthropic-vertex": {
|
||||
toolCalling: true,
|
||||
maxTools: 0,
|
||||
imageToolResults: true,
|
||||
structuredOutput: true,
|
||||
toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ },
|
||||
thinkingPersistence: "full",
|
||||
unsupportedSchemaFeatures: [],
|
||||
},
|
||||
"openai-responses": {
|
||||
toolCalling: true,
|
||||
maxTools: 0,
|
||||
imageToolResults: false, // images sent as separate user message, not in tool result
|
||||
structuredOutput: true,
|
||||
toolCallIdFormat: { maxLength: 512, allowedChars: /^.+$/ },
|
||||
thinkingPersistence: "text-only",
|
||||
unsupportedSchemaFeatures: [],
|
||||
},
|
||||
"azure-openai-responses": {
|
||||
toolCalling: true,
|
||||
maxTools: 0,
|
||||
imageToolResults: false,
|
||||
structuredOutput: true,
|
||||
toolCallIdFormat: { maxLength: 512, allowedChars: /^.+$/ },
|
||||
thinkingPersistence: "text-only",
|
||||
unsupportedSchemaFeatures: [],
|
||||
},
|
||||
"openai-codex-responses": {
|
||||
toolCalling: true,
|
||||
maxTools: 0,
|
||||
imageToolResults: false,
|
||||
structuredOutput: true,
|
||||
toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ },
|
||||
thinkingPersistence: "text-only",
|
||||
unsupportedSchemaFeatures: [],
|
||||
},
|
||||
"openai-completions": {
|
||||
toolCalling: true,
|
||||
maxTools: 0,
|
||||
imageToolResults: false,
|
||||
structuredOutput: true,
|
||||
toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ },
|
||||
thinkingPersistence: "text-only",
|
||||
unsupportedSchemaFeatures: [],
|
||||
},
|
||||
"google-generative-ai": {
|
||||
toolCalling: true,
|
||||
maxTools: 0,
|
||||
imageToolResults: true,
|
||||
structuredOutput: true,
|
||||
toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ },
|
||||
thinkingPersistence: "text-only",
|
||||
unsupportedSchemaFeatures: ["patternProperties", "const"],
|
||||
},
|
||||
"google-gemini-cli": {
|
||||
toolCalling: true,
|
||||
maxTools: 0,
|
||||
imageToolResults: true,
|
||||
structuredOutput: true,
|
||||
toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ },
|
||||
thinkingPersistence: "text-only",
|
||||
unsupportedSchemaFeatures: ["patternProperties", "const"],
|
||||
},
|
||||
"google-vertex": {
|
||||
toolCalling: true,
|
||||
maxTools: 0,
|
||||
imageToolResults: true,
|
||||
structuredOutput: true,
|
||||
toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ },
|
||||
thinkingPersistence: "text-only",
|
||||
unsupportedSchemaFeatures: ["patternProperties", "const"],
|
||||
},
|
||||
"mistral-conversations": {
|
||||
toolCalling: true,
|
||||
maxTools: 0,
|
||||
imageToolResults: false,
|
||||
structuredOutput: true,
|
||||
toolCallIdFormat: { maxLength: 9, allowedChars: /^[a-zA-Z0-9]+$/ },
|
||||
thinkingPersistence: "none",
|
||||
unsupportedSchemaFeatures: [],
|
||||
},
|
||||
"bedrock-converse-stream": {
|
||||
toolCalling: true,
|
||||
maxTools: 0,
|
||||
imageToolResults: true, // Bedrock supports image content blocks in tool results
|
||||
structuredOutput: true,
|
||||
toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ },
|
||||
thinkingPersistence: "text-only",
|
||||
unsupportedSchemaFeatures: [],
|
||||
},
|
||||
"ollama-chat": {
|
||||
toolCalling: true,
|
||||
maxTools: 0,
|
||||
imageToolResults: false,
|
||||
structuredOutput: false,
|
||||
toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ },
|
||||
thinkingPersistence: "none",
|
||||
unsupportedSchemaFeatures: [],
|
||||
},
|
||||
};
|
||||
|
||||
// ─── Default (permissive) profile for unknown providers ─────────────────────

// Fallback returned by getProviderCapabilities when an API has no registry
// entry. Deliberately permissive (ADR-005 principle 5) so unknown providers
// keep working: the 512-char /^.+$/ ID format accepts effectively any
// tool-call ID.
const DEFAULT_CAPABILITIES: ProviderCapabilities = {
  toolCalling: true,
  maxTools: 0,
  imageToolResults: true,
  structuredOutput: true,
  toolCallIdFormat: { maxLength: 512, allowedChars: /^.+$/ },
  thinkingPersistence: "text-only",
  unsupportedSchemaFeatures: [],
};
|
||||
|
||||
// ─── Public API ─────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Get capabilities for a provider API. Returns a permissive default for
|
||||
* unknown providers (preserving existing behavior per ADR-005 principle 5).
|
||||
*/
|
||||
export function getProviderCapabilities(api: string): ProviderCapabilities {
|
||||
return PROVIDER_CAPABILITIES[api] ?? DEFAULT_CAPABILITIES;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a provider supports all required schema features.
|
||||
* Returns the list of unsupported features (empty if all supported).
|
||||
*/
|
||||
export function getUnsupportedFeatures(api: string, requiredFeatures: string[]): string[] {
|
||||
const caps = getProviderCapabilities(api);
|
||||
return requiredFeatures.filter(f => caps.unsupportedSchemaFeatures.includes(f));
|
||||
}
|
||||
|
||||
/**
|
||||
* Deep-merge user-provided capability overrides with built-in defaults.
|
||||
* Partial overrides merge with the built-in profile for the given API.
|
||||
*/
|
||||
export function mergeCapabilityOverrides(
|
||||
api: string,
|
||||
overrides: Partial<Omit<ProviderCapabilities, "toolCallIdFormat">> & {
|
||||
toolCallIdFormat?: Partial<ProviderCapabilities["toolCallIdFormat"]>;
|
||||
},
|
||||
): ProviderCapabilities {
|
||||
const base = getProviderCapabilities(api);
|
||||
return {
|
||||
...base,
|
||||
...overrides,
|
||||
toolCallIdFormat: overrides.toolCallIdFormat
|
||||
? { ...base.toolCallIdFormat, ...overrides.toolCallIdFormat }
|
||||
: base.toolCallIdFormat,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Get all registered API names in the capability registry.
 * Used by lint rules to verify all providers in register-builtins.ts
 * have corresponding capability entries.
 *
 * @returns The registry's own keys, in insertion order.
 */
export function getRegisteredApis(): string[] {
  return Object.keys(PROVIDER_CAPABILITIES);
}
|
||||
189
packages/pi-ai/src/providers/transform-messages-report.test.ts
Normal file
189
packages/pi-ai/src/providers/transform-messages-report.test.ts
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
// GSD-2 — ProviderSwitchReport Tests (ADR-005 Phase 3)
|
||||
import { describe, test } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
|
||||
import { transformMessages, createEmptyReport, hasTransformations } from "./transform-messages.js";
|
||||
import type { ProviderSwitchReport } from "./transform-messages.js";
|
||||
import type { Message, Model, AssistantMessage, ToolCall } from "../types.js";
|
||||
|
||||
// ─── Helpers ────────────────────────────────────────────────────────────────
|
||||
|
||||
function makeModel(overrides: Partial<Model<any>> = {}): Model<any> {
|
||||
return {
|
||||
id: "claude-sonnet-4-6",
|
||||
name: "Claude Sonnet 4.6",
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
baseUrl: "",
|
||||
reasoning: false,
|
||||
input: ["text", "image"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 200000,
|
||||
maxTokens: 8192,
|
||||
...overrides,
|
||||
} as Model<any>;
|
||||
}
|
||||
|
||||
function makeAssistantMsg(overrides: Partial<AssistantMessage> = {}): AssistantMessage {
|
||||
return {
|
||||
role: "assistant",
|
||||
content: [],
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
model: "claude-sonnet-4-6",
|
||||
usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
|
||||
stopReason: "stop",
|
||||
timestamp: Date.now(),
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
// ─── createEmptyReport / hasTransformations ─────────────────────────────────
|
||||
|
||||
describe("createEmptyReport", () => {
|
||||
test("creates report with zero counters", () => {
|
||||
const report = createEmptyReport("anthropic-messages", "openai-responses");
|
||||
assert.equal(report.fromApi, "anthropic-messages");
|
||||
assert.equal(report.toApi, "openai-responses");
|
||||
assert.equal(report.thinkingBlocksDropped, 0);
|
||||
assert.equal(report.thinkingBlocksDowngraded, 0);
|
||||
assert.equal(report.toolCallIdsRemapped, 0);
|
||||
assert.equal(report.syntheticToolResultsInserted, 0);
|
||||
assert.equal(report.thoughtSignaturesDropped, 0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("hasTransformations", () => {
|
||||
test("returns false for empty report", () => {
|
||||
const report = createEmptyReport("a", "b");
|
||||
assert.equal(hasTransformations(report), false);
|
||||
});
|
||||
|
||||
test("returns true when any counter is non-zero", () => {
|
||||
const report = createEmptyReport("a", "b");
|
||||
report.thinkingBlocksDropped = 1;
|
||||
assert.equal(hasTransformations(report), true);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Report Tracking in transformMessages ───────────────────────────────────
|
||||
|
||||
// Each test passes a pre-created report into transformMessages and asserts
// exactly one counter was incremented. makeModel/makeAssistantMsg defaults are
// relied on to control whether the message is "same model" or cross-model.
describe("transformMessages with report tracking", () => {
  // Redacted thinking is opaque/encrypted; cross-model replay must drop it.
  test("tracks thinking blocks dropped for redacted cross-model", () => {
    const model = makeModel({ id: "gpt-5", api: "openai-responses", provider: "openai" });
    const messages: Message[] = [
      makeAssistantMsg({
        content: [
          { type: "thinking", thinking: "", redacted: true },
          { type: "text", text: "Hello" },
        ],
      }),
    ];
    const report = createEmptyReport("anthropic-messages", "openai-responses");
    transformMessages(messages, model, undefined, report);
    assert.equal(report.thinkingBlocksDropped, 1);
  });

  // Non-redacted thinking replayed to a different model becomes plain text.
  test("tracks thinking blocks downgraded to plain text", () => {
    const model = makeModel({ id: "gpt-5", api: "openai-responses", provider: "openai" });
    const messages: Message[] = [
      makeAssistantMsg({
        content: [
          { type: "thinking", thinking: "Let me think about this..." },
          { type: "text", text: "Here is my answer" },
        ],
      }),
    ];
    const report = createEmptyReport("anthropic-messages", "openai-responses");
    transformMessages(messages, model, undefined, report);
    assert.equal(report.thinkingBlocksDowngraded, 1);
  });

  // An ID with characters/length the target API rejects gets normalized,
  // and the remap is counted once per changed ID.
  test("tracks tool call IDs remapped", () => {
    const model = makeModel({ id: "claude-sonnet-4-6", api: "anthropic-messages", provider: "anthropic" });
    const toolCall: ToolCall = {
      type: "toolCall",
      id: "original-long-id-that-needs-normalization|with-special-chars",
      name: "bash",
      arguments: { command: "ls" },
    };
    const messages: Message[] = [
      makeAssistantMsg({
        provider: "openai",
        api: "openai-responses",
        model: "gpt-5",
        content: [toolCall],
      }),
    ];
    const normalizer = (id: string) => id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64);
    const report = createEmptyReport("openai-responses", "anthropic-messages");
    transformMessages(messages, model, normalizer, report);
    assert.equal(report.toolCallIdsRemapped, 1);
  });

  // thoughtSignature is Google-specific opaque context; it is stripped (and
  // counted) when the history is replayed to a non-Google model.
  test("tracks thought signatures dropped", () => {
    const model = makeModel({ id: "claude-sonnet-4-6", api: "anthropic-messages", provider: "anthropic" });
    const toolCall: ToolCall = {
      type: "toolCall",
      id: "tc_001",
      name: "bash",
      arguments: { command: "ls" },
      thoughtSignature: "some-opaque-signature",
    };
    const messages: Message[] = [
      makeAssistantMsg({
        provider: "google",
        api: "google-generative-ai",
        model: "gemini-2.5-pro",
        content: [toolCall],
      }),
    ];
    const report = createEmptyReport("google-generative-ai", "anthropic-messages");
    transformMessages(messages, model, undefined, report);
    assert.equal(report.thoughtSignaturesDropped, 1);
  });

  // An orphaned tool call (no matching tool result before the next assistant
  // message) gets a synthetic error result inserted, and counted.
  test("tracks synthetic tool results inserted", () => {
    const model = makeModel();
    const toolCall: ToolCall = {
      type: "toolCall",
      id: "tc_orphan",
      name: "bash",
      arguments: { command: "ls" },
    };
    // Assistant message with tool call followed by another assistant (no tool result)
    const messages: Message[] = [
      makeAssistantMsg({ content: [toolCall, { type: "text", text: "Using bash" }] }),
      makeAssistantMsg({ content: [{ type: "text", text: "Next message" }] }),
    ];
    const report = createEmptyReport("anthropic-messages", "anthropic-messages");
    transformMessages(messages, model, undefined, report);
    assert.equal(report.syntheticToolResultsInserted, 1);
  });

  // Same-model replay keeps thinking blocks intact, so no counters move.
  test("does not count transformations for same-model messages", () => {
    const model = makeModel();
    const messages: Message[] = [
      makeAssistantMsg({
        content: [
          { type: "thinking", thinking: "Let me think..." },
          { type: "text", text: "Answer" },
        ],
      }),
    ];
    const report = createEmptyReport("anthropic-messages", "anthropic-messages");
    transformMessages(messages, model, undefined, report);
    assert.equal(report.thinkingBlocksDowngraded, 0);
    assert.equal(report.thinkingBlocksDropped, 0);
  });

  // The report argument is optional; omitting it must not change behavior.
  test("works without report parameter (backward compatible)", () => {
    const model = makeModel();
    const messages: Message[] = [
      makeAssistantMsg({ content: [{ type: "text", text: "Hello" }] }),
    ];
    // Should not throw
    const result = transformMessages(messages, model);
    assert.ok(Array.isArray(result));
  });
});
|
||||
|
|
@ -1,5 +1,87 @@
|
|||
import type { Api, AssistantMessage, Message, Model, ToolCall, ToolResultMessage } from "../types.js";
|
||||
|
||||
/**
 * Report of context transformations during a cross-provider switch (ADR-005 Phase 3).
 * Tracks what was lost or downgraded when replaying conversation history to a different provider.
 *
 * Counters are mutated in place by transformMessages when a report is passed in;
 * create one with createEmptyReport and inspect it afterwards.
 */
export interface ProviderSwitchReport {
  /** API of the messages being transformed from */
  fromApi: string;
  /** API of the target model */
  toApi: string;
  /** Number of thinking blocks completely dropped (redacted/encrypted, cross-model) */
  thinkingBlocksDropped: number;
  /** Number of thinking blocks downgraded from structured to plain text */
  thinkingBlocksDowngraded: number;
  /** Number of tool call IDs that were remapped/normalized */
  toolCallIdsRemapped: number;
  /** Number of synthetic tool results inserted for orphaned tool calls */
  syntheticToolResultsInserted: number;
  /** Number of thought signatures dropped (Google-specific opaque context) */
  thoughtSignaturesDropped: number;
}
|
||||
|
||||
/**
|
||||
* Create an empty provider switch report.
|
||||
*/
|
||||
export function createEmptyReport(fromApi: string, toApi: string): ProviderSwitchReport {
|
||||
return {
|
||||
fromApi,
|
||||
toApi,
|
||||
thinkingBlocksDropped: 0,
|
||||
thinkingBlocksDowngraded: 0,
|
||||
toolCallIdsRemapped: 0,
|
||||
syntheticToolResultsInserted: 0,
|
||||
thoughtSignaturesDropped: 0,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a provider switch report has any non-zero transformations.
|
||||
*/
|
||||
export function hasTransformations(report: ProviderSwitchReport): boolean {
|
||||
return (
|
||||
report.thinkingBlocksDropped > 0 ||
|
||||
report.thinkingBlocksDowngraded > 0 ||
|
||||
report.toolCallIdsRemapped > 0 ||
|
||||
report.syntheticToolResultsInserted > 0 ||
|
||||
report.thoughtSignaturesDropped > 0
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a report, run transformMessages, and log if non-empty.
|
||||
* Convenience wrapper for provider adapters (ADR-005).
|
||||
*/
|
||||
export function transformMessagesWithReport<TApi extends Api>(
|
||||
messages: Message[],
|
||||
model: Model<TApi>,
|
||||
normalizeToolCallId?: (id: string, model: Model<TApi>, source: AssistantMessage) => string,
|
||||
sourceApi?: string,
|
||||
): Message[] {
|
||||
const report = createEmptyReport(sourceApi ?? "unknown", model.api);
|
||||
const result = transformMessages(messages, model, normalizeToolCallId, report);
|
||||
if (hasTransformations(report)) {
|
||||
logProviderSwitchReport(report);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/** Log a non-empty ProviderSwitchReport as a debug-level warning. */
|
||||
function logProviderSwitchReport(report: ProviderSwitchReport): void {
|
||||
const parts: string[] = [`Provider switch ${report.fromApi} → ${report.toApi}:`];
|
||||
if (report.thinkingBlocksDropped > 0) parts.push(`${report.thinkingBlocksDropped} thinking blocks dropped`);
|
||||
if (report.thinkingBlocksDowngraded > 0) parts.push(`${report.thinkingBlocksDowngraded} thinking blocks downgraded`);
|
||||
if (report.toolCallIdsRemapped > 0) parts.push(`${report.toolCallIdsRemapped} tool call IDs remapped`);
|
||||
if (report.syntheticToolResultsInserted > 0) parts.push(`${report.syntheticToolResultsInserted} synthetic tool results inserted`);
|
||||
if (report.thoughtSignaturesDropped > 0) parts.push(`${report.thoughtSignaturesDropped} thought signatures dropped`);
|
||||
// Use process.stderr for debug output — this is observable in verbose/debug modes
|
||||
// without polluting stdout which may be used for structured output (RPC/MCP).
|
||||
if (process.env.GSD_VERBOSE === "1" || process.env.PI_VERBOSE === "1") {
|
||||
process.stderr.write(`[provider-switch] ${parts.join(", ")}\n`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalize tool call ID for cross-provider compatibility.
|
||||
* OpenAI Responses API generates IDs that are 450+ chars with special characters like `|`.
|
||||
|
|
@ -9,6 +91,7 @@ export function transformMessages<TApi extends Api>(
|
|||
messages: Message[],
|
||||
model: Model<TApi>,
|
||||
normalizeToolCallId?: (id: string, model: Model<TApi>, source: AssistantMessage) => string,
|
||||
report?: ProviderSwitchReport,
|
||||
): Message[] {
|
||||
// Build a map of original tool call IDs to normalized IDs
|
||||
const toolCallIdMap = new Map<string, string>();
|
||||
|
|
@ -42,14 +125,20 @@ export function transformMessages<TApi extends Api>(
|
|||
// Redacted thinking is opaque encrypted content, only valid for the same model.
|
||||
// Drop it for cross-model to avoid API errors.
|
||||
if (block.redacted) {
|
||||
if (!isSameModel && report) report.thinkingBlocksDropped++;
|
||||
return isSameModel ? block : [];
|
||||
}
|
||||
// For same model: keep thinking blocks with signatures (needed for replay)
|
||||
// even if the thinking text is empty (OpenAI encrypted reasoning)
|
||||
if (isSameModel && block.thinkingSignature) return block;
|
||||
// Skip empty thinking blocks, convert others to plain text
|
||||
if (!block.thinking || block.thinking.trim() === "") return [];
|
||||
if (!block.thinking || block.thinking.trim() === "") {
|
||||
if (!isSameModel && report) report.thinkingBlocksDropped++;
|
||||
return [];
|
||||
}
|
||||
if (isSameModel) return block;
|
||||
// Downgrade: structured thinking → plain text
|
||||
if (report) report.thinkingBlocksDowngraded++;
|
||||
return {
|
||||
type: "text" as const,
|
||||
text: block.thinking,
|
||||
|
|
@ -71,6 +160,7 @@ export function transformMessages<TApi extends Api>(
|
|||
if (!isSameModel && toolCall.thoughtSignature) {
|
||||
normalizedToolCall = { ...toolCall };
|
||||
delete (normalizedToolCall as { thoughtSignature?: string }).thoughtSignature;
|
||||
if (report) report.thoughtSignaturesDropped++;
|
||||
}
|
||||
|
||||
if (!isSameModel && normalizeToolCallId) {
|
||||
|
|
@ -78,6 +168,7 @@ export function transformMessages<TApi extends Api>(
|
|||
if (normalizedId !== toolCall.id) {
|
||||
toolCallIdMap.set(toolCall.id, normalizedId);
|
||||
normalizedToolCall = { ...normalizedToolCall, id: normalizedId };
|
||||
if (report) report.toolCallIdsRemapped++;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -117,6 +208,7 @@ export function transformMessages<TApi extends Api>(
|
|||
isError: true,
|
||||
timestamp: Date.now(),
|
||||
} as ToolResultMessage);
|
||||
if (report) report.syntheticToolResultsInserted++;
|
||||
}
|
||||
}
|
||||
pendingToolCalls = [];
|
||||
|
|
@ -157,6 +249,7 @@ export function transformMessages<TApi extends Api>(
|
|||
isError: true,
|
||||
timestamp: Date.now(),
|
||||
} as ToolResultMessage);
|
||||
if (report) report.syntheticToolResultsInserted++;
|
||||
}
|
||||
}
|
||||
pendingToolCalls = [];
|
||||
|
|
|
|||
|
|
@ -1,140 +0,0 @@
|
|||
/**
|
||||
* Anthropic OAuth flow (Claude Pro/Max)
|
||||
*/
|
||||
|
||||
import { generatePKCE } from "./pkce.js";
|
||||
import type { OAuthCredentials, OAuthLoginCallbacks, OAuthProviderInterface } from "./types.js";
|
||||
|
||||
// Minimal base64 decoder; the client ID is stored encoded below —
// NOTE(review): presumably to keep it out of plain-text grep results; confirm intent.
const decode = (s: string) => atob(s);
// OAuth client ID (base64-encoded at rest, decoded at module load).
const CLIENT_ID = decode("OWQxYzI1MGEtZTYxYi00NGQ5LTg4ZWQtNTk0NGQxOTYyZjVl");
// Browser-facing consent page the user is sent to.
const AUTHORIZE_URL = "https://claude.ai/oauth/authorize";
// Token endpoint used for both the initial code exchange and refreshes.
const TOKEN_URL = "https://platform.claude.com/v1/oauth/token";
// Redirect target; the user copies the resulting "code#state" value back from it.
const REDIRECT_URI = "https://platform.claude.com/oauth/code/callback";
// Space-separated OAuth scopes requested at login.
const SCOPES = "org:create_api_key user:profile user:inference";
|
||||
|
||||
/**
|
||||
* Login with Anthropic OAuth (device code flow)
|
||||
*
|
||||
* @param onAuthUrl - Callback to handle the authorization URL (e.g., open browser)
|
||||
* @param onPromptCode - Callback to prompt user for the authorization code
|
||||
*/
|
||||
export async function loginAnthropic(
|
||||
onAuthUrl: (url: string) => void,
|
||||
onPromptCode: () => Promise<string>,
|
||||
): Promise<OAuthCredentials> {
|
||||
const { verifier, challenge } = await generatePKCE();
|
||||
|
||||
// Build authorization URL
|
||||
const authParams = new URLSearchParams({
|
||||
code: "true",
|
||||
client_id: CLIENT_ID,
|
||||
response_type: "code",
|
||||
redirect_uri: REDIRECT_URI,
|
||||
scope: SCOPES,
|
||||
code_challenge: challenge,
|
||||
code_challenge_method: "S256",
|
||||
state: verifier,
|
||||
});
|
||||
|
||||
const authUrl = `${AUTHORIZE_URL}?${authParams.toString()}`;
|
||||
|
||||
// Notify caller with URL to open
|
||||
onAuthUrl(authUrl);
|
||||
|
||||
// Wait for user to paste authorization code (format: code#state)
|
||||
const authCode = await onPromptCode();
|
||||
const splits = authCode.split("#");
|
||||
const code = splits[0];
|
||||
const state = splits[1];
|
||||
|
||||
// Exchange code for tokens
|
||||
const tokenResponse = await fetch(TOKEN_URL, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
grant_type: "authorization_code",
|
||||
client_id: CLIENT_ID,
|
||||
code: code,
|
||||
state: state,
|
||||
redirect_uri: REDIRECT_URI,
|
||||
code_verifier: verifier,
|
||||
}),
|
||||
signal: AbortSignal.timeout(30_000),
|
||||
});
|
||||
|
||||
if (!tokenResponse.ok) {
|
||||
const error = await tokenResponse.text();
|
||||
throw new Error(`Token exchange failed: ${error}`);
|
||||
}
|
||||
|
||||
const tokenData = (await tokenResponse.json()) as {
|
||||
access_token: string;
|
||||
refresh_token: string;
|
||||
expires_in: number;
|
||||
};
|
||||
|
||||
// Calculate expiry time (current time + expires_in seconds - 5 min buffer)
|
||||
const expiresAt = Date.now() + tokenData.expires_in * 1000 - 5 * 60 * 1000;
|
||||
|
||||
// Save credentials
|
||||
return {
|
||||
refresh: tokenData.refresh_token,
|
||||
access: tokenData.access_token,
|
||||
expires: expiresAt,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Refresh Anthropic OAuth token
|
||||
*/
|
||||
export async function refreshAnthropicToken(refreshToken: string): Promise<OAuthCredentials> {
|
||||
const response = await fetch(TOKEN_URL, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
grant_type: "refresh_token",
|
||||
client_id: CLIENT_ID,
|
||||
refresh_token: refreshToken,
|
||||
}),
|
||||
signal: AbortSignal.timeout(30_000),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const error = await response.text();
|
||||
throw new Error(`Anthropic token refresh failed: ${error}`);
|
||||
}
|
||||
|
||||
const data = (await response.json()) as {
|
||||
access_token: string;
|
||||
refresh_token: string;
|
||||
expires_in: number;
|
||||
};
|
||||
|
||||
return {
|
||||
refresh: data.refresh_token,
|
||||
access: data.access_token,
|
||||
expires: Date.now() + data.expires_in * 1000 - 5 * 60 * 1000,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * OAuthProviderInterface adapter for the Anthropic (Claude Pro/Max) flow.
 * Bridges the generic login/refresh callback shape to this module's functions.
 */
export const anthropicOAuthProvider: OAuthProviderInterface = {
  id: "anthropic",
  name: "Anthropic (Claude Pro/Max)",

  // Delegate to loginAnthropic, adapting the generic UI callbacks to its
  // (onAuthUrl, onPromptCode) signature.
  async login(callbacks: OAuthLoginCallbacks): Promise<OAuthCredentials> {
    return loginAnthropic(
      (url) => callbacks.onAuth({ url }),
      () => callbacks.onPrompt({ message: "Paste the authorization code:" }),
    );
  },

  async refreshToken(credentials: OAuthCredentials): Promise<OAuthCredentials> {
    return refreshAnthropicToken(credentials.refresh);
  },

  // The bearer access token is used directly wherever an API key is expected.
  getApiKey(credentials: OAuthCredentials): string {
    return credentials.access;
  },
};
|
||||
|
|
@ -3,14 +3,14 @@
|
|||
*
|
||||
* This module handles login, token refresh, and credential storage
|
||||
* for OAuth-based providers:
|
||||
* - Anthropic (Claude Pro/Max)
|
||||
* - GitHub Copilot
|
||||
* - Google Cloud Code Assist (Gemini CLI)
|
||||
* - Antigravity (Gemini 3, Claude, GPT-OSS via Google Cloud)
|
||||
*
|
||||
* Note: Anthropic OAuth was removed per TOS compliance (see docs/user-docs/claude-code-auth-compliance.md).
|
||||
* Use API keys or the local Claude Code CLI for Anthropic access.
|
||||
*/
|
||||
|
||||
// Anthropic
|
||||
export { anthropicOAuthProvider, loginAnthropic, refreshAnthropicToken } from "./anthropic.js";
|
||||
// GitHub Copilot
|
||||
export {
|
||||
getGitHubCopilotBaseUrl,
|
||||
|
|
@ -32,7 +32,6 @@ export * from "./types.js";
|
|||
// Provider Registry
|
||||
// ============================================================================
|
||||
|
||||
import { anthropicOAuthProvider } from "./anthropic.js";
|
||||
import { githubCopilotOAuthProvider } from "./github-copilot.js";
|
||||
import { antigravityOAuthProvider } from "./google-antigravity.js";
|
||||
import { geminiCliOAuthProvider } from "./google-gemini-cli.js";
|
||||
|
|
@ -40,7 +39,6 @@ import { openaiCodexOAuthProvider } from "./openai-codex.js";
|
|||
import type { OAuthCredentials, OAuthProviderId, OAuthProviderInterface } from "./types.js";
|
||||
|
||||
const BUILT_IN_OAUTH_PROVIDERS: OAuthProviderInterface[] = [
|
||||
anthropicOAuthProvider,
|
||||
githubCopilotOAuthProvider,
|
||||
geminiCliOAuthProvider,
|
||||
antigravityOAuthProvider,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@gsd/pi-coding-agent",
|
||||
"version": "2.68.0",
|
||||
"version": "2.71.0",
|
||||
"description": "Coding agent CLI (vendored from pi-mono)",
|
||||
"type": "module",
|
||||
"piConfig": {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,468 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { test } from "node:test";
|
||||
|
||||
import { handleAgentEvent } from "../modes/interactive/controllers/chat-controller.js";
|
||||
|
||||
function makeUsage() {
|
||||
return {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
};
|
||||
}
|
||||
|
||||
function makeAssistant(content: any[]) {
|
||||
return {
|
||||
role: "assistant",
|
||||
content,
|
||||
api: "anthropic-messages",
|
||||
provider: "claude-code",
|
||||
model: "claude-sonnet-4",
|
||||
usage: makeUsage(),
|
||||
stopReason: "stop",
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
}
|
||||
|
||||
function createHost() {
|
||||
const chatContainer = {
|
||||
children: [] as any[],
|
||||
addChild(component: any) {
|
||||
this.children.push(component);
|
||||
},
|
||||
removeChild(component: any) {
|
||||
const idx = this.children.indexOf(component);
|
||||
if (idx !== -1) this.children.splice(idx, 1);
|
||||
},
|
||||
clear() {
|
||||
this.children = [];
|
||||
},
|
||||
};
|
||||
|
||||
const pinnedMessageContainer = {
|
||||
children: [] as any[],
|
||||
addChild(component: any) {
|
||||
this.children.push(component);
|
||||
},
|
||||
removeChild(component: any) {
|
||||
const idx = this.children.indexOf(component);
|
||||
if (idx !== -1) this.children.splice(idx, 1);
|
||||
},
|
||||
clear() {
|
||||
this.children = [];
|
||||
},
|
||||
};
|
||||
|
||||
const host: any = {
|
||||
isInitialized: true,
|
||||
init: async () => {},
|
||||
defaultEditor: { onEscape: undefined },
|
||||
editor: {},
|
||||
session: { retryAttempt: 0, abortCompaction: () => {}, abortRetry: () => {} },
|
||||
ui: { requestRender: () => {}, terminal: { rows: 50 } },
|
||||
footer: { invalidate: () => {} },
|
||||
keybindings: {},
|
||||
statusContainer: { clear: () => {}, addChild: () => {} },
|
||||
chatContainer,
|
||||
settingsManager: { getTimestampFormat: () => "date-time-iso", getShowImages: () => false },
|
||||
pendingTools: new Map(),
|
||||
toolOutputExpanded: false,
|
||||
hideThinkingBlock: false,
|
||||
isBashMode: false,
|
||||
defaultWorkingMessage: "Working...",
|
||||
compactionQueuedMessages: [],
|
||||
editorContainer: {},
|
||||
pendingMessagesContainer: { clear: () => {} },
|
||||
pinnedMessageContainer,
|
||||
addMessageToChat: () => {},
|
||||
getMarkdownThemeWithSettings: () => ({}),
|
||||
formatWebSearchResult: () => "",
|
||||
getRegisteredToolDefinition: () => undefined,
|
||||
checkShutdownRequested: async () => {},
|
||||
rebuildChatFromMessages: () => {},
|
||||
flushCompactionQueue: async () => {},
|
||||
showStatus: () => {},
|
||||
showError: () => {},
|
||||
updatePendingMessagesDisplay: () => {},
|
||||
updateTerminalTitle: () => {},
|
||||
updateEditorBorderColor: () => {},
|
||||
};
|
||||
|
||||
return host;
|
||||
}
|
||||
|
||||
// Verifies ordering: a tool-execution block renders as soon as its external
// result arrives, while the assistant text component is deferred until actual
// text streams in — so tool output never appears below later assistant text.
test("chat-controller keeps tool output ahead of delayed assistant text for external tool streams", async () => {
  // ToolExecutionComponent uses the global theme singleton.
  // Install a minimal no-op theme implementation for this unit test.
  (globalThis as any)[Symbol.for("@gsd/pi-coding-agent:theme")] = {
    fg: (_key: string, text: string) => text,
    bg: (_key: string, text: string) => text,
    bold: (text: string) => text,
    italic: (text: string) => text,
    truncate: (text: string) => text,
  };

  const host = createHost();
  const toolId = "mcp-tool-1";
  const toolCall = {
    type: "toolCall",
    id: toolId,
    name: "exec_command",
    arguments: { cmd: "echo hi" },
  };

  await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any);

  assert.equal(host.streamingComponent, undefined, "assistant component should be deferred at message_start");
  assert.equal(host.chatContainer.children.length, 0, "nothing should render before content arrives");

  // Tool-only update: the tool result is attached via externalResult.
  await handleAgentEvent(
    host,
    {
      type: "message_update",
      message: makeAssistant([toolCall]),
      assistantMessageEvent: {
        type: "toolcall_end",
        contentIndex: 0,
        toolCall: {
          ...toolCall,
          externalResult: {
            content: [{ type: "text", text: "tool output" }],
            details: {},
            isError: false,
          },
        },
        partial: makeAssistant([toolCall]),
      },
    } as any,
  );

  assert.equal(host.streamingComponent, undefined, "assistant text container should remain deferred for tool-only updates");
  assert.equal(host.chatContainer.children.length, 1, "tool execution block should render immediately");
  assert.equal(host.chatContainer.children[0]?.constructor?.name, "ToolExecutionComponent");

  // Re-assert required host method before the text-bearing update path.
  host.getMarkdownThemeWithSettings = () => ({});

  // Text delta arrives after the tool output — it must render below it.
  await handleAgentEvent(
    host,
    {
      type: "message_update",
      message: makeAssistant([toolCall, { type: "text", text: "done" }]),
      assistantMessageEvent: {
        type: "text_delta",
        contentIndex: 1,
        delta: "done",
        partial: makeAssistant([toolCall, { type: "text", text: "done" }]),
      },
    } as any,
  );

  assert.equal(host.chatContainer.children.length, 2, "assistant content should render after existing tool output");
  assert.equal(host.chatContainer.children[0]?.constructor?.name, "ToolExecutionComponent");
  assert.equal(host.chatContainer.children[1]?.constructor?.name, "AssistantMessageComponent");
});
|
||||
|
||||
// Same ordering guarantee as above, but for serverToolUse content blocks
// (MCP server-side tools such as secure_env_collect).
test("chat-controller keeps serverToolUse output ahead of assistant text when external results arrive", async () => {
  // No-op theme stub for ToolExecutionComponent's global theme singleton.
  (globalThis as any)[Symbol.for("@gsd/pi-coding-agent:theme")] = {
    fg: (_key: string, text: string) => text,
    bg: (_key: string, text: string) => text,
    bold: (text: string) => text,
    italic: (text: string) => text,
    truncate: (text: string) => text,
  };

  const host = createHost();
  const toolId = "mcp-secure-1";
  const serverToolUse = {
    type: "serverToolUse",
    id: toolId,
    name: "mcp__gsd-workflow__secure_env_collect",
    input: { projectDir: "/tmp/project", keys: [{ key: "SECURE_PASSWORD" }], destination: "dotenv" },
  };

  await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any);

  // First update carries only the server tool use, no text.
  await handleAgentEvent(
    host,
    {
      type: "message_update",
      message: makeAssistant([serverToolUse]),
      assistantMessageEvent: {
        type: "server_tool_use",
        contentIndex: 0,
        partial: makeAssistant([serverToolUse]),
      },
    } as any,
  );

  assert.equal(host.streamingComponent, undefined, "assistant content should stay deferred while only tool content streams");
  assert.equal(host.chatContainer.children.length, 1, "server tool block should render immediately");
  assert.equal(host.chatContainer.children[0]?.constructor?.name, "ToolExecutionComponent");

  host.getMarkdownThemeWithSettings = () => ({});
  // Second update: the (cancelled) external result plus follow-up text.
  const resultMessage = makeAssistant([
    {
      ...serverToolUse,
      externalResult: {
        content: [{ type: "text", text: "secure_env_collect was cancelled by user." }],
        details: {},
        isError: true,
      },
    },
    { type: "text", text: "The secure password collection was cancelled." },
  ]);

  await handleAgentEvent(
    host,
    {
      type: "message_update",
      message: resultMessage,
      assistantMessageEvent: {
        type: "server_tool_use",
        contentIndex: 0,
        partial: resultMessage,
      },
    } as any,
  );

  assert.equal(host.chatContainer.children.length, 2, "assistant text should render after existing server tool output");
  assert.equal(host.chatContainer.children[0]?.constructor?.name, "ToolExecutionComponent");
  assert.equal(host.chatContainer.children[1]?.constructor?.name, "AssistantMessageComponent");
});
|
||||
|
||||
// When a message mixes text and tool calls, the latest assistant text is
// pinned above the editor (border + markdown) while tools execute.
test("chat-controller pins latest assistant text above editor when tool calls are present", async () => {
  // No-op theme stub for ToolExecutionComponent's global theme singleton.
  (globalThis as any)[Symbol.for("@gsd/pi-coding-agent:theme")] = {
    fg: (_key: string, text: string) => text,
    bg: (_key: string, text: string) => text,
    bold: (text: string) => text,
    italic: (text: string) => text,
    truncate: (text: string) => text,
  };

  const host = createHost();
  const toolId = "tool-pin-1";
  const toolCall = {
    type: "toolCall",
    id: toolId,
    name: "exec_command",
    arguments: { cmd: "echo hi" },
  };

  await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any);

  assert.equal(host.pinnedMessageContainer.children.length, 0, "pinned zone should be empty at message_start");

  // Send a message with text followed by a tool call
  host.getMarkdownThemeWithSettings = () => ({});
  await handleAgentEvent(
    host,
    {
      type: "message_update",
      message: makeAssistant([
        { type: "text", text: "Looking at the files now." },
        toolCall,
      ]),
      assistantMessageEvent: {
        type: "toolcall_end",
        contentIndex: 1,
        toolCall: {
          ...toolCall,
          externalResult: {
            content: [{ type: "text", text: "file contents" }],
            details: {},
            isError: false,
          },
        },
        partial: makeAssistant([{ type: "text", text: "Looking at the files now." }, toolCall]),
      },
    } as any,
  );

  // Pinned zone should now have a DynamicBorder and a Markdown component
  assert.equal(host.pinnedMessageContainer.children.length, 2, "pinned zone should have border + markdown");
  assert.equal(host.pinnedMessageContainer.children[0]?.constructor?.name, "DynamicBorder");
  assert.equal(host.pinnedMessageContainer.children[1]?.constructor?.name, "Markdown");
});
|
||||
|
||||
// A new assistant message must reset the pinned zone so stale pinned text
// from the previous message is not shown.
test("chat-controller clears pinned zone when a new assistant message starts", async () => {
  // No-op theme stub for ToolExecutionComponent's global theme singleton.
  (globalThis as any)[Symbol.for("@gsd/pi-coding-agent:theme")] = {
    fg: (_key: string, text: string) => text,
    bg: (_key: string, text: string) => text,
    bold: (text: string) => text,
    italic: (text: string) => text,
    truncate: (text: string) => text,
  };

  const host = createHost();
  const toolCall = {
    type: "toolCall",
    id: "tool-clear-1",
    name: "exec_command",
    arguments: { cmd: "echo hi" },
  };

  await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any);

  // Populate the pinned zone
  host.getMarkdownThemeWithSettings = () => ({});
  await handleAgentEvent(
    host,
    {
      type: "message_update",
      message: makeAssistant([{ type: "text", text: "Working on it." }, toolCall]),
      assistantMessageEvent: {
        type: "toolcall_end",
        contentIndex: 1,
        toolCall: {
          ...toolCall,
          externalResult: {
            content: [{ type: "text", text: "ok" }],
            details: {},
            isError: false,
          },
        },
        partial: makeAssistant([{ type: "text", text: "Working on it." }, toolCall]),
      },
    } as any,
  );

  assert.ok(host.pinnedMessageContainer.children.length > 0, "pinned zone should be populated");

  // Start a new assistant message — pinned zone should clear
  await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any);

  assert.equal(host.pinnedMessageContainer.children.length, 0, "pinned zone should clear on new assistant message");
});
|
||||
|
||||
// Turn completion (agent_end) must also clear the pinned zone.
test("chat-controller clears pinned zone when the agent turn ends", async () => {
  // No-op theme stub for ToolExecutionComponent's global theme singleton.
  (globalThis as any)[Symbol.for("@gsd/pi-coding-agent:theme")] = {
    fg: (_key: string, text: string) => text,
    bg: (_key: string, text: string) => text,
    bold: (text: string) => text,
    italic: (text: string) => text,
    truncate: (text: string) => text,
  };

  const host = createHost();
  const toolCall = {
    type: "toolCall",
    id: "tool-clear-on-end-1",
    name: "exec_command",
    arguments: { cmd: "echo hi" },
  };

  await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any);

  // Populate the pinned zone with text + completed tool call.
  host.getMarkdownThemeWithSettings = () => ({});
  await handleAgentEvent(
    host,
    {
      type: "message_update",
      message: makeAssistant([{ type: "text", text: "Working on it." }, toolCall]),
      assistantMessageEvent: {
        type: "toolcall_end",
        contentIndex: 1,
        toolCall: {
          ...toolCall,
          externalResult: {
            content: [{ type: "text", text: "ok" }],
            details: {},
            isError: false,
          },
        },
        partial: makeAssistant([{ type: "text", text: "Working on it." }, toolCall]),
      },
    } as any,
  );

  assert.ok(host.pinnedMessageContainer.children.length > 0, "pinned zone should be populated before agent_end");

  await handleAgentEvent(host, { type: "agent_end" } as any);

  assert.equal(host.pinnedMessageContainer.children.length, 0, "pinned zone should clear on agent_end");
});
|
||||
|
||||
// message_end (e.g. right before a form elicitation) must clear the pinned
// zone so the same text is not shown twice.
test("chat-controller clears pinned zone when assistant message ends", async () => {
  // No-op theme stub for ToolExecutionComponent's global theme singleton.
  (globalThis as any)[Symbol.for("@gsd/pi-coding-agent:theme")] = {
    fg: (_key: string, text: string) => text,
    bg: (_key: string, text: string) => text,
    bold: (text: string) => text,
    italic: (text: string) => text,
    truncate: (text: string) => text,
  };

  const host = createHost();
  const toolCall = {
    type: "toolCall",
    id: "tool-msg-end-1",
    name: "exec_command",
    arguments: { cmd: "echo hi" },
  };

  await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any);

  // Populate the pinned zone with text + completed tool call.
  host.getMarkdownThemeWithSettings = () => ({});
  const msgContent = [{ type: "text", text: "Summary after tools." }, toolCall];
  await handleAgentEvent(
    host,
    {
      type: "message_update",
      message: makeAssistant(msgContent),
      assistantMessageEvent: {
        type: "toolcall_end",
        contentIndex: 1,
        toolCall: {
          ...toolCall,
          externalResult: {
            content: [{ type: "text", text: "ok" }],
            details: {},
            isError: false,
          },
        },
        partial: makeAssistant(msgContent),
      },
    } as any,
  );

  assert.ok(host.pinnedMessageContainer.children.length > 0, "pinned zone should be populated during streaming");

  // End the assistant message (e.g. before form elicitation) — pinned zone should clear
  await handleAgentEvent(host, { type: "message_end", message: makeAssistant(msgContent) } as any);

  assert.equal(host.pinnedMessageContainer.children.length, 0, "pinned zone should clear on message_end to prevent duplicate display");
});
|
||||
|
||||
test("chat-controller does not pin when there are no tool calls", async () => {
|
||||
(globalThis as any)[Symbol.for("@gsd/pi-coding-agent:theme")] = {
|
||||
fg: (_key: string, text: string) => text,
|
||||
bg: (_key: string, text: string) => text,
|
||||
bold: (text: string) => text,
|
||||
italic: (text: string) => text,
|
||||
truncate: (text: string) => text,
|
||||
};
|
||||
|
||||
const host = createHost();
|
||||
|
||||
await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any);
|
||||
|
||||
host.getMarkdownThemeWithSettings = () => ({});
|
||||
await handleAgentEvent(
|
||||
host,
|
||||
{
|
||||
type: "message_update",
|
||||
message: makeAssistant([{ type: "text", text: "Just some text, no tools." }]),
|
||||
assistantMessageEvent: {
|
||||
type: "text_delta",
|
||||
contentIndex: 0,
|
||||
delta: "Just some text, no tools.",
|
||||
partial: makeAssistant([{ type: "text", text: "Just some text, no tools." }]),
|
||||
},
|
||||
} as any,
|
||||
);
|
||||
|
||||
assert.equal(host.pinnedMessageContainer.children.length, 0, "pinned zone should stay empty without tool calls");
|
||||
});
|
||||
|
|
@ -43,6 +43,9 @@ export type {
|
|||
BeforeProviderRequestEventResult,
|
||||
// Context
|
||||
CompactOptions,
|
||||
// Events - Adjust Tool Set (ADR-005)
|
||||
AdjustToolSetEvent,
|
||||
AdjustToolSetResult,
|
||||
// Events - Agent
|
||||
ContextEvent,
|
||||
// Event Results
|
||||
|
|
@ -135,6 +138,7 @@ export type {
|
|||
ToolCallEvent,
|
||||
ToolCallEventResult,
|
||||
// Tools
|
||||
ToolCompatibility,
|
||||
ToolDefinition,
|
||||
// Events - Tool Execution
|
||||
ToolExecutionEndEvent,
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ import type { ExecOptions } from "../exec.js";
|
|||
import { execCommand } from "../exec.js";
|
||||
import { getUntrustedExtensionPaths } from "./project-trust.js";
|
||||
export { isProjectTrusted, trustProject, getUntrustedExtensionPaths } from "./project-trust.js";
|
||||
import { registerToolCompatibility } from "../tools/tool-compatibility-registry.js";
|
||||
import type {
|
||||
Extension,
|
||||
ExtensionAPI,
|
||||
|
|
@ -428,8 +429,9 @@ export function createExtensionRuntime(): ExtensionRuntime {
|
|||
unregisterProvider: (name) => {
|
||||
runtime.pendingProviderRegistrations = runtime.pendingProviderRegistrations.filter((r) => r.name !== name);
|
||||
},
|
||||
// Stub replaced by ExtensionRunner at construction time via bindEmitMethods().
|
||||
// Stubs replaced by ExtensionRunner at construction time via bindEmitMethods().
|
||||
emitBeforeModelSelect: async () => undefined,
|
||||
emitAdjustToolSet: async () => undefined,
|
||||
};
|
||||
|
||||
return runtime;
|
||||
|
|
@ -459,6 +461,10 @@ function createExtensionAPI(
|
|||
definition: tool,
|
||||
extensionPath: extension.path,
|
||||
});
|
||||
// ADR-005: auto-register tool compatibility metadata
|
||||
if (tool.compatibility) {
|
||||
registerToolCompatibility(tool.name, tool.compatibility);
|
||||
}
|
||||
runtime.refreshTools();
|
||||
},
|
||||
|
||||
|
|
@ -585,6 +591,10 @@ function createExtensionAPI(
|
|||
return runtime.emitBeforeModelSelect(event);
|
||||
},
|
||||
|
||||
async emitAdjustToolSet(event: Omit<import("./types.js").AdjustToolSetEvent, "type">): Promise<import("./types.js").AdjustToolSetResult | undefined> {
|
||||
return runtime.emitAdjustToolSet(event);
|
||||
},
|
||||
|
||||
events: eventBus,
|
||||
} as ExtensionAPI;
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,8 @@ import type { KeyAction, KeybindingsConfig } from "../keybindings.js";
|
|||
import type { ModelRegistry } from "../model-registry.js";
|
||||
import type { SessionManager } from "../session-manager.js";
|
||||
import type {
|
||||
AdjustToolSetEvent,
|
||||
AdjustToolSetResult,
|
||||
BeforeAgentStartEvent,
|
||||
BeforeAgentStartEventResult,
|
||||
BeforeModelSelectEvent,
|
||||
|
|
@ -234,6 +236,7 @@ export class ExtensionRunner {
|
|||
this.modelRegistry = modelRegistry;
|
||||
// Bind emit methods into the shared runtime so createExtensionAPI can delegate to them.
|
||||
this.runtime.emitBeforeModelSelect = (event) => this.emitBeforeModelSelect(event);
|
||||
this.runtime.emitAdjustToolSet = (event) => this.emitAdjustToolSet(event);
|
||||
}
|
||||
|
||||
bindCore(actions: ExtensionActions, contextActions: ExtensionContextActions): void {
|
||||
|
|
@ -713,6 +716,21 @@ export class ExtensionRunner {
|
|||
return result;
|
||||
}
|
||||
|
||||
async emitAdjustToolSet(event: Omit<AdjustToolSetEvent, "type">): Promise<AdjustToolSetResult | undefined> {
|
||||
let result: AdjustToolSetResult | undefined;
|
||||
await this.invokeHandlers("adjust_tool_set", () => ({
|
||||
type: "adjust_tool_set" as const,
|
||||
...event,
|
||||
} satisfies AdjustToolSetEvent), (handlerResult) => {
|
||||
if (handlerResult) {
|
||||
result = handlerResult as AdjustToolSetResult;
|
||||
return { done: true }; // first override wins
|
||||
}
|
||||
return { done: false };
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
||||
async emitBeforeAgentStart(
|
||||
prompt: string,
|
||||
images: ImageContent[] | undefined,
|
||||
|
|
|
|||
|
|
@ -88,6 +88,8 @@ export interface ExtensionUIDialogOptions {
|
|||
timeout?: number;
|
||||
/** When true, the user can select multiple options. The return type becomes `string[]`. */
|
||||
allowMultiple?: boolean;
|
||||
/** When true, text input dialogs should hide typed characters if supported by the client surface. */
|
||||
secure?: boolean;
|
||||
}
|
||||
|
||||
/** Placement for extension widgets. */
|
||||
|
|
@ -331,6 +333,19 @@ export interface ToolRenderResultOptions {
|
|||
isPartial: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tool compatibility metadata for provider-aware tool filtering (ADR-005 Phase 2).
|
||||
* Tools without compatibility metadata are assumed universally compatible.
|
||||
*/
|
||||
export interface ToolCompatibility {
|
||||
/** Tool produces image content in results (filtered for providers without imageToolResults) */
|
||||
producesImages?: boolean;
|
||||
/** Tool requires schema features that some providers don't support (e.g., ["patternProperties"]) */
|
||||
schemaFeatures?: string[];
|
||||
/** Tool is effective only with models above a minimum capability threshold */
|
||||
minCapabilityTier?: "light" | "standard" | "heavy";
|
||||
}
|
||||
|
||||
/**
|
||||
* Tool definition for registerTool().
|
||||
*/
|
||||
|
|
@ -347,6 +362,8 @@ export interface ToolDefinition<TParams extends TSchema = TSchema, TDetails = un
|
|||
promptGuidelines?: string[];
|
||||
/** Parameter schema (TypeBox) */
|
||||
parameters: TParams;
|
||||
/** Provider compatibility metadata (ADR-005). Omit for universally compatible tools. */
|
||||
compatibility?: ToolCompatibility;
|
||||
|
||||
/** Execute the tool. */
|
||||
execute(
|
||||
|
|
@ -619,6 +636,30 @@ export interface BeforeModelSelectResult {
|
|||
modelId: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fired after model selection to allow extensions to adjust the active tool set (ADR-005 Phase 4).
|
||||
* Extensions can add, remove, or reorder tools based on the selected model's provider capabilities.
|
||||
*/
|
||||
export interface AdjustToolSetEvent {
|
||||
type: "adjust_tool_set";
|
||||
/** The selected model's API type */
|
||||
selectedModelApi: string;
|
||||
/** The selected model's provider */
|
||||
selectedModelProvider: string;
|
||||
/** The selected model ID */
|
||||
selectedModelId: string;
|
||||
/** Current active tool names */
|
||||
activeToolNames: string[];
|
||||
/** Tools already filtered by provider compatibility */
|
||||
filteredTools: string[];
|
||||
}
|
||||
|
||||
/** Result from adjust_tool_set event handler. Return { toolNames } to override tool set. */
|
||||
export interface AdjustToolSetResult {
|
||||
/** Replacement tool names. If omitted, the default filtering is used. */
|
||||
toolNames?: string[];
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// User Bash Events
|
||||
// ============================================================================
|
||||
|
|
@ -1069,6 +1110,7 @@ export interface ExtensionAPI {
|
|||
on(event: "user_bash", handler: ExtensionHandler<UserBashEvent, UserBashEventResult>): void;
|
||||
on(event: "input", handler: ExtensionHandler<InputEvent, InputEventResult>): void;
|
||||
on(event: "before_model_select", handler: ExtensionHandler<BeforeModelSelectEvent, BeforeModelSelectResult>): void;
|
||||
on(event: "adjust_tool_set", handler: ExtensionHandler<AdjustToolSetEvent, AdjustToolSetResult>): void;
|
||||
|
||||
// =========================================================================
|
||||
// Event Emission (for host extensions that orchestrate model selection)
|
||||
|
|
@ -1077,6 +1119,9 @@ export interface ExtensionAPI {
|
|||
/** Emit before_model_select event. Returns override model ID or undefined. */
|
||||
emitBeforeModelSelect(event: Omit<BeforeModelSelectEvent, "type">): Promise<BeforeModelSelectResult | undefined>;
|
||||
|
||||
/** Emit adjust_tool_set event (ADR-005). Returns override tool names or undefined. */
|
||||
emitAdjustToolSet(event: Omit<AdjustToolSetEvent, "type">): Promise<AdjustToolSetResult | undefined>;
|
||||
|
||||
// =========================================================================
|
||||
// Tool Registration
|
||||
// =========================================================================
|
||||
|
|
@ -1395,6 +1440,8 @@ export interface ExtensionRuntimeState {
|
|||
unregisterProvider: (name: string) => void;
|
||||
/** Emit before_model_select event to all registered handlers. Bound by ExtensionRunner. */
|
||||
emitBeforeModelSelect: (event: Omit<BeforeModelSelectEvent, "type">) => Promise<BeforeModelSelectResult | undefined>;
|
||||
/** Emit adjust_tool_set event to all registered handlers. Bound by ExtensionRunner (ADR-005). */
|
||||
emitAdjustToolSet: (event: Omit<AdjustToolSetEvent, "type">) => Promise<AdjustToolSetResult | undefined>;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -172,16 +172,49 @@ export function hasRootMarkers(cwd: string, markers: string[]): boolean {
|
|||
// Local Binary Resolution
|
||||
// =============================================================================
|
||||
|
||||
const LOCAL_BIN_PATHS: Array<{ markers: string[]; binDir: string }> = [
|
||||
{ markers: ["package.json", "package-lock.json", "yarn.lock", "pnpm-lock.yaml"], binDir: "node_modules/.bin" },
|
||||
{ markers: ["pyproject.toml", "requirements.txt", "setup.py", "Pipfile"], binDir: ".venv/bin" },
|
||||
{ markers: ["pyproject.toml", "requirements.txt", "setup.py", "Pipfile"], binDir: "venv/bin" },
|
||||
{ markers: ["pyproject.toml", "requirements.txt", "setup.py", "Pipfile"], binDir: ".env/bin" },
|
||||
{ markers: ["Gemfile", "Gemfile.lock"], binDir: "vendor/bundle/bin" },
|
||||
{ markers: ["Gemfile", "Gemfile.lock"], binDir: "bin" },
|
||||
{ markers: ["go.mod", "go.sum"], binDir: "bin" },
|
||||
const LOCAL_BIN_PATHS: Array<{ markers: string[]; binDirs: string[] }> = [
|
||||
{ markers: ["package.json", "package-lock.json", "yarn.lock", "pnpm-lock.yaml"], binDirs: ["node_modules/.bin"] },
|
||||
{ markers: ["pyproject.toml", "requirements.txt", "setup.py", "Pipfile"], binDirs: [".venv/bin", ".venv/Scripts"] },
|
||||
{ markers: ["pyproject.toml", "requirements.txt", "setup.py", "Pipfile"], binDirs: ["venv/bin", "venv/Scripts"] },
|
||||
{ markers: ["pyproject.toml", "requirements.txt", "setup.py", "Pipfile"], binDirs: [".env/bin", ".env/Scripts"] },
|
||||
{ markers: ["Gemfile", "Gemfile.lock"], binDirs: ["vendor/bundle/bin"] },
|
||||
{ markers: ["Gemfile", "Gemfile.lock"], binDirs: ["bin"] },
|
||||
{ markers: ["go.mod", "go.sum"], binDirs: ["bin"] },
|
||||
];
|
||||
|
||||
function getWindowsBinaryCandidates(command: string): string[] {
|
||||
const ext = path.extname(command).toLowerCase();
|
||||
if (ext) {
|
||||
return [command];
|
||||
}
|
||||
|
||||
return [
|
||||
command,
|
||||
`${command}.cmd`,
|
||||
`${command}.bat`,
|
||||
`${command}.exe`,
|
||||
];
|
||||
}
|
||||
|
||||
export function resolveLocalBinaryPath(command: string, cwd: string, isWindows: boolean): string | null {
|
||||
for (const { markers, binDirs } of LOCAL_BIN_PATHS) {
|
||||
if (!hasRootMarkers(cwd, markers)) continue;
|
||||
|
||||
for (const binDir of binDirs) {
|
||||
const basePath = path.join(cwd, binDir, command);
|
||||
const candidates = isWindows ? getWindowsBinaryCandidates(basePath) : [basePath];
|
||||
|
||||
for (const candidate of candidates) {
|
||||
if (fs.existsSync(candidate)) {
|
||||
return candidate;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
export function which(command: string): string | null {
|
||||
// On Windows, prefer `where.exe` over `which` — MSYS/Git Bash's `which`
|
||||
// returns POSIX paths (/c/Users/...) that Node's spawn() can't execute.
|
||||
|
|
@ -196,15 +229,8 @@ export function which(command: string): string | null {
|
|||
}
|
||||
|
||||
export function resolveCommand(command: string, cwd: string): string | null {
|
||||
for (const { markers, binDir } of LOCAL_BIN_PATHS) {
|
||||
if (hasRootMarkers(cwd, markers)) {
|
||||
const localPath = path.join(cwd, binDir, command);
|
||||
if (fs.existsSync(localPath)) {
|
||||
return localPath;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const localPath = resolveLocalBinaryPath(command, cwd, process.platform === "win32");
|
||||
if (localPath) return localPath;
|
||||
return which(command);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -341,6 +341,14 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
|
|||
thinkingBudgets: settingsManager.getThinkingBudgets(),
|
||||
maxRetryDelayMs: settingsManager.getRetrySettings().maxDelayMs,
|
||||
externalToolExecution: (m) => modelRegistry.getProviderAuthMode(m.provider) === "externalCli",
|
||||
getProviderOptions: async (currentModel) => {
|
||||
if (currentModel.provider !== "claude-code") return undefined;
|
||||
const runner = extensionRunnerRef.current;
|
||||
if (!runner?.hasUI()) return undefined;
|
||||
return {
|
||||
extensionUIContext: runner.getUIContext(),
|
||||
};
|
||||
},
|
||||
getApiKey: async (provider) => {
|
||||
// Use the provider argument from the in-flight request;
|
||||
// agent.state.model may already be switched mid-turn.
|
||||
|
|
|
|||
|
|
@ -112,6 +112,13 @@ export {
|
|||
lspTool,
|
||||
} from "../lsp/index.js";
|
||||
export type { LspServerStatus } from "../lsp/client.js";
|
||||
export {
|
||||
registerToolCompatibility,
|
||||
getToolCompatibility,
|
||||
getAllToolCompatibility,
|
||||
registerMcpToolCompatibility,
|
||||
resetToolCompatibilityRegistry,
|
||||
} from "./tool-compatibility-registry.js";
|
||||
|
||||
import type { AgentTool } from "@gsd/pi-agent-core";
|
||||
import { type BashToolOptions, bashTool, createBashTool } from "./bash.js";
|
||||
|
|
|
|||
|
|
@ -0,0 +1,83 @@
|
|||
// GSD-2 — Tool Compatibility Registry (ADR-005 Phase 2)
|
||||
// Maps tool names to their provider compatibility metadata.
|
||||
// Used by the model router to filter tools incompatible with the selected provider.
|
||||
|
||||
import type { ToolCompatibility } from "../extensions/types.js";
|
||||
|
||||
// ─── Registry State ─────────────────────────────────────────────────────────
|
||||
|
||||
const registry = new Map<string, ToolCompatibility>();
|
||||
|
||||
// ─── Built-in Tool Compatibility (universally compatible) ───────────────────
|
||||
// Built-in tools (bash, read, write, edit, grep, find, ls) produce text-only
|
||||
// results and use standard JSON Schema — compatible with all providers.
|
||||
|
||||
const BUILTIN_TOOLS: Record<string, ToolCompatibility> = {
|
||||
bash: {},
|
||||
read: {},
|
||||
write: {},
|
||||
edit: {},
|
||||
grep: {},
|
||||
find: {},
|
||||
ls: {},
|
||||
lsp: {},
|
||||
hashline_edit: {},
|
||||
hashline_read: {},
|
||||
};
|
||||
|
||||
// Pre-populate registry with built-in tools
|
||||
for (const [name, compat] of Object.entries(BUILTIN_TOOLS)) {
|
||||
registry.set(name, compat);
|
||||
}
|
||||
|
||||
// ─── MCP Tool Defaults ─────────────────────────────────────────────────────
|
||||
// MCP tools may use complex schemas. Default to cautious compatibility.
|
||||
|
||||
const MCP_TOOL_DEFAULTS: ToolCompatibility = {
|
||||
schemaFeatures: ["patternProperties"],
|
||||
};
|
||||
|
||||
// ─── Public API ─────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Register compatibility metadata for a tool.
|
||||
* Called automatically by registerTool() for extension tools that include
|
||||
* compatibility metadata in their ToolDefinition.
|
||||
*/
|
||||
export function registerToolCompatibility(toolName: string, compatibility: ToolCompatibility): void {
|
||||
registry.set(toolName, compatibility);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get compatibility metadata for a tool.
|
||||
* Returns undefined for unknown tools (treated as universally compatible
|
||||
* per ADR-005 principle: "fail open, don't restrict without data").
|
||||
*/
|
||||
export function getToolCompatibility(toolName: string): ToolCompatibility | undefined {
|
||||
return registry.get(toolName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all registered tool compatibility entries.
|
||||
*/
|
||||
export function getAllToolCompatibility(): ReadonlyMap<string, ToolCompatibility> {
|
||||
return registry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Register an MCP tool with default cautious compatibility.
|
||||
* MCP tools may use complex schemas that some providers don't support.
|
||||
*/
|
||||
export function registerMcpToolCompatibility(toolName: string, overrides?: Partial<ToolCompatibility>): void {
|
||||
registry.set(toolName, { ...MCP_TOOL_DEFAULTS, ...overrides });
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear all non-builtin entries (for testing).
|
||||
*/
|
||||
export function resetToolCompatibilityRegistry(): void {
|
||||
registry.clear();
|
||||
for (const [name, compat] of Object.entries(BUILTIN_TOOLS)) {
|
||||
registry.set(name, compat);
|
||||
}
|
||||
}
|
||||
|
|
@ -49,6 +49,8 @@ export {
|
|||
export { createEventBus, type EventBus, type EventBusController } from "./core/event-bus.js";
|
||||
// Extension system
|
||||
export type {
|
||||
AdjustToolSetEvent,
|
||||
AdjustToolSetResult,
|
||||
AgentEndEvent,
|
||||
AgentStartEvent,
|
||||
AgentToolResult,
|
||||
|
|
@ -118,6 +120,7 @@ export type {
|
|||
SlashCommandSource,
|
||||
TerminalInputHandler,
|
||||
ToolCallEvent,
|
||||
ToolCompatibility,
|
||||
ToolDefinition,
|
||||
ToolInfo,
|
||||
SortResult,
|
||||
|
|
@ -310,6 +313,12 @@ export {
|
|||
type HashlineReadToolDetails,
|
||||
type HashlineReadToolInput,
|
||||
type HashlineReadToolOptions,
|
||||
// Tool compatibility registry (ADR-005)
|
||||
registerToolCompatibility,
|
||||
getToolCompatibility,
|
||||
getAllToolCompatibility,
|
||||
registerMcpToolCompatibility,
|
||||
resetToolCompatibilityRegistry,
|
||||
} from "./core/tools/index.js";
|
||||
// Main entry point
|
||||
export { main } from "./main.js";
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
import type { Component } from "@gsd/pi-tui";
|
||||
import type { Component, TUI } from "@gsd/pi-tui";
|
||||
import { visibleWidth } from "@gsd/pi-tui";
|
||||
import { theme } from "../theme/theme.js";
|
||||
|
||||
/**
|
||||
* Dynamic border component that adjusts to viewport width.
|
||||
* Supports an optional animated spinner in the label area.
|
||||
*
|
||||
* Note: When used from extensions loaded via jiti, the global `theme` may be undefined
|
||||
* because jiti creates a separate module cache. Always pass an explicit color
|
||||
|
|
@ -10,11 +12,51 @@ import { theme } from "../theme/theme.js";
|
|||
*/
|
||||
export class DynamicBorder implements Component {
|
||||
private color: (str: string) => string;
|
||||
private label?: string;
|
||||
private spinnerFrames = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
|
||||
private spinnerIndex = 0;
|
||||
private spinnerInterval: NodeJS.Timeout | null = null;
|
||||
private spinnerColorFn?: (str: string) => string;
|
||||
|
||||
constructor(color: (str: string) => string = (str) => {
|
||||
try { return theme.fg("border", str); } catch { return str; }
|
||||
}) {
|
||||
}, label?: string) {
|
||||
this.color = color;
|
||||
this.label = label;
|
||||
}
|
||||
|
||||
setLabel(label: string | undefined): void {
|
||||
this.label = label;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start an animated spinner that prepends to the label.
|
||||
* The spinner rotates every 80ms and triggers a re-render via the TUI.
|
||||
*/
|
||||
startSpinner(ui: TUI, colorFn: (str: string) => string): void {
|
||||
this.stopSpinner();
|
||||
this.spinnerColorFn = colorFn;
|
||||
this.spinnerIndex = 0;
|
||||
this.spinnerInterval = setInterval(() => {
|
||||
this.spinnerIndex = (this.spinnerIndex + 1) % this.spinnerFrames.length;
|
||||
ui.requestRender();
|
||||
}, 80);
|
||||
ui.requestRender();
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop the spinner animation. The border reverts to a static label.
|
||||
*/
|
||||
stopSpinner(): void {
|
||||
if (this.spinnerInterval) {
|
||||
clearInterval(this.spinnerInterval);
|
||||
this.spinnerInterval = null;
|
||||
}
|
||||
this.spinnerColorFn = undefined;
|
||||
}
|
||||
|
||||
get isSpinning(): boolean {
|
||||
return this.spinnerInterval !== null;
|
||||
}
|
||||
|
||||
invalidate(): void {
|
||||
|
|
@ -22,6 +64,20 @@ export class DynamicBorder implements Component {
|
|||
}
|
||||
|
||||
render(width: number): string[] {
|
||||
const spinnerPrefix = this.spinnerInterval && this.spinnerColorFn
|
||||
? this.spinnerColorFn(this.spinnerFrames[this.spinnerIndex]) + " "
|
||||
: "";
|
||||
|
||||
if (this.label) {
|
||||
const labelText = ` ${spinnerPrefix}${this.label} `;
|
||||
const labelVisible = visibleWidth(labelText);
|
||||
const leading = "── ";
|
||||
const remaining = Math.max(0, width - labelVisible - leading.length);
|
||||
const trailing = "─".repeat(Math.max(1, remaining));
|
||||
// Color leading and trailing separately so embedded ANSI in the
|
||||
// spinner/label doesn't bleed into the trailing dashes.
|
||||
return [this.color(leading) + labelText + this.color(trailing)];
|
||||
}
|
||||
return [this.color("─".repeat(Math.max(1, width)))];
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import { keyHint } from "./keybinding-hints.js";
|
|||
export interface ExtensionInputOptions {
|
||||
tui?: TUI;
|
||||
timeout?: number;
|
||||
secure?: boolean;
|
||||
}
|
||||
|
||||
export class ExtensionInputComponent extends Container implements Focusable {
|
||||
|
|
@ -61,6 +62,7 @@ export class ExtensionInputComponent extends Container implements Focusable {
|
|||
}
|
||||
|
||||
this.input = new Input();
|
||||
this.input.secure = opts?.secure === true;
|
||||
if (placeholder) {
|
||||
this.input.placeholder = placeholder;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,14 +1,36 @@
|
|||
import { Loader, Spacer, Text } from "@gsd/pi-tui";
|
||||
import { Loader, Markdown, Spacer, Text } from "@gsd/pi-tui";
|
||||
|
||||
import type { InteractiveModeEvent, InteractiveModeStateHost } from "../interactive-mode-state.js";
|
||||
import { theme } from "../theme/theme.js";
|
||||
import { AssistantMessageComponent } from "../components/assistant-message.js";
|
||||
import { ToolExecutionComponent } from "../components/tool-execution.js";
|
||||
import { DynamicBorder } from "../components/dynamic-border.js";
|
||||
import { appKey } from "../components/keybinding-hints.js";
|
||||
|
||||
// Tracks the last processed content index to avoid re-scanning all blocks on every message_update
|
||||
let lastProcessedContentIndex = 0;
|
||||
|
||||
function hasVisibleAssistantContent(message: { content: Array<any> }): boolean {
|
||||
return message.content.some(
|
||||
(c) =>
|
||||
(c.type === "text" && typeof c.text === "string" && c.text.trim().length > 0)
|
||||
|| (c.type === "thinking" && typeof c.thinking === "string" && c.thinking.trim().length > 0),
|
||||
);
|
||||
}
|
||||
|
||||
function hasAssistantToolBlocks(message: { content: Array<any> }): boolean {
|
||||
return message.content.some((c) => c.type === "toolCall" || c.type === "serverToolUse");
|
||||
}
|
||||
|
||||
// Tracks the latest assistant text for the pinned message zone
|
||||
let lastPinnedText = "";
|
||||
// Whether any tool execution has been added in this assistant turn (triggers pinned display)
|
||||
let hasToolsInTurn = false;
|
||||
// Reference to the pinned border so we can toggle its label between working/idle
|
||||
let pinnedBorder: DynamicBorder | undefined;
|
||||
// Reference to the pinned markdown component below the border
|
||||
let pinnedTextComponent: Markdown | undefined;
|
||||
|
||||
export async function handleAgentEvent(host: InteractiveModeStateHost & {
|
||||
init: () => Promise<void>;
|
||||
getMarkdownThemeWithSettings: () => any;
|
||||
|
|
@ -31,9 +53,15 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & {
|
|||
|
||||
host.footer.invalidate();
|
||||
|
||||
// Reset content index tracker when a new assistant message starts
|
||||
// Reset content index tracker and pinned state when a new assistant message starts
|
||||
if (event.type === "message_start" && event.message.role === "assistant") {
|
||||
lastProcessedContentIndex = 0;
|
||||
lastPinnedText = "";
|
||||
hasToolsInTurn = false;
|
||||
if (pinnedBorder) pinnedBorder.stopSpinner();
|
||||
pinnedBorder = undefined;
|
||||
pinnedTextComponent = undefined;
|
||||
host.pinnedMessageContainer.clear();
|
||||
}
|
||||
|
||||
switch (event.type) {
|
||||
|
|
@ -46,6 +74,12 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & {
|
|||
host.streamingMessage = undefined;
|
||||
host.pendingTools.clear();
|
||||
host.pendingMessagesContainer.clear();
|
||||
host.pinnedMessageContainer.clear();
|
||||
lastPinnedText = "";
|
||||
hasToolsInTurn = false;
|
||||
if (pinnedBorder) pinnedBorder.stopSpinner();
|
||||
pinnedBorder = undefined;
|
||||
pinnedTextComponent = undefined;
|
||||
host.compactionQueuedMessages = [];
|
||||
host.rebuildChatFromMessages();
|
||||
host.updatePendingMessagesDisplay();
|
||||
|
|
@ -104,45 +138,54 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & {
|
|||
host.updatePendingMessagesDisplay();
|
||||
host.ui.requestRender();
|
||||
} else if (event.message.role === "assistant") {
|
||||
host.streamingComponent = new AssistantMessageComponent(
|
||||
undefined,
|
||||
host.hideThinkingBlock,
|
||||
host.getMarkdownThemeWithSettings(),
|
||||
host.settingsManager.getTimestampFormat(),
|
||||
);
|
||||
host.streamingMessage = event.message;
|
||||
host.chatContainer.addChild(host.streamingComponent);
|
||||
host.streamingComponent.updateContent(host.streamingMessage);
|
||||
// External-tool providers can stream multiple assistant turns through
|
||||
// one response. Delay component creation until visible assistant text
|
||||
// arrives so tool outputs keep chronological ordering.
|
||||
host.ui.requestRender();
|
||||
}
|
||||
break;
|
||||
|
||||
case "message_update":
|
||||
if (host.streamingComponent && event.message.role === "assistant") {
|
||||
if (event.message.role === "assistant") {
|
||||
host.streamingMessage = event.message;
|
||||
host.streamingComponent.updateContent(host.streamingMessage);
|
||||
|
||||
// When the stream adapter signals a completed tool call with an
|
||||
// external result (from Claude Code SDK), update the pending
|
||||
// ToolExecutionComponent immediately so output is visible in
|
||||
// real-time instead of waiting for the session to end.
|
||||
const innerEvent = event.assistantMessageEvent;
|
||||
|
||||
let externalToolResult:
|
||||
| { toolCallId: string; content: Array<{ type: string; text?: string; data?: string; mimeType?: string }>; details: Record<string, unknown>; isError: boolean }
|
||||
| undefined;
|
||||
if (innerEvent.type === "toolcall_end" && innerEvent.toolCall) {
|
||||
const tc = innerEvent.toolCall as any;
|
||||
const externalResult = tc.externalResult;
|
||||
if (externalResult) {
|
||||
const component = host.pendingTools.get(tc.id);
|
||||
if (component) {
|
||||
component.updateResult({
|
||||
content: externalResult.content ?? [{ type: "text", text: "" }],
|
||||
details: externalResult.details ?? {},
|
||||
isError: externalResult.isError ?? false,
|
||||
});
|
||||
}
|
||||
const ext = tc.externalResult;
|
||||
if (ext) {
|
||||
externalToolResult = {
|
||||
toolCallId: tc.id,
|
||||
content: ext.content ?? [{ type: "text", text: "" }],
|
||||
details: ext.details ?? {},
|
||||
isError: ext.isError ?? false,
|
||||
};
|
||||
}
|
||||
} else if (innerEvent.type === "server_tool_use") {
|
||||
const idx = typeof innerEvent.contentIndex === "number" ? innerEvent.contentIndex : -1;
|
||||
const block = idx >= 0 ? (host.streamingMessage.content[idx] as any) : undefined;
|
||||
const ext = block?.externalResult;
|
||||
if (block?.id && ext) {
|
||||
externalToolResult = {
|
||||
toolCallId: block.id,
|
||||
content: ext.content ?? [{ type: "text", text: "" }],
|
||||
details: ext.details ?? {},
|
||||
isError: ext.isError ?? false,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
const contentBlocks = host.streamingMessage.content;
|
||||
// Some adapters reuse a single assistant lifecycle while internally
|
||||
// spanning multiple provider turns. When a new turn starts, content
|
||||
// length can shrink back to 0/1; reset scan index to avoid skipping.
|
||||
if (lastProcessedContentIndex >= contentBlocks.length) {
|
||||
lastProcessedContentIndex = 0;
|
||||
}
|
||||
for (let i = lastProcessedContentIndex; i < contentBlocks.length; i++) {
|
||||
const content = contentBlocks[i];
|
||||
if (content.type === "toolCall") {
|
||||
|
|
@ -192,19 +235,108 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// When the stream adapter signals a completed tool call with an
|
||||
// external result (from Claude Code SDK), update the pending
|
||||
// ToolExecutionComponent immediately so output is visible in
|
||||
// real-time instead of waiting for the session to end.
|
||||
if (externalToolResult) {
|
||||
const component = host.pendingTools.get(externalToolResult.toolCallId);
|
||||
if (component) {
|
||||
component.updateResult({
|
||||
content: externalToolResult.content,
|
||||
details: externalToolResult.details,
|
||||
isError: externalToolResult.isError,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Render assistant text/thinking after tool components so mixed
|
||||
// streams keep chronological ordering in the chat container.
|
||||
const hasToolBlocks = hasAssistantToolBlocks(host.streamingMessage);
|
||||
if (!host.streamingComponent && hasVisibleAssistantContent(host.streamingMessage)) {
|
||||
host.streamingComponent = new AssistantMessageComponent(
|
||||
undefined,
|
||||
host.hideThinkingBlock,
|
||||
host.getMarkdownThemeWithSettings(),
|
||||
host.settingsManager.getTimestampFormat(),
|
||||
);
|
||||
host.chatContainer.addChild(host.streamingComponent);
|
||||
}
|
||||
if (host.streamingComponent) {
|
||||
if (hasToolBlocks) {
|
||||
host.chatContainer.removeChild(host.streamingComponent);
|
||||
host.chatContainer.addChild(host.streamingComponent);
|
||||
}
|
||||
host.streamingComponent.updateContent(host.streamingMessage);
|
||||
}
|
||||
|
||||
// Update index: fully processed blocks won't need re-scanning.
|
||||
// Keep the last block's index (it may still be accumulating data),
|
||||
// so we re-check it next time but skip all earlier ones.
|
||||
if (contentBlocks.length > 0) {
|
||||
lastProcessedContentIndex = Math.max(0, contentBlocks.length - 1);
|
||||
}
|
||||
|
||||
// Pinned message: mirror the latest assistant text above the editor
|
||||
// when tool executions push it out of the viewport.
|
||||
const hasTools = contentBlocks.some(
|
||||
(c: any) => c.type === "toolCall" || c.type === "serverToolUse",
|
||||
);
|
||||
if (hasTools) hasToolsInTurn = true;
|
||||
|
||||
if (hasToolsInTurn) {
|
||||
// Collect the latest text block(s) from the assistant message
|
||||
let latestText = "";
|
||||
for (let i = contentBlocks.length - 1; i >= 0; i--) {
|
||||
const c = contentBlocks[i] as any;
|
||||
if (c.type === "text" && c.text?.trim()) {
|
||||
latestText = c.text.trim();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (latestText && latestText !== lastPinnedText) {
|
||||
lastPinnedText = latestText;
|
||||
|
||||
if (!pinnedBorder) {
|
||||
// First time: create border + text component
|
||||
host.pinnedMessageContainer.clear();
|
||||
pinnedBorder = new DynamicBorder(
|
||||
(str: string) => theme.fg("dim", str),
|
||||
"Working · Latest Output",
|
||||
);
|
||||
pinnedBorder.startSpinner(host.ui, (str: string) => theme.fg("accent", str));
|
||||
host.pinnedMessageContainer.addChild(pinnedBorder);
|
||||
pinnedTextComponent = new Markdown(latestText, 1, 0, host.getMarkdownThemeWithSettings());
|
||||
// Cap pinned content to ~40% of terminal height so tall output
|
||||
// doesn't exceed the viewport and cause render flashing.
|
||||
pinnedTextComponent.maxLines = Math.max(3, Math.floor(host.ui.terminal.rows * 0.4));
|
||||
host.pinnedMessageContainer.addChild(pinnedTextComponent);
|
||||
// Hide the separate status loader — the pinned zone replaces it
|
||||
if (host.loadingAnimation) {
|
||||
host.loadingAnimation.stop();
|
||||
host.loadingAnimation = undefined;
|
||||
}
|
||||
host.statusContainer.clear();
|
||||
} else {
|
||||
// Update existing markdown component in-place
|
||||
pinnedTextComponent?.setText(latestText);
|
||||
// Refresh maxLines in case terminal was resized
|
||||
if (pinnedTextComponent) {
|
||||
pinnedTextComponent.maxLines = Math.max(3, Math.floor(host.ui.terminal.rows * 0.4));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
host.ui.requestRender();
|
||||
}
|
||||
break;
|
||||
|
||||
case "message_end":
|
||||
if (event.message.role === "user") break;
|
||||
if (host.streamingComponent && event.message.role === "assistant") {
|
||||
if (event.message.role === "assistant") {
|
||||
host.streamingMessage = event.message;
|
||||
let errorMessage: string | undefined;
|
||||
if (host.streamingMessage.stopReason === "aborted") {
|
||||
|
|
@ -214,7 +346,25 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & {
|
|||
: "Operation aborted";
|
||||
host.streamingMessage.errorMessage = errorMessage;
|
||||
}
|
||||
host.streamingComponent.updateContent(host.streamingMessage);
|
||||
|
||||
const shouldRenderAssistant = hasVisibleAssistantContent(host.streamingMessage)
|
||||
|| (
|
||||
(host.streamingMessage.stopReason === "aborted" || host.streamingMessage.stopReason === "error")
|
||||
&& !hasAssistantToolBlocks(host.streamingMessage)
|
||||
);
|
||||
if (!host.streamingComponent && shouldRenderAssistant) {
|
||||
host.streamingComponent = new AssistantMessageComponent(
|
||||
undefined,
|
||||
host.hideThinkingBlock,
|
||||
host.getMarkdownThemeWithSettings(),
|
||||
host.settingsManager.getTimestampFormat(),
|
||||
);
|
||||
host.chatContainer.addChild(host.streamingComponent);
|
||||
}
|
||||
if (host.streamingComponent) {
|
||||
host.streamingComponent.updateContent(host.streamingMessage);
|
||||
}
|
||||
|
||||
if (host.streamingMessage.stopReason === "aborted" || host.streamingMessage.stopReason === "error") {
|
||||
if (!errorMessage) {
|
||||
errorMessage = host.streamingMessage.errorMessage || "Error";
|
||||
|
|
@ -230,6 +380,15 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & {
|
|||
}
|
||||
host.streamingComponent = undefined;
|
||||
host.streamingMessage = undefined;
|
||||
// Clear pinned output once the message is finalized in the chat
|
||||
// container — prevents duplicate display when the agent continues
|
||||
// (e.g. form elicitation) after the assistant message ends.
|
||||
if (pinnedBorder) pinnedBorder.stopSpinner();
|
||||
host.pinnedMessageContainer.clear();
|
||||
lastPinnedText = "";
|
||||
hasToolsInTurn = false;
|
||||
pinnedBorder = undefined;
|
||||
pinnedTextComponent = undefined;
|
||||
host.footer.invalidate();
|
||||
}
|
||||
host.ui.requestRender();
|
||||
|
|
@ -282,6 +441,16 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & {
|
|||
host.streamingMessage = undefined;
|
||||
}
|
||||
host.pendingTools.clear();
|
||||
// Pinned output is only useful while work is actively streaming.
|
||||
// Keep chat history as the single source after completion.
|
||||
if (pinnedBorder) {
|
||||
pinnedBorder.stopSpinner();
|
||||
}
|
||||
host.pinnedMessageContainer.clear();
|
||||
lastPinnedText = "";
|
||||
hasToolsInTurn = false;
|
||||
pinnedBorder = undefined;
|
||||
pinnedTextComponent = undefined;
|
||||
await host.checkShutdownRequested();
|
||||
host.ui.requestRender();
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ export interface InteractiveModeStateHost {
|
|||
keybindings: any;
|
||||
statusContainer: any;
|
||||
chatContainer: any;
|
||||
pinnedMessageContainer: any;
|
||||
settingsManager: any;
|
||||
pendingTools: Map<string, any>;
|
||||
toolOutputExpanded: boolean;
|
||||
|
|
|
|||
|
|
@ -168,6 +168,7 @@ export class InteractiveMode {
|
|||
private chatContainer: Container;
|
||||
private pendingMessagesContainer: Container;
|
||||
private statusContainer: Container;
|
||||
private pinnedMessageContainer: Container;
|
||||
private defaultEditor: CustomEditor;
|
||||
private editor: EditorComponent;
|
||||
private autocompleteProvider: CombinedAutocompleteProvider | undefined;
|
||||
|
|
@ -285,6 +286,7 @@ export class InteractiveMode {
|
|||
this.chatContainer = new Container();
|
||||
this.pendingMessagesContainer = new Container();
|
||||
this.statusContainer = new Container();
|
||||
this.pinnedMessageContainer = new Container();
|
||||
this.widgetContainerAbove = new Container();
|
||||
this.widgetContainerBelow = new Container();
|
||||
this.keybindings = KeybindingsManager.create();
|
||||
|
|
@ -490,6 +492,7 @@ export class InteractiveMode {
|
|||
this.ui.addChild(this.chatContainer);
|
||||
this.ui.addChild(this.pendingMessagesContainer);
|
||||
this.ui.addChild(this.statusContainer);
|
||||
this.ui.addChild(this.pinnedMessageContainer);
|
||||
this.renderWidgets(); // Initialize with default spacer
|
||||
this.ui.addChild(this.widgetContainerAbove);
|
||||
this.ui.addChild(this.editorContainer);
|
||||
|
|
@ -1396,7 +1399,19 @@ export class InteractiveMode {
|
|||
*/
|
||||
private renderWidgets(): void {
|
||||
if (!this.widgetContainerAbove || !this.widgetContainerBelow) return;
|
||||
this.renderWidgetContainer(this.widgetContainerAbove, this.extensionWidgetsAbove, true, true);
|
||||
|
||||
// widgetContainerAbove: spacer collapses when pinned content is visible
|
||||
// so there's no extra blank line between pinned output and the editor border.
|
||||
this.widgetContainerAbove.clear();
|
||||
const pinned = this.pinnedMessageContainer;
|
||||
this.widgetContainerAbove.addChild({
|
||||
render: () => pinned.children.length > 0 ? [] : [""],
|
||||
invalidate: () => {},
|
||||
});
|
||||
for (const component of this.extensionWidgetsAbove.values()) {
|
||||
this.widgetContainerAbove.addChild(component);
|
||||
}
|
||||
|
||||
this.renderWidgetContainer(this.widgetContainerBelow, this.extensionWidgetsBelow, false, false);
|
||||
this.ui.requestRender();
|
||||
}
|
||||
|
|
@ -1631,7 +1646,7 @@ export class InteractiveMode {
|
|||
this.hideExtensionInput();
|
||||
resolve(undefined);
|
||||
},
|
||||
{ tui: this.ui, timeout: opts?.timeout },
|
||||
{ tui: this.ui, timeout: opts?.timeout, secure: opts?.secure },
|
||||
);
|
||||
|
||||
this.editorContainer.clear();
|
||||
|
|
@ -2264,6 +2279,7 @@ export class InteractiveMode {
|
|||
updateFooter: true,
|
||||
populateHistory: true,
|
||||
});
|
||||
this.populatePinnedFromMessages(context.messages);
|
||||
|
||||
// Show compaction info if session was compacted
|
||||
const allEntries = this.sessionManager.getEntries();
|
||||
|
|
@ -2287,6 +2303,54 @@ export class InteractiveMode {
|
|||
this.chatContainer.clear();
|
||||
const context = this.sessionManager.buildSessionContext();
|
||||
this.renderSessionContext(context);
|
||||
this.populatePinnedFromMessages(context.messages);
|
||||
}
|
||||
|
||||
/**
|
||||
* After rebuilding chat from messages, pin the last assistant text above the
|
||||
* editor if tool results would otherwise push it out of the viewport.
|
||||
*/
|
||||
private populatePinnedFromMessages(messages: AgentMessage[]): void {
|
||||
this.pinnedMessageContainer.clear();
|
||||
|
||||
// Walk backwards to find the last assistant message
|
||||
let lastAssistant: AssistantMessage | undefined;
|
||||
for (let i = messages.length - 1; i >= 0; i--) {
|
||||
const msg = messages[i];
|
||||
if (msg && "role" in msg && msg.role === "assistant") {
|
||||
lastAssistant = msg as AssistantMessage;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!lastAssistant) return;
|
||||
|
||||
// Check if any tool calls follow the last text block
|
||||
const content = lastAssistant.content;
|
||||
let lastTextIndex = -1;
|
||||
let hasToolAfterText = false;
|
||||
for (let i = 0; i < content.length; i++) {
|
||||
if (content[i].type === "text") lastTextIndex = i;
|
||||
}
|
||||
if (lastTextIndex >= 0) {
|
||||
for (let i = lastTextIndex + 1; i < content.length; i++) {
|
||||
if (content[i].type === "toolCall" || content[i].type === "serverToolUse") {
|
||||
hasToolAfterText = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!hasToolAfterText || lastTextIndex < 0) return;
|
||||
|
||||
const textBlock = content[lastTextIndex] as { type: "text"; text: string };
|
||||
const text = textBlock.text?.trim();
|
||||
if (!text) return;
|
||||
|
||||
this.pinnedMessageContainer.addChild(
|
||||
new DynamicBorder((str: string) => theme.fg("dim", str), "Latest Output"),
|
||||
);
|
||||
this.pinnedMessageContainer.addChild(
|
||||
new Markdown(text, 1, 0, this.getMarkdownThemeWithSettings()),
|
||||
);
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
|
|
|
|||
|
|
@ -305,11 +305,13 @@ async function handleShareCommand(ctx: SlashCommandContext): Promise<void> {
|
|||
ctx.showStatus("Share cancelled");
|
||||
};
|
||||
|
||||
try {
|
||||
const result = await new Promise<{ stdout: string; stderr: string; code: number | null }>((resolve) => {
|
||||
proc = spawn("gh", ["gist", "create", "--public=false", tmpFile]);
|
||||
let stdout = "";
|
||||
let stderr = "";
|
||||
try {
|
||||
const result = await new Promise<{ stdout: string; stderr: string; code: number | null }>((resolve) => {
|
||||
proc = spawn("gh", ["gist", "create", "--public=false", tmpFile], {
|
||||
shell: process.platform === "win32",
|
||||
});
|
||||
let stdout = "";
|
||||
let stderr = "";
|
||||
proc.stdout?.on("data", (data) => {
|
||||
stdout += data.toString();
|
||||
});
|
||||
|
|
|
|||
|
|
@ -224,7 +224,7 @@ export async function runRpcMode(session: AgentSession): Promise<never> {
|
|||
),
|
||||
|
||||
input: (title, placeholder, opts) =>
|
||||
createDialogPromise(opts, undefined, { method: "input", title, placeholder, timeout: opts?.timeout }, (r) =>
|
||||
createDialogPromise(opts, undefined, { method: "input", title, placeholder, timeout: opts?.timeout, secure: opts?.secure }, (r) =>
|
||||
"cancelled" in r && r.cancelled ? undefined : "value" in r ? r.value : undefined,
|
||||
),
|
||||
|
||||
|
|
|
|||
|
|
@ -291,6 +291,7 @@ export type RpcExtensionUIRequest =
|
|||
title: string;
|
||||
placeholder?: string;
|
||||
timeout?: number;
|
||||
secure?: boolean;
|
||||
}
|
||||
| { type: "extension_ui_request"; id: string; method: "editor"; title: string; prefill?: string }
|
||||
| {
|
||||
|
|
|
|||
|
|
@ -32,4 +32,15 @@ describe("Input", () => {
|
|||
input.focused = false;
|
||||
assert.equal(input.focused, false);
|
||||
});
|
||||
|
||||
it("secure mode obscures typed characters in render output", () => {
|
||||
const input = new Input();
|
||||
input.secure = true;
|
||||
input.focused = true;
|
||||
input.handleInput("secret123");
|
||||
|
||||
const line = input.render(40)[0] ?? "";
|
||||
assert.ok(!line.includes("secret123"), "rendered line must not expose raw secret text");
|
||||
assert.ok(line.includes("*********"), "rendered line should include masked characters");
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -0,0 +1,75 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { test } from "node:test";
|
||||
|
||||
import { Markdown, type MarkdownTheme } from "../markdown.js";
|
||||
|
||||
function noopTheme(): MarkdownTheme {
|
||||
const identity = (text: string) => text;
|
||||
return {
|
||||
heading: identity,
|
||||
link: identity,
|
||||
linkUrl: identity,
|
||||
code: identity,
|
||||
codeBlock: identity,
|
||||
codeBlockBorder: identity,
|
||||
quote: identity,
|
||||
quoteBorder: identity,
|
||||
hr: identity,
|
||||
listBullet: identity,
|
||||
bold: identity,
|
||||
italic: identity,
|
||||
strikethrough: identity,
|
||||
underline: identity,
|
||||
};
|
||||
}
|
||||
|
||||
test("Markdown renders all lines when maxLines is not set", () => {
|
||||
const text = "Line 1\n\nLine 2\n\nLine 3\n\nLine 4\n\nLine 5";
|
||||
const md = new Markdown(text, 0, 0, noopTheme());
|
||||
const lines = md.render(80);
|
||||
// Each paragraph produces a line + an inter-paragraph blank line
|
||||
const contentLines = lines.filter((l) => l.trim().length > 0);
|
||||
assert.ok(contentLines.length >= 5, `expected at least 5 content lines, got ${contentLines.length}`);
|
||||
});
|
||||
|
||||
test("Markdown truncates from the top when maxLines is exceeded", () => {
|
||||
const text = "Line 1\n\nLine 2\n\nLine 3\n\nLine 4\n\nLine 5";
|
||||
const md = new Markdown(text, 0, 0, noopTheme());
|
||||
md.maxLines = 3;
|
||||
const lines = md.render(80);
|
||||
assert.ok(lines.length <= 3, `expected at most 3 lines, got ${lines.length}`);
|
||||
// First line should be the ellipsis indicator
|
||||
assert.ok(lines[0].includes("…"), "first line should contain ellipsis indicator");
|
||||
assert.ok(lines[0].includes("above"), "first line should mention lines above");
|
||||
});
|
||||
|
||||
test("Markdown preserves most recent content when truncating", () => {
|
||||
const text = "First paragraph\n\nSecond paragraph\n\nThird paragraph\n\nFourth paragraph\n\nFifth paragraph";
|
||||
const md = new Markdown(text, 0, 0, noopTheme());
|
||||
md.maxLines = 3;
|
||||
const lines = md.render(80);
|
||||
// The last rendered line should contain "Fifth paragraph" (the most recent content)
|
||||
const lastContentLine = lines.filter((l) => !l.includes("…")).pop() ?? "";
|
||||
assert.ok(
|
||||
lastContentLine.includes("Fifth paragraph"),
|
||||
`expected last content line to contain "Fifth paragraph", got "${lastContentLine}"`,
|
||||
);
|
||||
});
|
||||
|
||||
test("Markdown does not truncate when content fits within maxLines", () => {
|
||||
const text = "Short text";
|
||||
const md = new Markdown(text, 0, 0, noopTheme());
|
||||
md.maxLines = 10;
|
||||
const lines = md.render(80);
|
||||
assert.ok(!lines.some((l) => l.includes("…")), "should not contain ellipsis when content fits");
|
||||
assert.ok(lines.some((l) => l.includes("Short text")), "should contain the original text");
|
||||
});
|
||||
|
||||
test("Markdown trims trailing empty lines", () => {
|
||||
const text = "Some text\n\n";
|
||||
const md = new Markdown(text, 0, 0, noopTheme());
|
||||
const lines = md.render(80);
|
||||
// Last line should not be empty (trailing empties are trimmed)
|
||||
const lastLine = lines[lines.length - 1];
|
||||
assert.ok(lastLine.trim().length > 0 || lines.length === 1, "trailing empty lines should be trimmed");
|
||||
});
|
||||
|
|
@ -21,6 +21,8 @@ export class Input implements Component, Focusable {
|
|||
public onSubmit?: (value: string) => void;
|
||||
public onEscape?: () => void;
|
||||
public placeholder: string = "";
|
||||
/** When true, render obscured characters instead of the actual value. */
|
||||
public secure: boolean = false;
|
||||
|
||||
/** Focusable interface - set by TUI when focus changes */
|
||||
private _focused: boolean = false;
|
||||
|
|
@ -446,6 +448,7 @@ export class Input implements Component, Focusable {
|
|||
// Calculate visible window
|
||||
const prompt = "> ";
|
||||
const availableWidth = width - prompt.length;
|
||||
const renderValue = this.secure ? "*".repeat(this.value.length) : this.value;
|
||||
|
||||
if (availableWidth <= 0) {
|
||||
return [prompt];
|
||||
|
|
@ -466,7 +469,7 @@ export class Input implements Component, Focusable {
|
|||
|
||||
if (this.value.length < availableWidth) {
|
||||
// Everything fits (leave room for cursor at end)
|
||||
visibleText = this.value;
|
||||
visibleText = renderValue;
|
||||
} else {
|
||||
// Need horizontal scrolling
|
||||
// Reserve one character for cursor if it's at the end
|
||||
|
|
@ -501,17 +504,17 @@ export class Input implements Component, Focusable {
|
|||
|
||||
if (this.cursor < halfWidth) {
|
||||
// Cursor near start
|
||||
visibleText = this.value.slice(0, findValidEnd(scrollWidth));
|
||||
visibleText = renderValue.slice(0, findValidEnd(scrollWidth));
|
||||
cursorDisplay = this.cursor;
|
||||
} else if (this.cursor > this.value.length - halfWidth) {
|
||||
// Cursor near end
|
||||
const start = findValidStart(this.value.length - scrollWidth);
|
||||
visibleText = this.value.slice(start);
|
||||
visibleText = renderValue.slice(start);
|
||||
cursorDisplay = this.cursor - start;
|
||||
} else {
|
||||
// Cursor in middle
|
||||
const start = findValidStart(this.cursor - halfWidth);
|
||||
visibleText = this.value.slice(start, findValidEnd(start + scrollWidth));
|
||||
visibleText = renderValue.slice(start, findValidEnd(start + scrollWidth));
|
||||
cursorDisplay = halfWidth;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -58,10 +58,13 @@ export class Markdown implements Component {
|
|||
private defaultTextStyle?: DefaultTextStyle;
|
||||
private theme: MarkdownTheme;
|
||||
private defaultStylePrefix?: string;
|
||||
/** Maximum rendered lines (excluding padding). When set, content is truncated from the top with an ellipsis indicator so the most recent output remains visible. */
|
||||
maxLines?: number;
|
||||
|
||||
// Cache for rendered output
|
||||
private cachedText?: string;
|
||||
private cachedWidth?: number;
|
||||
private cachedMaxLines?: number;
|
||||
private cachedLines?: string[];
|
||||
|
||||
constructor(
|
||||
|
|
@ -86,12 +89,13 @@ export class Markdown implements Component {
|
|||
invalidate(): void {
|
||||
this.cachedText = undefined;
|
||||
this.cachedWidth = undefined;
|
||||
this.cachedMaxLines = undefined;
|
||||
this.cachedLines = undefined;
|
||||
}
|
||||
|
||||
render(width: number): string[] {
|
||||
// Check cache
|
||||
if (this.cachedLines && this.cachedText === this.text && this.cachedWidth === width) {
|
||||
if (this.cachedLines && this.cachedText === this.text && this.cachedWidth === width && this.cachedMaxLines === this.maxLines) {
|
||||
return this.cachedLines;
|
||||
}
|
||||
|
||||
|
|
@ -104,6 +108,7 @@ export class Markdown implements Component {
|
|||
// Update cache
|
||||
this.cachedText = this.text;
|
||||
this.cachedWidth = width;
|
||||
this.cachedMaxLines = this.maxLines;
|
||||
this.cachedLines = result;
|
||||
return result;
|
||||
}
|
||||
|
|
@ -124,6 +129,12 @@ export class Markdown implements Component {
|
|||
for (let j = 0; j < tokenLines.length; j++) renderedLines.push(tokenLines[j]);
|
||||
}
|
||||
|
||||
// Trim trailing empty lines — inter-block spacing at the end just adds
|
||||
// unwanted whitespace before whatever follows (e.g. pinned output border).
|
||||
while (renderedLines.length > 0 && renderedLines[renderedLines.length - 1] === "") {
|
||||
renderedLines.pop();
|
||||
}
|
||||
|
||||
// Wrap lines (NO padding, NO background yet)
|
||||
const wrappedLines: string[] = [];
|
||||
for (const line of renderedLines) {
|
||||
|
|
@ -143,6 +154,15 @@ export class Markdown implements Component {
|
|||
}
|
||||
}
|
||||
|
||||
// Truncate from the top when maxLines is set so the most recent content
|
||||
// stays visible. This prevents the pinned output zone from exceeding the
|
||||
// terminal height and causing render flashing.
|
||||
if (this.maxLines !== undefined && wrappedLines.length > this.maxLines) {
|
||||
const keep = Math.max(1, this.maxLines - 1); // Reserve one line for the ellipsis indicator
|
||||
const truncated = wrappedLines.length - keep;
|
||||
wrappedLines.splice(0, truncated, `… ${truncated} line${truncated !== 1 ? "s" : ""} above`);
|
||||
}
|
||||
|
||||
// Add margins and background to each wrapped line
|
||||
const leftMargin = " ".repeat(this.paddingX);
|
||||
const rightMargin = " ".repeat(this.paddingX);
|
||||
|
|
@ -181,6 +201,7 @@ export class Markdown implements Component {
|
|||
// Update cache
|
||||
this.cachedText = this.text;
|
||||
this.cachedWidth = width;
|
||||
this.cachedMaxLines = this.maxLines;
|
||||
this.cachedLines = result;
|
||||
|
||||
return result.length > 0 ? result : [""];
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue