diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index e8dbfe9c0..ea2b9844d 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,3 +1,16 @@ +## Linked issue + + + +Closes # + +- [ ] I have linked an issue above. I understand that PRs without a linked issue will be closed without review. + +--- + ## TL;DR **What:** @@ -10,7 +23,7 @@ ## Why - + ## How diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 17351ebb2..b45996d89 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -130,6 +130,15 @@ jobs: - name: Install web host dependencies run: npm --prefix web ci + - name: Cache Next.js build + uses: useblacksmith/cache@v5 + with: + path: web/.next/cache + key: nextjs-${{ runner.os }}-${{ hashFiles('web/package-lock.json') }}-${{ hashFiles('web/app/**', 'web/components/**', 'web/lib/**', 'web/hooks/**') }} + restore-keys: | + nextjs-${{ runner.os }}-${{ hashFiles('web/package-lock.json') }}- + nextjs-${{ runner.os }}- + - name: Build run: npm run build @@ -148,15 +157,45 @@ jobs: - name: Run package tests run: npm run test:packages - - name: Run integration tests - run: npm run test:integration - - name: Check test coverage thresholds run: npm run test:coverage - windows-portability: + integration-tests: timeout-minutes: 15 needs: detect-changes + if: needs.detect-changes.outputs.docs-only != 'true' + runs-on: blacksmith-4vcpu-ubuntu-2404 + + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Setup Node.js + uses: actions/setup-node@v6 + with: + node-version: '24' + cache: 'npm' + + - name: Install dependencies + run: npm ci + + # Integration tests need the same compiled artifacts as the build job: + # - dist/loader.js and packages/pi-coding-agent/dist/** from `npm run build` + # - web/node_modules/.bin/next for tests that shell `build:web-host` at runtime + # Duplicating the build here (instead of sharing artifacts via needs: build) + # preserves wall-clock parallelism with the build job — see PR #4093. 
+ - name: Install web host dependencies + run: npm --prefix web ci + + - name: Build + run: npm run build + + - name: Run integration tests + run: npm run test:integration + + windows-portability: + timeout-minutes: 25 + needs: detect-changes if: >- needs.detect-changes.outputs.docs-only != 'true' runs-on: blacksmith-4vcpu-windows-2025 @@ -180,12 +219,17 @@ jobs: - name: Typecheck extensions run: npm run typecheck:extensions - - name: Run unit tests - run: npm run test:unit - - name: Run package tests run: npm run test:packages + - name: Run Windows portability tests + run: >- + node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs + --experimental-strip-types --test + src/tests/windows-portability.test.ts + src/resources/extensions/gsd/tests/validate-directory.test.ts + src/tests/integration/web-mode-windows-hide.test.ts + rtk-portability: timeout-minutes: 20 needs: detect-changes diff --git a/.github/workflows/pipeline.yml b/.github/workflows/pipeline.yml index 75ad95508..7ab9807c2 100644 --- a/.github/workflows/pipeline.yml +++ b/.github/workflows/pipeline.yml @@ -24,6 +24,10 @@ jobs: credentials: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} + env: + BLACKSMITH_CACHE_TOKEN: ${{ env.BLACKSMITH_CACHE_TOKEN }} + BLACKSMITH_CACHE_URL: ${{ env.BLACKSMITH_CACHE_URL }} + GITHUB_REPO_NAME: ${{ github.repository }} outputs: dev-version: ${{ steps.stamp.outputs.version }} steps: @@ -41,6 +45,15 @@ jobs: - name: Install dependencies run: npm ci + - name: Cache Next.js build + uses: useblacksmith/cache@v5 + with: + path: web/.next/cache + key: nextjs-${{ runner.os }}-${{ hashFiles('web/package-lock.json') }}-${{ hashFiles('web/app/**', 'web/components/**', 'web/lib/**', 'web/hooks/**') }} + restore-keys: | + nextjs-${{ runner.os }}-${{ hashFiles('web/package-lock.json') }}- + nextjs-${{ runner.os }}- + - name: Build run: npm run build @@ -153,9 +166,18 @@ jobs: - name: Install dependencies run: npm ci + - name: Cache Next.js build + uses: useblacksmith/cache@v5 + with: + path: web/.next/cache + key: nextjs-${{ runner.os }}-${{ hashFiles('web/package-lock.json') }}-${{ hashFiles('web/app/**', 'web/components/**', 'web/lib/**', 'web/hooks/**') }} + restore-keys: | + nextjs-${{ runner.os }}-${{ hashFiles('web/package-lock.json') }}- + nextjs-${{ runner.os }}- + - name: Run live LLM tests (optional) continue-on-error: true - run: npm run test:live + run: npm run test:live || echo "::warning::Live LLM tests failed — non-blocking, but worth investigating" env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} @@ -175,21 +197,26 @@ jobs: RELEASE_VERSION: ${{ steps.release.outputs.version }} run: node scripts/bump-version.mjs "$RELEASE_VERSION" + - name: Validate package files after version bump + run: | + node -e "require('./package.json')" && \ + node -e "require('./packages/pi-coding-agent/package.json')" && \ + node -e "require('./pkg/package.json')" && \ + echo "All package.json files are valid" + - name: Update CHANGELOG.md run: node scripts/update-changelog.mjs /tmp/changelog-entry.md - - name: Commit, tag, and push + - name: Commit and tag release env: RELEASE_VERSION: ${{ steps.release.outputs.version }} run: | git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" - git add package.json package-lock.json CHANGELOG.md native/npm/*/package.json pkg/package.json packages/pi-coding-agent/package.json + git add package.json package-lock.json web/package-lock.json 
CHANGELOG.md native/npm/*/package.json pkg/package.json packages/pi-coding-agent/package.json git commit -m "release: v${RELEASE_VERSION}" git tag "v${RELEASE_VERSION}" git pull --rebase origin main - git push origin main - git push origin "v${RELEASE_VERSION}" - name: Build release run: npm run build @@ -209,6 +236,13 @@ jobs: fi } + - name: Push release commit and tag + env: + RELEASE_VERSION: ${{ steps.release.outputs.version }} + run: | + git push origin main + git push origin "v${RELEASE_VERSION}" + - name: Create GitHub Release env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.mcp.json b/.mcp.json deleted file mode 100644 index a8e68079d..000000000 --- a/.mcp.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "mcpServers": { - "repowise": { - "command": "repowise", - "args": [ - "mcp", - "/Users/jeremymcspadden/Github/gsd-2", - "--transport", - "stdio" - ], - "description": "repowise: codebase intelligence \u2014 docs, graph, git signals, dead code, decisions" - } - } -} diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e79c71de..74aead452 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,188 @@ Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Unreleased] +## [2.73.0] - 2026-04-13 + +### Added +- **pi-ai**: add Alibaba DashScope as standalone provider (#3891) +- **gsd**: add layered depth enforcement to discuss.md (#4079) + +### Fixed +- **gsd**: reconcile stale slice rows and rebuild STATE.md before DB close (#3658) +- **gsd**: block direct writes to gsd.db via hooks to prevent corruption (#3674) +- **gsd**: break 3 circular dependencies in extension modules (#3730) +- **claude-code**: default GSD subagents to bypassPermissions and pre-authorize safe built-ins (#4099 follow-up) +- **gsd**: add memory pressure watchdog and persist stuck detection state (#3708) +- **state**: prevent false degraded-mode warning when DB not yet initialized (#3922) +- **async-jobs**: suppress stale follow-up for jobs consumed by await_job (#3787) (#3788) +- **gsd**: rebuild STATE.md after unit completion (#3876) +- **gsd**: let doctor heal dispatch fixable warnings (#3875) +- **gsd**: preserve experimental preferences in merges (#3847) +- **gsd**: heal legacy task arrays and evidence rows (#4027) +- **gsd**: unlock depth verification outside guided flow (#4058) +- **gsd**: preserve paused auto badge after provider pause (#4062) +- **ollama**: add cloud auth support and resolve real context window via /api/show (#4017) +- **security**: activate auth middleware and harden shutdown/update routes (#4023) +- **gsd**: normalize workingDirectory prompt paths (#4057) +- **claude-code**: pre-authorize workflow MCP tools so interactive acceptEdits mode stops blocking GSD commands +- **cli**: resolve duplicate validateConfiguredModel and missing getPiDefaultModelAndProvider import +- update GSD runtime ignore patterns for team mode (#2824) +- **gsd**: prevent double frontmatter in task SUMMARY.md from projection re-render (#2818) +- flush extension provider registrations before model resolution (#1923) +- **gsd**: reset db-open attempted flag on close (#4024) +- **gsd**: unblock mixed-dependency zero-dep slices (#4025) +- **pi-tui**: filter kitty keypad private-use input (#4026) +- **gsd**: disable db mmap on darwin (#4029) +- **gsd**: reject empty roadmap stubs as milestone plans (#4063) +- persist defaultProvider when user selects Claude Code CLI in onboarding (#4104) +- **pi-ai**: filter unavailable github copilot models (#4031) +- **claude-code**: wrap prompt history in XML tags to 
stop transcript fabrication +- clean up MCP tool rendering in Claude Code CLI stream + +### Changed +- **pi-ai**: regenerate model registry from upstream APIs (#3887) +- require linked issue in PR template (#4112) + +## [2.72.0] - 2026-04-13 + +### Added +- **agents**: add GSD phase guard to prevent subagent/phase conflicts +- **agents**: add 8 specialist subagents and slim pro agents +- **tui**: improve gsd overlays, shortcuts, and notification flows + +### Fixed +- **ci**: build artifacts in integration-tests job +- **auto**: recover from OpenRouter credit affordability errors +- **gsd**: cast unknown gate id in test to satisfy GateId type +- **gsd**: route quality gates through a per-turn registry +- **mcp**: expose every registered tool and fix SDK subpath resolution +- **mcp**: resolve rebase regressions in stream-adapter +- **mcp**: thread abort signals, restore tool fidelity, and fix subpath imports +- **doctor**: skip key check for CLI-authenticated providers +- **tui**: overlay subscription + Ctrl+Shift+P shortcut conflict +- **models**: block unconfigured models from selection surfaces +- **ollama**: clear footer status when provider unavailable +- **gsd**: guard model override in minimal command contexts +- **model**: require provider readiness for saved default selection +- **gsd**: honor /gsd model as session override across dispatch +- **gsd**: use milestone branch for merged worktree cleanup +- **pi-coding-agent**: show full OAuth login URLs +- **auto**: add structured cooldown error and bounded retry budget +- **auto**: survive transient 429 credential cooldown in auto sessions +- **pi-coding-agent**: match renderable tools case-insensitively +- **headless**: keep idle timeout off during interactive tools +- **claude-code-cli**: surface result text for success errors +- **pi-ai**: use bearer auth for MiniMax Anthropic API +- **gsd**: scope stuck-loop forensics to auto sessions +- **gsd**: repair DB-only milestone unpark state +- **gsd**: detach auto start from active turns +- **cli**: include all internal node_modules entries in pnpm merged dir +- **gsd**: enforce anti-fabrication turn-taking in discuss prompts +- **cli**: address review findings for pnpm merged node_modules +- **cli**: handle pnpm global installs by merging both node_modules roots +- **gsd**: keep project db path after worktree enter +- **gsd**: ignore prose inputs in pre-exec checks +- **gsd**: read existing artifacts before write +- **mcp-server**: use explicit sdk js subpaths +- **cli**: preserve anthropic api provider +- **gsd**: document flat task summary layout +- **gsd**: require verification classes in validation prompts +- **mcp-server**: open the DB for inline workflow tools +- **gsd**: ignore pre-existing files in task ordering +- **gsd**: detect property-value JSON invocation errors +- **cli**: honor custom-provider defaults before onboarding +- **gsd**: dedupe repeated notifications +- **gsd**: open DB before bootstrap deriveState +- **cli**: clean up stdin after sessions command readline interface closes +- **gsd**: skip reverse dependents in dispatch fallback +- **gsd**: classify plain connection-error as transient +- **cli**: resolve hoisted node_modules for global installs +- **pi-ai**: cast test tool fixtures to any for TSchema compatibility +- **commands**: use specific validation reason in blocked-directory warning +- **commands**: show friendly message when /gsd runs from $HOME instead of unhandled error + +### Changed +- **ci**: run integration tests in parallel with build +- **ci**: 
cache Next.js build artifacts with Blacksmith cache +- sync package-lock.json version fields to 2.68.0 +- **pi-ai**: add cache_control breakpoints to tool definitions + +## [2.71.0] - 2026-04-11 + +### Added +- **mcp-server**: add secure_env_collect tool via MCP form elicitation + +### Fixed +- **tui**: clear pinned output on message_end to prevent duplicate display +- **tui**: clear pinned latest output on turn completion +- **tui**: restore pinned output above editor during tool execution +- TOCTOU file locking race conditions in event log and custom workflow graph +- **tui**: mask secure extension input values in interactive mode +- **claude-code**: harden MCP elicitation schema handling +- **claude-code**: accept secure_env_collect MCP elicitation forms +- **interactive**: keep MCP tool output ordered and restore secure prompt fallback +- **interactive**: preserve MCP tool output stream ordering +- **gsd**: resolve workflow MCP test typing regressions +- **mcp**: return isError flag on workflow tool execution failures +- **discuss**: add structuredQuestionsAvailable conditional to all gates +- **discuss**: add multi-round questioning to new-project discuss phase +- **gsd**: harden claude-code workflow MCP bootstrap +- **web**: drop provisional pre-tool question text + +### Changed +- extract deriveStateFromDb logic into composable helpers +- **pr**: drop web-layer changes from MCP stream-order fix + +## [2.70.1] - 2026-04-11 + +### Fixed +- **routing**: address codex review — complete interactive bypass and accurate banner +- **routing**: skip dynamic routing for interactive dispatches, always show model changes (#3962) +- **ci**: trim windows portability integration load +- **ci**: narrow windows portability coverage +- **ci**: skip validate-pack in windows portability job +- **ci**: unblock windows portability follow-up +- **windows**: harden portability across runtime and tooling +- **auto**: use pathToFileURL for cross-platform import and reconcile regression test +- **auto**: resolve resource-loader.js from GSD_PKG_ROOT on resume (#3949) +- **mcp-server**: importLocalModule resolves src/ paths from dist/ context +- **gsd**: surface scoped doctor health warnings +- **gsd**: skip skipped slices in milestone prompts +- **gsd**: handle doubled-backtick pre-exec paths +- **update**: fetch latest version from registry + +## [2.70.0] - 2026-04-10 + +### Added +- **mcp-server**: expose ask_user_questions via elicitation + +### Fixed +- **pi-ai**: remove Anthropic OAuth flow for TOS compliance +- **mcp-server**: hydrate model credentials into env +- **mcp-server**: hydrate stored tool credentials on startup +- **gsd**: auto-enable cmux when detected instead of prompting +- **mcp-server**: URL scheme regex no longer matches Windows drive letters + +## [2.69.0] - 2026-04-10 + +### Added +- **gsd**: implement ADR-005 multi-model provider and tool strategy +- **gsd**: complete ADR-004 capability-aware model routing implementation + +### Fixed +- **gsd**: add missing directories to codebase generator exclude list +- **gsd**: wire ADR-005 infrastructure into live paths +- **gsd**: replace empty catch with logWarning for CI compliance +- **gsd**: merge enhanced context sections into standard template, clean up stale gate patterns +- **gsd**: remove broken discuss-prepared template, inject briefs into discuss.md + +## [2.68.1] - 2026-04-10 + +### Fixed +- **ci**: update FILE-SYSTEM-MAP.md path after docs reorganization +- **test**: update discord invite test path after docs reorganization +- 
**gsd**: resolve resource-loader import for deployed extensions + ## [2.68.0] - 2026-04-10 ### Added @@ -2664,7 +2846,14 @@ Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ### Changed - License updated to MIT -[Unreleased]: https://github.com/gsd-build/gsd-2/compare/v2.68.0...HEAD +[Unreleased]: https://github.com/gsd-build/gsd-2/compare/v2.73.0...HEAD +[2.73.0]: https://github.com/gsd-build/gsd-2/compare/v2.72.0...v2.73.0 +[2.72.0]: https://github.com/gsd-build/gsd-2/compare/v2.71.0...v2.72.0 +[2.71.0]: https://github.com/gsd-build/gsd-2/compare/v2.70.1...v2.71.0 +[2.70.1]: https://github.com/gsd-build/gsd-2/compare/v2.70.0...v2.70.1 +[2.70.0]: https://github.com/gsd-build/gsd-2/compare/v2.69.0...v2.70.0 +[2.69.0]: https://github.com/gsd-build/gsd-2/compare/v2.68.1...v2.69.0 +[2.68.1]: https://github.com/gsd-build/gsd-2/compare/v2.68.0...v2.68.1 [2.68.0]: https://github.com/gsd-build/gsd-2/compare/v2.67.0...v2.68.0 [2.67.0]: https://github.com/gsd-build/gsd-2/compare/v2.66.1...v2.67.0 [2.66.1]: https://github.com/gsd-build/gsd-2/compare/v2.66.0...v2.66.1 diff --git a/README.md b/README.md index a906da402..467623ddb 100644 --- a/README.md +++ b/README.md @@ -21,42 +21,74 @@ One command. Walk away. Come back to a built project with clean git history. > GSD now provisions a managed [RTK](https://github.com/rtk-ai/rtk) binary on supported macOS, Linux, and Windows installs to compress shell-command output in `bash`, `async_bash`, `bg_shell`, and verification flows. GSD forces `RTK_TELEMETRY_DISABLED=1` for all managed invocations. Set `GSD_RTK_DISABLED=1` to disable the integration. -> **📋 NOTICE: New to Node on Mac?** If you installed Node.js via Homebrew, you may be running a development release instead of LTS. **[Read this guide](./docs/node-lts-macos.md)** to pin Node 24 LTS and avoid compatibility issues. +> **📋 NOTICE: New to Node on Mac?** If you installed Node.js via Homebrew, you may be running a development release instead of LTS. **[Read this guide](./docs/user-docs/node-lts-macos.md)** to pin Node 24 LTS and avoid compatibility issues. --- -## What's New in v2.68 +## What's New in v2.71 -### MCP Workflow Tools +### MCP Secure Env Collect -- **Full workflow over MCP** — slice replanning, milestone management, slice completion, task completion, and core planning tools are now exposed over MCP for external integrations. -- **Transport-gated MCP** — workflow tool availability adapts to provider transport capabilities automatically. -- **Write gate enforcement** — workflow MCP respects write gates, preventing unauthorized state mutations from external clients. +- **Secure credential collection over MCP** — the new `secure_env_collect` tool uses MCP form elicitation to collect secrets (API keys, tokens) from external clients without exposing values in tool output. Masks input in interactive mode. +- **Hardened elicitation schema** — MCP elicitation schema handling is stricter, with proper validation and fallback for providers that don't support forms. -### Reliability & Recovery +### MCP Reliability -- **False degraded-mode fix** — eliminates spurious degraded-mode warnings when the DB hasn't been initialized yet. -- **Stale session resume suppression** — prevents stale interrupted-session resume prompts from hijacking fresh sessions. -- **Merge conflict recovery** — `autoCommitDirtyState` guarded with cwd restore on `MergeConflictError`. -- **Auto-resume hardening** — `autoStartTime` restored on resume, managed resources resynced on auto resume. 
+- **Stream ordering preserved** — MCP tool output now renders in the correct order, fixing interleaved output in Claude Code and other MCP clients. +- **isError flag propagation** — workflow tool execution failures now correctly return `isError: true`, so MCP clients can distinguish success from failure. +- **Multi-round discuss questions** — new-project discuss phase supports multi-round questioning with structured question gates. -### TUI & Developer Experience +### Model Selection Hardening -- **Contextual tips system** — TUI and web terminal now surface contextual tips based on workflow state. -- **Claude Code MCP streaming** — real-time streaming and tool output rendering for Claude Code MCP connections. +- **Unconfigured models blocked** — models without a configured provider are filtered from selection surfaces, preventing dispatch failures. +- **Provider readiness required** — saved default model selection now verifies the provider is ready before accepting it. +- **Session override honored** — `/gsd model` selection persists as a session override across all dispatch phases. +- **Minimal context guard** — model override logic is skipped in minimal command contexts where it doesn't apply. -### Infrastructure +### Auto-Mode Resilience -- **Weekly model registry refresh** — CI workflow auto-regenerates the model registry on a weekly schedule. -- **Codebase cache auto-refresh** — stale codebase cache is refreshed automatically without manual intervention. +- **Credential cooldown recovery** — auto-mode survives transient 429 rate-limit responses with structured cooldown errors and a bounded retry budget. +- **Fire-and-forget auto start** — auto start is detached from active turns to prevent blocking. +- **Scoped forensics** — stuck-loop forensics are now scoped to auto sessions only, preventing false positives in interactive use. + +### TUI Improvements + +- **Overlay subscription fix** — resolved overlay subscription lifecycle and `Ctrl+Shift+P` shortcut conflict. +- **Improved overlays and shortcuts** — GSD overlays, keyboard shortcuts, and notification flows redesigned for consistency. +- **Pinned output restored** — pinned output bar displays above the editor during tool execution again. +- **Turn completion cleanup** — pinned latest output is cleared on turn completion, preventing stale output from persisting. +- **Secure input masking** — extension input values are masked in interactive mode when collecting secrets. + +### Provider Fixes + +- **Full OAuth login URLs** — OAuth login URLs are now displayed in full instead of being truncated. +- **MiniMax bearer auth** — MiniMax Anthropic API requests use proper bearer authentication. +- **Case-insensitive tool rendering** — renderable tool matching is now case-insensitive, fixing missed tool output. +- **Headless idle timeout** — idle timeout is kept off during interactive tool execution in headless mode. + +### Reliability & Internals + +- **TOCTOU file locking** — race conditions in event log and custom workflow graph file locking are fixed with proper atomic lock acquisition. +- **State derive refactor** — `deriveStateFromDb` god function extracted into composable, testable helpers. +- **Windows portability** — hardened cross-platform portability across runtime, tooling, and CI. +- **Model routing transparency** — dynamic routing is skipped for interactive dispatches; model changes are always shown in the banner. 
+- **Capability-aware routing (ADR-004)** — full implementation of capability scoring, `before_model_select` hook, and task metadata extraction. +- **Multi-model provider strategy (ADR-005)** — infrastructure for multi-provider model selection wired into live paths. +- **Anti-fabrication guardrails** — discuss prompts enforce turn-taking to prevent fabricated user responses. +- **Milestone worktree cleanup** — merged worktree cleanup uses the milestone branch instead of generic lookups. +- **Tool cache control** — `cache_control` breakpoints added to tool definitions for improved prompt caching. See the full [Changelog](./CHANGELOG.md) for details on every release.
-Previous highlights (v2.67 and earlier) +Previous highlights (v2.70 and earlier) +- **Full workflow over MCP (v2.68)** — slice replanning, milestone management, slice completion, task completion, and core planning tools exposed over MCP +- **Transport-gated MCP (v2.68)** — workflow tool availability adapts to provider transport capabilities automatically +- **Contextual tips system (v2.68)** — TUI and web terminal surface contextual tips based on workflow state +- **Ask user questions over MCP (v2.70)** — interactive questions exposed via elicitation for external integrations - **Tiered Context Injection (M005)** — relevance-scoped context with 65%+ token reduction - **Resilient transient error recovery** — defers to Core RetryHandler and fixes cmdCtx race conditions - **Anthropic subscription routing** — auto-routed through Claude Code CLI provider with proper display names @@ -86,30 +118,35 @@ See the full [Changelog](./CHANGELOG.md) for details on every release. ## Documentation -Full documentation is available at **[gsd.build](https://gsd.build)** (powered by Mintlify) and in the [`docs/`](./docs/) directory: +Full documentation is in the [`docs/`](./docs/) directory: -- **[Getting Started](./docs/getting-started.md)** — install, first run, basic usage -- **[Auto Mode](./docs/auto-mode.md)** — autonomous execution deep-dive -- **[Configuration](./docs/configuration.md)** — all preferences, models, git, and hooks -- **[Custom Models](./docs/custom-models.md)** — add custom providers (Ollama, vLLM, LM Studio, proxies) -- **[Token Optimization](./docs/token-optimization.md)** — profiles, context compression, complexity routing -- **[Cost Management](./docs/cost-management.md)** — budgets, tracking, projections -- **[Git Strategy](./docs/git-strategy.md)** — worktree isolation, branching, merge behavior -- **[Parallel Orchestration](./docs/parallel-orchestration.md)** — run multiple milestones simultaneously -- **[Working in Teams](./docs/working-in-teams.md)** — unique IDs, shared artifacts -- **[Skills](./docs/skills.md)** — bundled skills, discovery, custom authoring -- **[Commands Reference](./docs/commands.md)** — all commands and keyboard shortcuts -- **[Architecture](./docs/architecture.md)** — system design and dispatch pipeline -- **[Troubleshooting](./docs/troubleshooting.md)** — common issues, doctor, forensics, recovery -- **[CI/CD Pipeline](./docs/ci-cd-pipeline.md)** — three-stage promotion pipeline (Dev → Test → Prod) -- **[VS Code Extension](./vscode-extension/README.md)** — chat participant, sidebar dashboard, RPC integration -- **[Visualizer](./docs/visualizer.md)** — workflow visualizer with stats and discussion status -- **[Remote Questions](./docs/remote-questions.md)** — route decisions to Slack or Discord when human input is needed -- **[Dynamic Model Routing](./docs/dynamic-model-routing.md)** — complexity-based model selection and budget pressure -- **[Web Interface](./docs/web-interface.md)** — browser-based project management and real-time progress -- **[Pipeline Simplification (ADR-003)](./docs/ADR-003-pipeline-simplification.md)** — merged research into planning, mechanical completion +### User Guides + +- **[Getting Started](./docs/user-docs/getting-started.md)** — install, first run, basic usage +- **[Auto Mode](./docs/user-docs/auto-mode.md)** — autonomous execution deep-dive +- **[Configuration](./docs/user-docs/configuration.md)** — all preferences, models, git, and hooks +- **[Custom Models](./docs/user-docs/custom-models.md)** — add custom providers 
(Ollama, vLLM, LM Studio, proxies) +- **[Token Optimization](./docs/user-docs/token-optimization.md)** — profiles, context compression, complexity routing +- **[Cost Management](./docs/user-docs/cost-management.md)** — budgets, tracking, projections +- **[Git Strategy](./docs/user-docs/git-strategy.md)** — worktree isolation, branching, merge behavior +- **[Parallel Orchestration](./docs/user-docs/parallel-orchestration.md)** — run multiple milestones simultaneously +- **[Working in Teams](./docs/user-docs/working-in-teams.md)** — unique IDs, shared artifacts +- **[Skills](./docs/user-docs/skills.md)** — bundled skills, discovery, custom authoring +- **[Commands Reference](./docs/user-docs/commands.md)** — all commands and keyboard shortcuts +- **[Troubleshooting](./docs/user-docs/troubleshooting.md)** — common issues, doctor, forensics, recovery +- **[Visualizer](./docs/user-docs/visualizer.md)** — workflow visualizer with stats and discussion status +- **[Remote Questions](./docs/user-docs/remote-questions.md)** — route decisions to Slack or Discord when human input is needed +- **[Dynamic Model Routing](./docs/user-docs/dynamic-model-routing.md)** — complexity-based model selection and budget pressure +- **[Web Interface](./docs/user-docs/web-interface.md)** — browser-based project management and real-time progress +- **[Migration from v1](./docs/user-docs/migration.md)** — `.planning` → `.gsd` migration - **[Docker Sandbox](./docker/README.md)** — run GSD auto mode in an isolated Docker container -- **[Migration from v1](./docs/migration.md)** — `.planning` → `.gsd` migration + +### Developer Docs + +- **[Architecture](./docs/dev/architecture.md)** — system design and dispatch pipeline +- **[CI/CD Pipeline](./docs/dev/ci-cd-pipeline.md)** — three-stage promotion pipeline (Dev → Test → Prod) +- **[Pipeline Simplification (ADR-003)](./docs/dev/ADR-003-pipeline-simplification.md)** — merged research into planning, mechanical completion +- **[VS Code Extension](./vscode-extension/README.md)** — chat participant, sidebar dashboard, RPC integration --- @@ -325,7 +362,7 @@ gsd headless query gsd headless dispatch plan ``` -Headless auto-responds to interactive prompts, detects completion, and exits with structured codes: `0` complete, `1` error/timeout, `2` blocked. Auto-restarts on crash with exponential backoff. Use `gsd headless query` for instant, machine-readable state inspection — returns phase, next dispatch preview, and parallel worker costs as a single JSON object without spawning an LLM session. Pair with [remote questions](./docs/remote-questions.md) to route decisions to Slack or Discord when human input is needed. +Headless auto-responds to interactive prompts, detects completion, and exits with structured codes: `0` complete, `1` error/timeout, `2` blocked. Auto-restarts on crash with exponential backoff. Use `gsd headless query` for instant, machine-readable state inspection — returns phase, next dispatch preview, and parallel worker costs as a single JSON object without spawning an LLM session. Pair with [remote questions](./docs/user-docs/remote-questions.md) to route decisions to Slack or Discord when human input is needed. **Multi-session orchestration** — headless mode supports file-based IPC in `.gsd/parallel/` for coordinating multiple GSD workers across milestones. Build orchestrators that spawn, monitor, and budget-cap a fleet of GSD workers. 
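+A minimal orchestration sketch, assuming `jq` is installed (it's only used to pretty-print — the JSON field names are whatever your `gsd headless query` emits, so inspect the output before scripting against it). The exit-code handling uses only the documented codes:
+
+```bash
+#!/usr/bin/env bash
+# Inspect machine-readable state: phase, next dispatch preview, worker costs
+gsd headless query | jq .
+
+# Dispatch one planning step, then branch on the documented exit codes
+gsd headless dispatch plan
+case $? in
+  0) echo "complete" ;;
+  1) echo "error or timeout" ;;
+  2) echo "blocked — human input needed" ;;
+esac
+```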
@@ -498,9 +535,8 @@ auto_report: true | `verification_commands`| Array of shell commands to run after task execution (e.g., `["npm run lint", "npm run test"]`) | | `verification_auto_fix`| Auto-retry on verification failures (default: true) | | `verification_max_retries` | Max retries for verification failures (default: 2) | -| `require_slice_discussion` | Pause auto-mode before each slice for human discussion review | +| `phases.require_slice_discussion` | Pause auto-mode before each slice for human discussion review | | `auto_report` | Auto-generate HTML reports after milestone completion (default: true) | -| `searchExcludeDirs` | Directories to exclude from `@` file autocomplete (e.g., `["node_modules", ".git", "dist"]`) | ### Agent Instructions @@ -530,7 +566,7 @@ token_profile: budget # or balanced (default), quality **Budget pressure** graduates model downgrading as you approach your budget ceiling — 50%, 75%, and 90% thresholds progressively shift work to cheaper tiers. -See the full [Token Optimization Guide](./docs/token-optimization.md) for details. +See the full [Token Optimization Guide](./docs/user-docs/token-optimization.md) for details. ### Bundled Tools @@ -565,13 +601,15 @@ GSD ships with 24 extensions, all loaded automatically: ### Bundled Agents -Three specialized subagents for delegated work: +Five specialized subagents for delegated work: -| Agent | Role | -| -------------- | ------------------------------------------------------------ | -| **Scout** | Fast codebase recon — returns compressed context for handoff | -| **Researcher** | Web research — finds and synthesizes current information | -| **Worker** | General-purpose execution in an isolated context window | +| Agent | Role | +| ------------------- | ------------------------------------------------------------ | +| **Scout** | Fast codebase recon — returns compressed context for handoff | +| **Researcher** | Web research — finds and synthesizes current information | +| **Worker** | General-purpose execution in an isolated context window | +| **JavaScript Pro** | JavaScript-specialized execution and debugging | +| **TypeScript Pro** | TypeScript-specialized execution and debugging | --- @@ -585,8 +623,10 @@ The best practice for working in teams is to ensure unique milestone names acros # ── GSD: Runtime / Ephemeral (per-developer, per-session) ────────────────── # Crash detection sentinel — PID lock, written per auto-mode session .gsd/auto.lock -# Auto-mode dispatch tracker — prevents re-running completed units -.gsd/completed-units.json +# Auto-mode dispatch tracker — prevents re-running completed units (includes archived per-milestone files) +.gsd/completed-units*.json +# State manifest — workflow state for recovery +.gsd/state-manifest.json # Derived state cache — regenerated from plan/roadmap files on disk .gsd/STATE.md # Per-developer token/cost accumulator @@ -599,6 +639,14 @@ The best practice for working in teams is to ensure unique milestone names acros .gsd/worktrees/ # Parallel orchestration IPC and worker status .gsd/parallel/ +# SQLite database and WAL sidecars — checkpoint state, forensics data +.gsd/gsd.db* +# Daily-rotated event journal — structured event log for forensics +.gsd/journal/ +# Doctor run history — diagnostic check results +.gsd/doctor-history.jsonl +# Workflow event log — structured event stream +.gsd/event-log.jsonl # Generated HTML reports (regenerable via /gsd export --html) .gsd/reports/ # Session-specific interrupted-work markers @@ -646,9 +694,8 @@ gsd (CLI binary) ├─ 
resource-loader.ts Syncs bundled extensions + agents to ~/.gsd/agent/ └─ src/resources/ ├─ extensions/gsd/ Core GSD extension (auto, state, commands, ...) - ├─ extensions/... 23 supporting extensions - ├─ agents/ scout, researcher, worker - ├─ AGENTS.md Agent routing instructions + ├─ extensions/... 21 supporting extensions + ├─ agents/ scout, researcher, worker, javascript-pro, typescript-pro └─ GSD-WORKFLOW.md Manual bootstrap protocol ``` @@ -720,6 +767,14 @@ Use expensive models where quality matters (planning, complex execution) and che --- +## Ecosystem + +| Project | Description | +| ------- | ----------- | +| [GSD2 Config Utility](https://github.com/jeremymcs/gsd2-config) | Standalone configuration tool for managing GSD preferences, providers, and API keys | + +--- + ## Star History diff --git a/docs/README.md b/docs/README.md index e74a67039..ec409e448 100644 --- a/docs/README.md +++ b/docs/README.md @@ -6,6 +6,8 @@ Welcome to the GSD documentation. This covers everything from getting started to Guides for installing, configuring, and using GSD day-to-day. Located in [`user-docs/`](./user-docs/). +Simplified Chinese translation: [`zh-CN/`](./zh-CN/). + | Guide | Description | |-------|-------------| | [Getting Started](./user-docs/getting-started.md) | Installation, first run, and basic usage | diff --git a/docs/dev/ADR-005-multi-model-provider-tool-strategy.md b/docs/dev/ADR-005-multi-model-provider-tool-strategy.md new file mode 100644 index 000000000..bdf00706a --- /dev/null +++ b/docs/dev/ADR-005-multi-model-provider-tool-strategy.md @@ -0,0 +1,67 @@ +# ADR-005: Multi-Model, Multi-Provider, and Tool Strategy + +**Status:** Accepted +**Date:** 2026-03-27 +**Deciders:** Jeremy McSpadden +**Related:** ADR-004 (capability-aware model routing), ADR-003 (pipeline simplification), [Issue #2790](https://github.com/gsd-build/gsd-2/issues/2790) + +## Context + +PR #2755 lands capability-aware model routing (ADR-004), extending the router from a one-dimensional complexity-tier system to a two-dimensional system that scores models across 7 capability dimensions. GSD can now intelligently pick the best model for a task from a heterogeneous pool. + +But model selection is only one piece of the multi-model puzzle. The system faces structural gaps as users configure diverse provider pools: + +1. **Tool compatibility is assumed, not verified** — Every registered tool is sent to every model regardless of provider capabilities. +2. **No tool-aware model routing** — ADR-004 scores 7 capability dimensions but none encode whether a model can actually use the tools a task requires. +3. **Provider failover loses context fidelity** — Cross-provider switches silently degrade conversation quality (thinking blocks dropped, tool IDs remapped). +4. **Tool availability is static across a session** — The same tools are presented regardless of the selected model's capabilities. +5. **No provider capability registry** — Provider quirks are scattered across `*-shared.ts` files. + +## Decision + +Introduce a provider capability registry and tool compatibility layer that integrates with ADR-004's capability-aware model router. + +### Design Principles + +1. **Layered on ADR-004, not replacing it.** Capability scoring remains primary. This adds tool compatibility as a hard constraint. +2. **Hard constraints filter; soft scores rank.** Tool support is binary — it filters the eligible set before scoring. +3. **Provider knowledge is declarative, not scattered.** Provider capabilities move to an explicit registry. +4. 
**Tool sets adapt to model capabilities.** Active tool set adjusts when the router selects a different model. +5. **Graceful degradation preserved.** Unknown providers get full tool access — same as today. + +### Implementation Phases + +1. **Phase 1:** Provider Capabilities Registry (`packages/pi-ai/src/providers/provider-capabilities.ts`) +2. **Phase 2:** Tool Compatibility Metadata (extend `ToolDefinition` with `compatibility` field) +3. **Phase 3:** Tool-compatibility filter in routing pipeline + `ProviderSwitchReport` in `transform-messages.ts` +4. **Phase 4:** `adjustToolSet` extension hook + +## Consequences + +### Positive +- Eliminates silent tool failures when routing to incompatible providers +- Makes cross-provider routing safe by default +- Provider knowledge becomes queryable (registry vs scattered code) +- Cross-provider context loss becomes visible via `ProviderSwitchReport` + +### Negative +- More metadata to maintain (provider capabilities, tool compatibility) +- Tool filtering adds a pipeline step (sub-millisecond, O(models × tools)) +- Risk of over-filtering (mitigated: opt-in per tool, permissive defaults) + +### Neutral +- Existing behavior unchanged without metadata +- ADR-004 scoring is unmodified +- Provider implementations simplify over time as registry replaces scattered workarounds + +## Appendix: Architecture Reference + +| File | Role | +|------|------| +| `packages/pi-ai/src/providers/register-builtins.ts` | Provider registration | +| `packages/pi-ai/src/providers/*-shared.ts` | Provider-specific handling | +| `packages/pi-ai/src/providers/transform-messages.ts` | Cross-provider normalization | +| `packages/pi-ai/src/types.ts` | Core types | +| `packages/pi-coding-agent/src/core/extensions/types.ts` | ToolDefinition, ExtensionAPI | +| `src/resources/extensions/gsd/model-router.ts` | Capability scoring (ADR-004) | +| `src/resources/extensions/gsd/auto-model-selection.ts` | Model selection orchestration | diff --git a/docs/user-docs/claude-code-auth-compliance.md b/docs/user-docs/claude-code-auth-compliance.md index f930afd46..0c6b77466 100644 --- a/docs/user-docs/claude-code-auth-compliance.md +++ b/docs/user-docs/claude-code-auth-compliance.md @@ -86,18 +86,15 @@ Implication for GSD2: These are directionally correct because GSD is using the user's own local Claude Code installation as the authenticated Anthropic surface. -### Medium/high-risk pieces +### Medium/high-risk pieces — RESOLVED -- `packages/pi-ai/src/utils/oauth/anthropic.ts` - Still implements a first-party-looking Anthropic OAuth flow for GSD itself using `claude.ai/oauth/authorize` and `platform.claude.com/v1/oauth/token`. -- `packages/pi-ai/src/utils/oauth/index.ts` - Still registers `anthropicOAuthProvider` as a built-in OAuth provider. -- `src/web/onboarding-service.ts` - Still advertises Anthropic as `supportsOAuth: true`, which keeps the web onboarding surface inconsistent with the TUI stance. -- `packages/daemon/src/orchestrator.ts` - Reads Anthropic OAuth credentials from `~/.gsd/agent/auth.json`, refreshes them, and then uses the access token for Anthropic API calls. +All Anthropic OAuth code paths have been removed: -The key risk is not just stale UI. The repo still contains code paths where GSD can behave as a third-party Anthropic OAuth client and then convert that credential into direct API access. +- `packages/pi-ai/src/utils/oauth/anthropic.ts` — **Deleted.** No longer implements Anthropic OAuth flow. 
+- `packages/pi-ai/src/utils/oauth/index.ts` — **Updated.** `anthropicOAuthProvider` removed from built-in registry. +- `src/web/onboarding-service.ts` — **Updated.** Anthropic set to `supportsOAuth: false`. +- `packages/daemon/src/orchestrator.ts` — **Updated.** OAuth token refresh removed; requires `ANTHROPIC_API_KEY` env var. +- `packages/pi-ai/src/providers/anthropic.ts` — **Updated.** OAuth client branch removed; `isOAuthToken` always returns false. ## Recommended Policy For GSD2 @@ -149,14 +146,14 @@ This is the best long-term UX because it separates: - API-billed usage - cloud-routed usage -## Concrete Repo Follow-ups +## Concrete Repo Follow-ups — COMPLETED -1. Delete or disable `packages/pi-ai/src/utils/oauth/anthropic.ts`. -2. Remove `anthropicOAuthProvider` from `packages/pi-ai/src/utils/oauth/index.ts`. -3. Change `src/web/onboarding-service.ts` so Anthropic does not claim OAuth support. -4. Audit `packages/daemon/src/orchestrator.ts` and any other callers that treat Anthropic OAuth access tokens as API credentials. -5. Update docs/UI labels to prefer `anthropic-api` for direct API usage and `claude-code` for subscription usage. -6. Add tests that fail if Anthropic subscription OAuth is reintroduced through the onboarding/provider registry. +1. ~~Delete or disable `packages/pi-ai/src/utils/oauth/anthropic.ts`.~~ **Done** — file deleted. +2. ~~Remove `anthropicOAuthProvider` from `packages/pi-ai/src/utils/oauth/index.ts`.~~ **Done.** +3. ~~Change `src/web/onboarding-service.ts` so Anthropic does not claim OAuth support.~~ **Done.** +4. ~~Audit `packages/daemon/src/orchestrator.ts` and any other callers that treat Anthropic OAuth access tokens as API credentials.~~ **Done** — daemon now requires `ANTHROPIC_API_KEY`. +5. ~~Update docs/UI labels to prefer `anthropic-api` for direct API usage and `claude-code` for subscription usage.~~ **Done** — providers.md and getting-started.md updated. +6. Add tests that fail if Anthropic subscription OAuth is reintroduced through the onboarding/provider registry. — **TODO.** ## Decision Rule diff --git a/docs/user-docs/configuration.md b/docs/user-docs/configuration.md index 00512fa22..b3e873e72 100644 --- a/docs/user-docs/configuration.md +++ b/docs/user-docs/configuration.md @@ -148,6 +148,7 @@ Recommended verification order: - Use absolute paths for local executables and scripts when possible. - For `stdio` servers, prefer setting required environment variables directly in the MCP config instead of relying on an interactive shell profile. +- GSD and `gsd-mcp-server` both hydrate supported model and tool keys saved in `~/.gsd/agent/auth.json`, so MCP configs can safely reference them through `${ENV_VAR}` placeholders without committing raw credentials. - If a server is team-shared and safe to commit, `.mcp.json` is usually the better home. - If a server depends on machine-local paths, personal services, or local-only secrets, prefer `.gsd/mcp.json`. diff --git a/docs/user-docs/getting-started.md b/docs/user-docs/getting-started.md index 6fbcf2422..d095ef8f9 100644 --- a/docs/user-docs/getting-started.md +++ b/docs/user-docs/getting-started.md @@ -1,74 +1,311 @@ -# Getting Started +# Getting Started with GSD -## Install +GSD is an AI coding agent that handles planning, execution, verification, and shipping so you can focus on what to build. This guide walks you through installation on macOS, Windows, and Linux, then gets you running your first session. 
+ +--- + +## Prerequisites + +| Requirement | Minimum | Recommended | +|-------------|---------|-------------| +| **[Node.js](https://nodejs.org/)** | 22.0.0 | 24 LTS | +| **[Git](https://git-scm.com/)** | 2.20+ | Latest | +| **LLM API key** | Any supported provider | Anthropic (Claude) | + +Don't have Node.js or Git yet? Follow the OS-specific instructions below. + +--- + +## Install by Operating System + +### macOS + +> **Downloads:** [Node.js](https://nodejs.org/) | [Git](https://git-scm.com/download/mac) | [Homebrew](https://brew.sh/) + +**Step 1 — Install Homebrew** (skip if you already have it): + +```bash +/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" +``` + +**Step 2 — Install Node.js and Git:** + +```bash +brew install node git +``` + +**Step 3 — Verify dependencies are installed:** + +```bash +node --version # should print v22.x or higher +git --version # should print 2.20+ +``` + +**Step 4 — Install GSD:** ```bash npm install -g gsd-pi ``` -Requires Node.js ≥ 22.0.0 (24 LTS recommended) and Git. - -> **`command not found: gsd`?** Your shell may not have npm's global bin directory in `$PATH`. Run `npm prefix -g` to find it, then add `$(npm prefix -g)/bin` to your PATH. See [Troubleshooting](./troubleshooting.md#command-not-found-gsd-after-install) for details. - -GSD checks for updates once every 24 hours. When a new version is available, you'll see an interactive prompt at startup with the option to update immediately or skip. You can also update from within a session with `/gsd update`. - -### Set up API keys - -If you use a non-Anthropic model, you'll need a search API key for web search. Run `/gsd config` to set keys globally — they're saved to `~/.gsd/agent/auth.json` and apply to all projects: +**Step 5 — Set up your LLM provider:** ```bash -# Inside any GSD session: -/gsd config -``` +# Option A: Set an environment variable (Anthropic recommended) +export ANTHROPIC_API_KEY="sk-ant-..." -See [Global API Keys](./configuration.md#global-api-keys-gsd-config) for details on supported keys. - -### Set up custom MCP servers - -If you want GSD to call local or external MCP servers, add project-local config in `.mcp.json` or `.gsd/mcp.json`. - -See [Configuration → MCP Servers](./configuration.md#mcp-servers) for examples and verification steps. - -### VS Code Extension - -GSD is also available as a VS Code extension. Install from the marketplace (publisher: FluxLabs) or search for "GSD" in VS Code extensions. The extension provides: - -- **`@gsd` chat participant** — talk to the agent in VS Code Chat -- **Sidebar dashboard** — connection status, model info, token usage, quick actions -- **Full command palette** — start/stop agent, switch models, export sessions - -The CLI (`gsd-pi`) must be installed first — the extension connects to it via RPC. - -### Web Interface - -GSD also has a browser-based interface. Run `gsd --web` to start a local web server with a visual dashboard, real-time progress, and multi-project support. See [Web Interface](./web-interface.md) for details. - -## First Launch - -Run `gsd` in any directory: - -```bash -gsd -``` - -GSD displays a welcome screen showing your version, active model, and available tool keys. Then on first launch, it runs a setup wizard: - -1. **LLM Provider** — select from 20+ providers (Anthropic, OpenAI, Google, OpenRouter, GitHub Copilot, Amazon Bedrock, Azure, and more). OAuth flows handle Claude Max and Copilot subscriptions automatically; otherwise paste an API key. -2. 
**Tool API Keys** (optional) — Brave Search, Context7, Jina, Slack, Discord. Press Enter to skip any. - -If you have an existing Pi installation, provider credentials are imported automatically. - -For detailed setup instructions for specific providers (OpenRouter, Ollama, LM Studio, vLLM, and more), see the [Provider Setup Guide](./providers.md). - -Re-run the wizard anytime with: - -```bash +# Option B: Use the built-in config wizard gsd config ``` -## Choose a Model +To persist the key, add the export line to `~/.zshrc`: -GSD auto-selects a default model after login. Switch later with: +```bash +echo 'export ANTHROPIC_API_KEY="sk-ant-..."' >> ~/.zshrc +source ~/.zshrc +``` + +See [Provider Setup Guide](./providers.md) for all 20+ supported providers. + +**Step 6 — Launch GSD:** + +```bash +cd ~/my-project # navigate to any project +gsd # start a session +``` + +**Step 7 — Verify everything works:** + +```bash +gsd --version # prints the installed version +``` + +Inside the session, type `/model` to confirm your LLM is connected. + +> **Apple Silicon PATH fix:** If `gsd` isn't found after install, npm's global bin may not be in your PATH: +> ```bash +> echo 'export PATH="$(npm prefix -g)/bin:$PATH"' >> ~/.zshrc +> source ~/.zshrc +> ``` + +> **oh-my-zsh conflict:** The oh-my-zsh git plugin defines `alias gsd='git svn dcommit'`. Fix with `unalias gsd 2>/dev/null` in `~/.zshrc`, or use `gsd-cli` instead. + +--- + +### Windows + +> **Downloads:** [Node.js](https://nodejs.org/) | [Git for Windows](https://git-scm.com/download/win) | [Windows Terminal](https://aka.ms/terminal) + +#### Option A: winget (recommended for Windows 10/11) + +**Step 1 — Install Node.js and Git:** + +```powershell +winget install OpenJS.NodeJS.LTS +winget install Git.Git +``` + +**Step 2 — Restart your terminal** (close and reopen PowerShell or Windows Terminal). + +**Step 3 — Verify dependencies are installed:** + +```powershell +node --version # should print v22.x or higher +git --version # should print 2.20+ +``` + +**Step 4 — Install GSD:** + +```powershell +npm install -g gsd-pi +``` + +**Step 5 — Set up your LLM provider:** + +```powershell +# Option A: Set an environment variable (current session) +$env:ANTHROPIC_API_KEY = "sk-ant-..." + +# Option B: Use the built-in config wizard +gsd config +``` + +To persist the key permanently, add it via System Settings > Environment Variables, or run: + +```powershell +[System.Environment]::SetEnvironmentVariable("ANTHROPIC_API_KEY", "sk-ant-...", "User") +``` + +See [Provider Setup Guide](./providers.md) for all 20+ supported providers. + +**Step 6 — Launch GSD:** + +```powershell +cd C:\Users\you\my-project # navigate to any project +gsd # start a session +``` + +**Step 7 — Verify everything works:** + +```powershell +gsd --version # prints the installed version +``` + +Inside the session, type `/model` to confirm your LLM is connected. + +#### Option B: Manual install + +1. Download and install [Node.js LTS](https://nodejs.org/) — check **"Add to PATH"** during setup +2. Download and install [Git for Windows](https://git-scm.com/download/win) — use default options +3. Open a **new** terminal, then follow Steps 3-7 above + +> **Windows tips:** +> - Use **Windows Terminal** or **PowerShell** for the best experience. Command Prompt works but has limited color support. +> - If `gsd` isn't recognized, restart your terminal. Windows needs a fresh terminal to pick up new PATH entries. 
+> - **WSL2** also works — install WSL, then follow the Linux instructions inside your distro.
+
+---
+
+### Linux
+
+> **Downloads:** [Node.js](https://nodejs.org/) | [Git](https://git-scm.com/download/linux) | [nvm](https://github.com/nvm-sh/nvm)
+
+Pick your distro, then follow the steps.
+
+#### Ubuntu / Debian
+
+**Step 1 — Install Node.js and Git:**
+
+```bash
+curl -fsSL https://deb.nodesource.com/setup_24.x | sudo -E bash -
+sudo apt-get install -y nodejs git
+```
+
+#### Fedora / RHEL / CentOS
+
+**Step 1 — Install Node.js and Git:**
+
+```bash
+curl -fsSL https://rpm.nodesource.com/setup_24.x | sudo bash -
+sudo dnf install -y nodejs git
+```
+
+#### Arch Linux
+
+**Step 1 — Install Node.js and Git:**
+
+```bash
+sudo pacman -S nodejs npm git
+```
+
+#### Using nvm (any distro)
+
+**Step 1 — Install nvm, then Node.js:**
+
+```bash
+curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.0/install.sh | bash
+source ~/.bashrc # or ~/.zshrc
+nvm install 24
+nvm use 24
+```
+
+#### All distros: Steps 2-6
+
+**Step 2 — Verify dependencies are installed:**
+
+```bash
+node --version # should print v22.x or higher
+git --version # should print 2.20+
+```
+
+**Step 3 — Install GSD:**
+
+```bash
+npm install -g gsd-pi
+```
+
+**Step 4 — Set up your LLM provider:**
+
+```bash
+# Option A: Set an environment variable (Anthropic recommended)
+export ANTHROPIC_API_KEY="sk-ant-..."
+
+# Option B: Use the built-in config wizard
+gsd config
+```
+
+To persist the key, add the export line to `~/.bashrc` (or `~/.zshrc`):
+
+```bash
+echo 'export ANTHROPIC_API_KEY="sk-ant-..."' >> ~/.bashrc
+source ~/.bashrc
+```
+
+See [Provider Setup Guide](./providers.md) for all 20+ supported providers.
+
+**Step 5 — Launch GSD:**
+
+```bash
+cd ~/my-project # navigate to any project
+gsd # start a session
+```
+
+**Step 6 — Verify everything works:**
+
+```bash
+gsd --version # prints the installed version
+```
+
+Inside the session, type `/model` to confirm your LLM is connected.
+
+> **Permission errors on `npm install -g`?** Don't use `sudo npm`. Fix npm's global directory instead:
+> ```bash
+> mkdir -p ~/.npm-global
+> npm config set prefix '~/.npm-global'
+> echo 'export PATH="$HOME/.npm-global/bin:$PATH"' >> ~/.bashrc
+> source ~/.bashrc
+> npm install -g gsd-pi
+> ```
+
+---
+
+### Docker (any OS)
+
+> **Downloads:** [Docker Desktop](https://www.docker.com/products/docker-desktop/)
+
+Run GSD in an isolated sandbox without installing Node.js on your host.
+
+**Step 1 — Install Docker Desktop** (4.58+ required).
+
+**Step 2 — Clone the GSD repo:**
+
+```bash
+git clone https://github.com/gsd-build/gsd-2.git
+cd gsd-2/docker
+```
+
+**Step 3 — Create and enter a sandbox:**
+
+```bash
+docker sandbox create --template . --name gsd-sandbox
+docker sandbox exec -it gsd-sandbox bash
+```
+
+**Step 4 — Set your API key and run GSD:**
+
+```bash
+export ANTHROPIC_API_KEY="sk-ant-..."
+gsd auto "implement the feature described in issue #42"
+```
+
+See [Docker Sandbox docs](../../docker/README.md) for full configuration, resource limits, and compose files.
+
+---
+
+## After Installation
+
+### Choose a Model
+
+GSD auto-selects a default model after provider setup. Switch anytime inside a session:
+
 ```
 /model
@@ -76,18 +313,20 @@ GSD auto-selects a default model after login. Switch later with:

 Or configure per-phase models in preferences — see [Configuration](./configuration.md).

+---
+
 ## Two Ways to Work

 ### Step Mode — `/gsd`

 Type `/gsd` inside a session.
GSD executes one unit of work at a time, pausing between each with a wizard showing what completed and what's next. -- **No `.gsd/` directory** → starts a discussion flow to capture your project vision -- **Milestone exists, no roadmap** → discuss or research the milestone -- **Roadmap exists, slices pending** → plan the next slice or execute a task -- **Mid-task** → resume where you left off +- **No `.gsd/` directory** — starts a discussion flow to capture your project vision +- **Milestone exists, no roadmap** — discuss or research the milestone +- **Roadmap exists, slices pending** — plan the next slice or execute a task +- **Mid-task** — resume where you left off -Step mode is the on-ramp. You stay in the loop, reviewing output between each step. +Step mode keeps you in the loop, reviewing output between each step. ### Auto Mode — `/gsd auto` @@ -99,9 +338,11 @@ Type `/gsd auto` and walk away. GSD autonomously researches, plans, executes, ve See [Auto Mode](./auto-mode.md) for full details. -## Two Terminals, One Project +--- -The recommended workflow: auto mode in one terminal, steering from another. +## Recommended Workflow: Two Terminals + +Run auto mode in one terminal, steer from another. **Terminal 1 — let it build:** @@ -121,9 +362,9 @@ gsd Both terminals read and write the same `.gsd/` files. Decisions in terminal 2 are picked up at the next phase boundary automatically. -## Project Structure +--- -GSD organizes work into a hierarchy: +## How GSD Organizes Work ``` Milestone → a shippable version (4-10 slices) @@ -138,25 +379,45 @@ All state lives on disk in `.gsd/`: ``` .gsd/ PROJECT.md — what the project is right now - REQUIREMENTS.md — requirement contract (active/validated/deferred) + REQUIREMENTS.md — requirement contract DECISIONS.md — append-only architectural decisions - KNOWLEDGE.md — cross-session rules, patterns, and lessons - RUNTIME.md — runtime context: API endpoints, env vars, services (v2.39) + KNOWLEDGE.md — cross-session rules and patterns STATE.md — quick-glance status milestones/ M001/ - M001-ROADMAP.md — slice plan with risk levels and dependencies - M001-CONTEXT.md — scope and goals from discussion + M001-ROADMAP.md — slice plan with dependencies slices/ S01/ S01-PLAN.md — task decomposition S01-SUMMARY.md — what happened - S01-UAT.md — human test script - tasks/ - T01-PLAN.md - T01-SUMMARY.md ``` +--- + +## VS Code Extension + +GSD is also available as a VS Code extension. Install from the marketplace (publisher: FluxLabs) or search for "GSD" in VS Code extensions: + +- **`@gsd` chat participant** — talk to the agent in VS Code Chat +- **Sidebar dashboard** — connection status, model info, token usage +- **Full command palette** — start/stop agent, switch models, export sessions + +The CLI (`gsd-pi`) must be installed first — the extension connects to it via RPC. + +--- + +## Web Interface + +GSD has a browser-based interface for visual project management: + +```bash +gsd --web +``` + +See [Web Interface](./web-interface.md) for details. + +--- + ## Resume a Session ```bash @@ -165,36 +426,48 @@ gsd --continue # or gsd -c Resumes the most recent session for the current directory. -To browse and pick from all saved sessions: +Browse all saved sessions: ```bash gsd sessions ``` -Shows each session's date, message count, and first-message preview so you can choose which one to resume. +--- + +## Updating GSD + +GSD checks for updates every 24 hours and prompts at startup. 
You can also update manually: + +```bash +npm update -g gsd-pi +``` + +Or from within a session: + +``` +/gsd update +``` + +--- + +## Quick Troubleshooting + +| Problem | Fix | +|---------|-----| +| `command not found: gsd` | Add npm global bin to PATH (see OS-specific notes above) | +| `gsd` runs `git svn dcommit` | oh-my-zsh conflict — `unalias gsd` or use `gsd-cli` | +| Permission errors on `npm install -g` | Fix npm prefix (see Linux notes) or use nvm | +| Can't connect to LLM | Check API key with `gsd config`, verify network access | +| `gsd` hangs on start | Check Node.js version: `node --version` (need 22+) | + +For more, see [Troubleshooting](./troubleshooting.md). + +--- ## Next Steps - [Auto Mode](./auto-mode.md) — deep dive into autonomous execution - [Configuration](./configuration.md) — model selection, timeouts, budgets - [Commands Reference](./commands.md) — all commands and shortcuts - -## Troubleshooting - -### `gsd` command runs `git svn dcommit` instead of GSD - -The [oh-my-zsh git plugin](https://github.com/ohmyzsh/ohmyzsh/tree/master/plugins/git) defines `alias gsd='git svn dcommit'`, which shadows the GSD binary. - -**Option 1** — Remove the alias in your `~/.zshrc` (add after the `source $ZSH/oh-my-zsh.sh` line): - -```bash -unalias gsd 2>/dev/null -``` - -**Option 2** — Use the alternative binary name: - -```bash -gsd-cli -``` - -Both `gsd` and `gsd-cli` point to the same binary. +- [Provider Setup](./providers.md) — detailed setup for every provider +- [Working in Teams](./working-in-teams.md) — multi-developer workflows diff --git a/docs/user-docs/providers.md b/docs/user-docs/providers.md index 984ee369c..cfa3df939 100644 --- a/docs/user-docs/providers.md +++ b/docs/user-docs/providers.md @@ -30,7 +30,7 @@ Step-by-step setup instructions for every LLM provider GSD supports. If you ran | Provider | Auth Method | Env Variable | Config File | |----------|-------------|-------------|-------------| -| Anthropic | OAuth or API key | `ANTHROPIC_API_KEY` | — | +| Anthropic | API key | `ANTHROPIC_API_KEY` | — | | OpenAI | API key | `OPENAI_API_KEY` | — | | Google Gemini | API key | `GEMINI_API_KEY` | — | | OpenRouter | API key | `OPENROUTER_API_KEY` | Optional `models.json` | @@ -55,25 +55,91 @@ Built-in providers have models pre-registered in GSD. You only need to supply cr **Recommended.** Anthropic models have the deepest integration: built-in web search, extended thinking, and prompt caching. -**Option A — Browser sign-in (recommended):** - -```bash -gsd config -# Choose "Sign in with your browser" → "Anthropic (Claude)" -``` - -Or inside a session: `/login` - -**Option B — API key:** +**Option A — API key (recommended):** ```bash export ANTHROPIC_API_KEY="sk-ant-..." ``` -Or paste it during `gsd config` when prompted. +Or run `gsd config` and paste your key when prompted. **Get a key:** [console.anthropic.com/settings/keys](https://console.anthropic.com/settings/keys) +**Option B — Claude Code CLI:** + +If you have a Claude Pro or Max subscription, you can authenticate through Anthropic's official Claude Code CLI. Install it, sign in with `claude`, then GSD will detect and route through it automatically: + +```bash +# Install Claude Code CLI (see https://docs.anthropic.com/en/docs/claude-code) +claude +# Sign in when prompted, then start GSD +gsd +``` + +GSD detects your local Claude Code installation and uses it as the authenticated Anthropic surface. This is the TOS-compliant path for subscription users — GSD never handles your subscription credentials directly. 
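+
+Before launching GSD, you can confirm the CLI route is ready by running the same two checks GSD's readiness probe performs (a quick sanity check — exact output varies by Claude Code version):
+
+```bash
+# Both checks must succeed for GSD to route through Claude Code:
+claude --version   # the CLI is installed and on your PATH
+claude auth status # you are signed in with your Pro/Max account
+```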
+ +> **Note:** GSD does not support browser-based OAuth sign-in for Anthropic. Use an API key or the Claude Code CLI instead. + +**Option C — Use your Claude Pro/Max plan with GSD inside Claude Code:** + +If you already have a Claude Pro or Max subscription and want to use GSD's planning, execution, and milestone orchestration directly from Claude Code — without switching to a separate terminal — you can connect GSD as an MCP server. This gives Claude Code access to GSD's full workflow toolset via the [Model Context Protocol](https://modelcontextprotocol.io), so you get GSD's structured project management powered by your existing Claude plan. + +**Automatic setup (recommended):** + +When GSD detects a Claude Code model during startup, it automatically writes a `.mcp.json` file in your project root with the GSD workflow MCP server configured. No manual steps needed — just start GSD once with Claude Code as the provider and the config is created for you. + +You can also trigger this manually from inside a GSD session: + +```bash +/gsd mcp init +``` + +This writes (or updates) the `gsd-workflow` entry in your project's `.mcp.json`. Claude Code discovers this file automatically on its next session start. + +**Manual setup:** + +If you prefer to configure it yourself, add GSD to your project's `.mcp.json`: + +```json +{ + "mcpServers": { + "gsd": { + "command": "npx", + "args": ["gsd-mcp-server"], + "env": { + "GSD_CLI_PATH": "/path/to/gsd" + } + } + } +} +``` + +Or if `gsd-mcp-server` is installed globally: + +```json +{ + "mcpServers": { + "gsd": { + "command": "gsd-mcp-server" + } + } +} +``` + +You can also add this to `~/.claude/settings.json` under `mcpServers` to make GSD available across all projects. + +**What's exposed:** + +The MCP server provides GSD's full workflow tool surface — milestone planning, task completion, slice management, roadmap reassessment, journal queries, and more. Session management tools (`gsd_execute`, `gsd_status`, `gsd_result`, `gsd_cancel`) let Claude Code start and monitor GSD auto-mode sessions. See [Commands → MCP Server Mode](./commands.md#mcp-server-mode) for the full tool list. 
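+
+For reference, both the automatic setup and `/gsd mcp init` produce a project config similar to the manual example above, keyed as `gsd-workflow`. A sketch of what the generated `.mcp.json` likely contains (the exact fields GSD writes may differ):
+
+```json
+{
+  "mcpServers": {
+    "gsd-workflow": {
+      "command": "npx",
+      "args": ["gsd-mcp-server"]
+    }
+  }
+}
+```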
+ +**Verify the connection:** + +From inside a GSD session, check that the MCP server is reachable: + +```bash +/gsd mcp status +``` + ### OpenAI ```bash diff --git a/docs/zh-CN/README.md b/docs/zh-CN/README.md new file mode 100644 index 000000000..5a9fc25d0 --- /dev/null +++ b/docs/zh-CN/README.md @@ -0,0 +1,32 @@ +# GSD 文档 + +欢迎使用 GSD 文档。这里涵盖了从快速开始到高级配置、自动模式内部机制,以及如何基于 Pi SDK 扩展 GSD 的内容。 + +> 本目录是主文档的简体中文翻译。目前优先覆盖 `docs/user-docs/` 这套用户手册;如中英文内容有差异,请以英文原文为准。 + +## 用户文档 + +用于安装、配置和日常使用 GSD 的指南。文件位于 [`user-docs/`](./user-docs/)。 + +| 指南 | 说明 | +|------|------| +| [快速开始](./user-docs/getting-started.md) | 安装、首次运行和基础使用 | +| [自动模式](./user-docs/auto-mode.md) | 自主执行如何工作,包括状态机、崩溃恢复和引导控制 | +| [命令参考](./user-docs/commands.md) | 所有命令、键盘快捷键和 CLI 参数 | +| [远程提问](./user-docs/remote-questions.md) | 用于无头自动模式的 Discord、Slack 和 Telegram 集成 | +| [配置](./user-docs/configuration.md) | 偏好设置、模型选择、Git 设置和 Token 配置 | +| [提供商设置](./user-docs/providers.md) | OpenRouter、Ollama、LM Studio、vLLM 以及所有受支持提供商的分步配置 | +| [自定义模型](./user-docs/custom-models.md) | 高级模型配置,包括 `models.json` 结构、兼容标志和覆盖项 | +| [Token 优化](./user-docs/token-optimization.md) | Token 配置、上下文压缩、复杂度路由和自适应学习 | +| [动态模型路由](./user-docs/dynamic-model-routing.md) | 基于复杂度的模型选择、成本表、升级策略和预算压力 | +| [捕获与分流](./user-docs/captures-triage.md) | 自动模式中的随手记录,以及自动分流处理 | +| [工作流可视化器](./user-docs/visualizer.md) | 用于查看进度、依赖、指标和时间线的交互式 TUI 叠层 | +| [成本管理](./user-docs/cost-management.md) | 预算上限、成本跟踪、成本预测和执行策略 | +| [Git 策略](./user-docs/git-strategy.md) | 工作树隔离、分支模型和合并行为 | +| [并行编排](./user-docs/parallel-orchestration.md) | 通过隔离的工作线程和协调机制同时运行多个 milestones | +| [团队协作](./user-docs/working-in-teams.md) | 唯一 milestone ID、`.gitignore` 设置和共享规划产物 | +| [技能](./user-docs/skills.md) | 内置技能、技能发现和自定义技能编写 | +| [从 v1 迁移](./user-docs/migration.md) | 将 `.planning` 目录迁移到新的 `.gsd` 格式 | +| [故障排查](./user-docs/troubleshooting.md) | 常见问题、`/gsd doctor`、`/gsd forensics` 和恢复流程 | +| [Web 界面](./user-docs/web-interface.md) | 通过 `gsd --web` 使用基于浏览器的项目管理界面 | +| [VS Code 扩展](../../vscode-extension/README.md) | 聊天参与者、侧边栏仪表板以及 VS Code 的 RPC 集成 | diff --git a/docs/zh-CN/user-docs/auto-mode.md b/docs/zh-CN/user-docs/auto-mode.md new file mode 100644 index 000000000..914355b5e --- /dev/null +++ b/docs/zh-CN/user-docs/auto-mode.md @@ -0,0 +1,301 @@ +# 自动模式 + +自动模式是 GSD 的自主执行引擎。运行 `/gsd auto`,然后离开;回来时你会看到已经构建好的软件,以及干净的 git 历史。 + +## 工作原理 + +自动模式本质上是一个**由磁盘文件驱动的状态机**。它会读取 `.gsd/STATE.md`,确定下一个工作单元,创建一个新的 agent 会话,把所有相关上下文预先内联到一个聚焦 prompt 中,再让 LLM 执行。LLM 完成后,自动模式会再次读取磁盘状态,并派发下一个工作单元。 + +### 执行循环 + +每个 slice 都会自动经历以下阶段: + +``` +Plan (with integrated research) → Execute (per task) → Complete → Reassess Roadmap → Next Slice + ↓ (all slices done) + Validate Milestone → Complete Milestone +``` + +- **Plan**:巡检代码库、研究相关文档,把 slice 分解成带 must-have 的 task +- **Execute**:在新的上下文窗口中逐个执行 task +- **Complete**:写 summary、UAT 脚本、标记 roadmap、提交代码 +- **Reassess**:检查 roadmap 是否仍然合理 +- **Validate Milestone**:在所有 slices 完成后做一致性校验,把 roadmap 的成功标准与实际结果对照,避免在封板前漏掉关键缺口 + +## 关键特性 + +### 每个单元都用全新会话 + +每个 task、research 阶段和 planning 步骤都会得到一个干净的上下文窗口。没有历史垃圾堆积,也不会因为上下文膨胀导致质量下降。派发 prompt 中已经包含 task plan、历史 summary、依赖上下文、决策记录等必要信息,因此 LLM 一开始就能对齐,而不必先花工具调用去读文件。 + +### 预加载上下文 + +派发 prompt 会精心组装以下内容: + +| 内联产物 | 用途 | +|----------|------| +| Task plan | 告诉 agent 要构建什么 | +| Slice plan | 说明当前 task 在整体中的位置 | +| 历史 task summaries | 告诉 agent 已经完成了什么 | +| 依赖 summary | 提供跨 slice 上下文 | +| Roadmap 摘要 | 说明整体方向 | +| Decisions register | 提供架构上下文 | + +具体内联多少内容由你的 [token profile](./token-optimization.md) 控制。`budget` 模式只内联最少上下文,`quality` 模式则把所有内容都内联进去。 + +### Git 隔离 + +GSD 支持三种 
milestone 隔离模式(通过偏好设置中的 `git.isolation` 配置): + +- **`worktree`**(默认):每个 milestone 都运行在 `.gsd/worktrees//` 下自己的 git worktree 中,分支名为 `milestone/`。所有 slice 工作都顺序提交,不需要切分支,也不会在 milestone 内部产生合并冲突。milestone 完成后,再整体 squash merge 回主分支,形成一个干净提交。 +- **`branch`**:工作发生在项目根目录下的 `milestone/` 分支上。适合子模块较多、worktree 表现不佳的仓库。 +- **`none`**:直接在当前分支工作。没有 worktree,也没有 milestone 分支。适合文件隔离会破坏开发工具的热重载场景。 + +详见 [Git 策略](./git-strategy.md)。 + +### 并行执行 + +当项目里存在彼此独立的 milestones 时,可以同时运行它们。每个 milestone 都拥有自己的 worker 进程和 worktree。配置与用法见 [并行编排](./parallel-orchestration.md)。 + +### 崩溃恢复 + +自动模式会用锁文件跟踪当前工作单元。如果会话中途退出,下一次执行 `/gsd auto` 时,会读取残留的会话文件,从所有已经落盘的工具调用中综合生成一份恢复简报,然后带着完整上下文继续执行。 + +**Headless 自动重启(v2.26):** 当运行 `gsd headless auto` 时,崩溃会触发带指数退避的自动重启(5s → 10s → 30s 上限,默认最多 3 次)。通过 `--max-restarts N` 配置。SIGINT/SIGTERM 不会触发重启。结合崩溃恢复机制,这让真正的“跑一夜直到完成”成为可能。 + +### Provider 错误恢复 + +GSD 会对 provider 错误分类,并在安全时自动恢复: + +| 错误类型 | 示例 | 动作 | +|----------|------|------| +| **限流** | 429、`too many requests` | 按 `retry-after` 头等待,或默认 60 秒后自动恢复 | +| **服务端错误** | 500、502、503、`overloaded`、`api_error` | 30 秒后自动恢复 | +| **永久错误** | `unauthorized`、`invalid key`、`billing` | 无限期暂停,等待人工恢复 | + +对临时性错误通常不需要人工介入,系统会短暂暂停后自动继续。 + +### 增量记忆(v2.26) + +GSD 会维护一个 `KNOWLEDGE.md` 文件,作为项目特有规则、模式和经验的追加式记录。agent 在每个工作单元开始时都会读取它;当发现反复出现的问题、非显而易见的模式或未来会话需要遵循的规则时,也会把内容追加进去。这样一来,自动模式就有了跨会话、跨上下文窗口的持久记忆。 + +### 上下文压力监视器(v2.26) + +当上下文使用达到 70% 时,GSD 会向 agent 发送收尾信号,提醒它优先完成可持久化的输出(例如提交、写 summary),避免在 task 中途因为上下文打满而什么都没来得及落盘。 + +### 有意义的提交信息(v2.26) + +提交信息不是通用的 “complete task”,而是从 task summary 生成的。每条提交消息都反映了真正完成了什么,因此 `git log` 看起来更像一份高质量的变更日志。 + +### 卡死检测(v2.39) + +GSD 使用滑动窗口分析来检测卡死循环。它不只是简单地统计“同一单元是否重复派发两次”,而是会分析近期派发历史中的重复模式,因此既能发现单点重复,也能发现 A→B→A→B 这样的循环。一旦检测到,GSD 会先带着更深的诊断 prompt 重试一次;如果仍然失败,自动模式就会停止,并指出它原本期待的具体文件,便于你介入。 + +这种滑动窗口方法能降低合法重试场景(例如可自动修复的 verification 失败)的误报,同时更快抓到真正的卡死循环。 + +### 事后取证(v2.40) + +`/gsd forensics` 是一个面向自动模式失败分析的全访问 GSD 调试器,提供: + +- **异常检测**:对卡死循环、成本尖峰、超时、产物缺失和崩溃做结构化识别,并标注严重级别 +- **单元追踪**:最近 10 次单元执行,包含错误细节和执行时长 +- **指标分析**:成本、token 数量和执行时间拆分 +- **Doctor 集成**:把 `/gsd doctor` 中的结构性健康问题一起纳入 +- **LLM 引导调查**:启动一个拥有完整工具访问权限的 agent 会话来调查根因 + +``` +/gsd forensics [optional problem description] +``` + +更多诊断方式见 [故障排查](./troubleshooting.md)。 + +### 超时监管 + +三层超时机制可以防止会话失控: + +| 超时类型 | 默认值 | 行为 | +|----------|--------|------| +| Soft | 20 分钟 | 警告 LLM 应该开始收尾 | +| Idle | 10 分钟 | 检测停滞并介入 | +| Hard | 30 分钟 | 暂停自动模式 | + +恢复引导会提醒 LLM 在真正超时前尽量完成可持久化输出。配置方式如下: + +```yaml +auto_supervisor: + soft_timeout_minutes: 20 + idle_timeout_minutes: 10 + hard_timeout_minutes: 30 +``` + +### 成本跟踪 + +每个工作单元的 token 使用量和成本都会被记录,并按阶段、slice 和模型拆分。仪表板会显示运行总量和预测值。预算上限可以在超支前主动暂停自动模式。 + +详见 [成本管理](./cost-management.md)。 + +### 自适应重规划 + +每完成一个 slice,roadmap 都会重新评估。如果最新工作暴露出会改变计划的新信息,后续 slices 就会在继续前被重新排序、添加或删除。`balanced` 和 `budget` token profile 可以跳过这一阶段。 + +### 验证强制执行(v2.26) + +你可以配置 shell 命令,让它们在每个 task 执行后自动运行: + +```yaml +verification_commands: + - npm run lint + - npm run test +verification_auto_fix: true # 默认开启自动重试修复 +verification_max_retries: 2 # 最大重试次数(默认 2) +``` + +一旦失败,agent 会看到 verification 输出并尝试自动修复后重试,再决定是否继续。这意味着代码质量门禁是靠机制强制执行,而不是靠 LLM“自觉遵守”。 + +### Slice 讨论门(v2.26) + +如果你希望每个 slice 开始前都先经过人工确认: + +```yaml +require_slice_discussion: true +``` + +自动模式会在每个 slice 开始前暂停,并把 slice 上下文展示出来供你讨论。确认后才继续执行。适用于高风险项目,尤其是你希望 agent 开始构建前先复核计划的时候。 + +### HTML 报告(v2.26) + +每当 milestone 完成后,GSD 都会在 `.gsd/reports/` 中自动生成一个自包含的 HTML 报告。报告包括项目摘要、进度树、slice 依赖图(SVG DAG)、成本 / Token 柱状图、执行时间线、变更日志和知识库。没有外部依赖,所有 CSS 和 JS 都会内联。 + +```yaml +auto_report: true # 默认开启 +``` + +你也可以随时手动执行 `/gsd 
export --html` 生成报告,或通过 `/gsd export --html --all`(v2.28)为所有 milestones 一次性生成报告。 + +### 故障恢复强化(v2.28) + +v2.28 通过多项机制强化了自动模式的可靠性:原子文件写入可避免崩溃时损坏文件;OAuth 拉取超时(30 秒)避免无限挂起;RPC 子进程退出能被检测并报告;blob 垃圾回收可防止磁盘无限增长。结合已有的崩溃恢复和 headless 自动重启,自动模式可以真正支持“扔在那里跑一晚上”的场景。 + +### 流水线架构(v2.40) + +自动循环采用的是线性阶段流水线,而非递归派发。每轮迭代都经过明确的阶段: + +1. **Pre-Dispatch**:校验状态、检查守卫、解析模型偏好 +2. **Dispatch**:使用聚焦 prompt 执行当前单元 +3. **Post-Unit**:关闭该单元、更新缓存、执行清理 +4. **Verification**:可选验证门(lint、test 等) +5. **Stuck Detection**:滑动窗口模式分析 + +这种线性流程更容易调试,占用更少内存(没有递归调用栈),也使错误恢复更清晰,因为每个阶段都有明确的入口和出口条件。 + +### 实时健康可见性(v2.40) + +`/gsd doctor` 发现的问题现在会实时出现在三个地方: + +- **Dashboard widget**:健康指示器,显示问题数量和严重级别 +- **Workflow visualizer**:状态面板中展示问题 +- **HTML reports**:生成报告时带出完整健康信息 + +问题按严重程度分为:`error`(阻塞自动模式)、`warning`(不阻塞)和 `info`(提示性质)。自动模式会在派发时检查健康状态,并可在关键问题出现时主动暂停。 + +### Prompt 中的技能激活(v2.39) + +配置好的技能会被自动解析并注入派发 prompt。agent 会收到一个 “Available Skills” 区块,列出当前上下文匹配的技能,来源包括: + +- `always_use_skills`:始终注入 +- `prefer_skills`:以偏好形式注入 +- `skill_rules`:根据 `when` 条件做条件激活 + +技能路由偏好详见 [配置](./configuration.md)。 + +## 控制自动模式 + +### 启动 + +``` +/gsd auto +``` + +### 暂停 + +按 **Escape**。对话会被保留,你可以继续和 agent 交互、查看状态,或者稍后恢复。 + +### 恢复 + +``` +/gsd auto +``` + +自动模式会读取磁盘状态,并从中断处继续。 + +### 停止 + +``` +/gsd stop +``` + +优雅地停止自动模式。这个命令也可以从另一个终端执行。 + +### 引导 + +``` +/gsd steer +``` + +在不中断流水线的情况下,强制修改计划文档。修改会在下一个阶段边界生效。 + +### 捕获 + +``` +/gsd capture "add rate limiting to API endpoints" +``` + +随手记录想法,不打断当前执行。Captures 会在 tasks 之间自动 triage。详见 [捕获与分流](./captures-triage.md)。 + +### 可视化 + +``` +/gsd visualize +``` + +打开工作流可视化器,交互式查看进度、依赖、指标和时间线。详见 [工作流可视化器](./visualizer.md)。 + +## 仪表板 + +`Ctrl+Alt+G` 或 `/gsd status` 会显示实时进度: + +- 当前 milestone、slice 和 task +- 自动模式的已运行时间和当前阶段 +- 每个单元的成本与 token 拆分 +- 成本预测 +- 已完成和进行中的单元 +- 待 triage 的 capture 数量(如果存在) +- 并行 worker 状态(运行并行 milestones 时显示,也包含 80% 预算预警) + +## 跳过阶段 + +Token profile 可以通过跳过某些阶段来降低成本: + +| 阶段 | `budget` | `balanced` | `quality` | +|------|----------|------------|-----------| +| Milestone Research | 跳过 | 执行 | 执行 | +| Slice Research | 跳过 | 跳过 | 执行 | +| Reassess Roadmap | 跳过 | 执行 | 执行 | + +更多细节见 [Token 优化](./token-optimization.md)。 + +## 动态模型路由 + +启用后,自动模式会为简单工作单元(例如 slice completion、UAT)自动选择更便宜的模型,并把昂贵模型保留给复杂工作(例如重规划或架构 task)。详见 [动态模型路由](./dynamic-model-routing.md)。 + +## 响应式 Task 执行(v2.38) + +当在偏好中设置 `reactive_execution: true` 时,GSD 会从 task plan 中的 IO 注解推导依赖图。互不冲突的 tasks(没有共享文件读写)会通过 subagents 并行派发,而存在依赖的 tasks 则等待前驱完成。 + +```yaml +reactive_execution: true # 默认关闭 +``` + +依赖图推导是纯函数且确定性的:它会解析 ready-set、检测冲突和死锁,并做相应防护。并行批次中的 verification 结果会被沿用,因此某些 tasks 如果已经通过验证,后续同一 slice 中其他 tasks 完成时就不需要再次验证。 + +这套实现位于 `reactive-graph.ts`(负责图推导、ready-set 解析、冲突 / 死锁检测),并集成到了 `auto-dispatch.ts` 和 `auto-prompts.ts`。 diff --git a/docs/zh-CN/user-docs/captures-triage.md b/docs/zh-CN/user-docs/captures-triage.md new file mode 100644 index 000000000..76a51c66c --- /dev/null +++ b/docs/zh-CN/user-docs/captures-triage.md @@ -0,0 +1,84 @@ +# 捕获与分流 + +*引入于 v2.19.0* + +Captures 允许你在自动模式执行过程中随手记录想法,而不必打断当前流程。你可以把新想法、bug 或范围变更记录下来,让 GSD 在 tasks 之间的自然间隙中进行分流处理。 + +## 快速开始 + +在自动模式运行期间(或任何时候): + +``` +/gsd capture "add rate limiting to the API endpoints" +/gsd capture "the auth flow should support OAuth, not just JWT" +``` + +这些 capture 会追加到 `.gsd/CAPTURES.md`,并在 tasks 之间自动参与 triage。 + +## 工作原理 + +### 流程 + +``` +capture → triage → confirm → resolve → resume +``` + +1. **Capture**:`/gsd capture "thought"` 会带着时间戳和唯一 ID 追加到 `.gsd/CAPTURES.md` +2. **Triage**:在 tasks 之间的自然衔接点(`handleAgentEnd` 中),GSD 会检测待处理 capture 并进行分类 +3. 
**Confirm**:向用户展示建议的处理方式,由用户确认或调整 +4. **Resolve**:应用该处理方案(插入 task、触发重规划、延期等) +5. **Resume**:自动模式继续运行 + +### 分类类型 + +每条 capture 都会被分类到以下五种类型之一: + +| 类型 | 含义 | 处理方式 | +|------|------|----------| +| `quick-task` | 小型、可独立完成的修复 | 立即以内联 quick task 执行 | +| `inject` | 当前 slice 需要新增 task | 将 task 注入当前活跃的 slice plan | +| `defer` | 重要但不紧急 | 延后到 roadmap reassessment 时处理 | +| `replan` | 改变当前实现路径 | 带着 capture 上下文触发 slice replan | +| `note` | 仅供记录,不需要动作 | 记录并确认,不修改计划 | + +### 自动分流 + +在自动模式下,triage 会在 tasks 之间自动触发。triage prompt 会收到: + +- 所有待处理 captures +- 当前 slice plan +- 当前活跃 roadmap + +LLM 会对每条 capture 进行分类并给出建议处理方案。会修改计划的处理方式(`inject`、`replan`)需要用户确认。 + +### 手动分流 + +你也可以随时手动触发 triage: + +``` +/gsd triage +``` + +这在你积累了多条 capture,并希望在下一个自然间隙之前先处理掉它们时很有用。 + +## 仪表板集成 + +当有待 triage 的 capture 时,进度组件会显示一个待处理数量徽标。无论是在 `Ctrl+Alt+G` 仪表板里,还是自动模式进度组件里,都能看到这个提示。 + +## 上下文注入 + +Capture 上下文会自动注入到: + +- **Replan-slice prompts**:让重规划知道是什么触发了它 +- **Reassess-roadmap prompts**:让被延后的 capture 也会影响 roadmap 决策 + +## Worktree 感知 + +Captures 总是写回**原始项目根目录**下的 `.gsd/CAPTURES.md`,而不是 worktree 的本地副本。这样从 steering 终端记录的内容,也能被运行在 worktree 里的自动模式会话看到。 + +## 命令 + +| 命令 | 说明 | +|------|------| +| `/gsd capture "text"` | 记录一个想法(单词时引号可省略) | +| `/gsd triage` | 手动触发待处理 captures 的 triage | diff --git a/docs/zh-CN/user-docs/claude-code-auth-compliance.md b/docs/zh-CN/user-docs/claude-code-auth-compliance.md new file mode 100644 index 000000000..38d84934b --- /dev/null +++ b/docs/zh-CN/user-docs/claude-code-auth-compliance.md @@ -0,0 +1,177 @@ +# Claude Code 认证合规性研究 + +日期:2026-04-10 + +## 执行摘要 + +Anthropic 当前公开的指导原则边界非常清晰: + +- Anthropic 自家的原生应用,包括 Claude Code,可以使用 Claude 订阅认证。 +- 第三方工具应优先通过 Claude Console 或受支持云 provider 的 API key 进行认证。 +- 任何伪装身份、绕过订阅限制转发第三方流量、或以其他方式违反 Anthropic 条款的应用,都被明确禁止。 + +对于 GSD2,安全路径应当是: + +1. 把本地 Claude Code 视为一个外部、已认证的运行时。 +2. 永远不要让 GSD 用户通过 GSD 托管的 Anthropic OAuth 去登录 Claude 订阅。 +3. 永远不要把 Claude.ai 的订阅 OAuth 凭据交换成 bearer token,然后冒充 Claude Code 直接调用 Anthropic API。 +4. 如果 GSD 需要直接访问 Anthropic API,则必须要求使用 Claude Console API key、Bedrock、Vertex 或其他被明确支持的 provider 路径。 + +## Anthropic 明确允许的内容 + +### 1. Claude Code 本身可以使用 Claude 订阅认证 + +Anthropic 帮助中心说明:Claude Pro / Max 用户应安装 Claude Code,运行 `claude`,并“使用与你登录 Claude 相同的凭据”完成登录。文档还指出,这样会把订阅直接连接到 Claude Code,并且 `/login` 是切换账户类型的方式。Team / Enterprise 文章对组织账号也给出了同样流程。 + +对 GSD2 的含义: + +- 允许用户在真正的 `claude` CLI 内部完成认证,是符合 Anthropic 文档流程的 +- 检测 `claude auth status`,然后通过本地 CLI 或官方 Claude Code SDK 路由工作,是风险最低的方案 + +### 2. Claude Code 同时支持订阅 OAuth 和 API 凭据 + +Anthropic 的 Claude Code 文档说明,支持的认证类型包括 Claude.ai 凭据、Claude API 凭据、Azure Auth、Bedrock Auth 和 Vertex Auth。文档还定义了认证优先级: + +1. cloud provider 凭据 +2. `ANTHROPIC_AUTH_TOKEN` +3. `ANTHROPIC_API_KEY` +4. `apiKeyHelper` +5. 来自 `/login` 的订阅 OAuth + +对 GSD2 的含义: + +- 如果 GSD2 是通过 shell 调用或嵌入 Claude Code,那么它应尊重 Claude Code 自己的凭据选择逻辑,而不是再发明一套平行的 Anthropic OAuth 流程 +- 对需要动态短期凭据、但又不希望把原始 API key 交给工具的组织来说,`apiKeyHelper` 是一个干净的企业级出口 + +### 3. 
Anthropic 的商业使用可通过 API keys 和受支持的云 provider 实现 + +Anthropic 的商业条款约束的是 API keys 及其相关 Anthropic 服务,包括供客户构建给终端用户使用的产品。面向团队的认证文档推荐使用 Claude for Teams / Enterprise、Claude Console、Bedrock、Vertex 或 Microsoft Foundry。 + +对 GSD2 的含义: + +- 如果 GSD2 作为一个产品面向用户提供 Anthropic 能力,那么任何直接 Anthropic 访问都应走商业认证路径,而不是复用订阅 token + +## Anthropic 明确警告的内容 + +Anthropic 当前的 “Logging in to your Claude account” 文章给出了最清晰的表述: + +- 订阅计划仅适用于 Anthropic 原生应用的日常使用,包括 Claude Web、桌面端、移动端和 Claude Code +- 对第三方工具(包括开源项目),“首选方式”是通过 Claude Console 或受支持云 provider 的 API key 认证 +- 如果你正在为他人构建产品、应用或工具,应使用 Claude Console API key 或受支持云 provider 的认证方式 +- 任何伪装身份、绕过订阅限制转发第三方流量、或以其他方式违反条款的工具,都被禁止 + +Anthropic 的消费条款还额外加入两项限制: + +- 用户不得把账户登录信息、API keys 或账户凭据分享给他人 +- 除非是通过 Anthropic API key 访问服务,或者 Anthropic 明确允许,否则用户不得通过自动化或非人工方式访问这些服务 + +对 GSD2 的含义: + +- 由 GSD 托管的 Anthropic 订阅 OAuth 流程属于高风险 +- 在 GSD 自己的 API client 中复用用户 Claude 订阅凭据属于高风险 +- 任何会让 Anthropic 误以为请求来自 Claude Code、但实际上来自 GSD 基础设施的流程,都越界了 + +## 当前 GSD2 发现 + +### 低风险 / 已对齐的部分 + +- `src/resources/extensions/claude-code-cli/index.ts` + 把 `claude-code` 注册成 `externalCli` provider,并通过 Anthropic 官方的 `@anthropic-ai/claude-agent-sdk` 路由 +- `src/resources/extensions/claude-code-cli/readiness.ts` + 只通过 `claude --version` 和 `claude auth status` 检查本地 CLI 是否存在以及认证状态 +- `src/onboarding.ts` + TUI onboarding 已移除 Anthropic 浏览器 OAuth,并把本地 Claude Code 路由标记为符合 TOS 的路径 +- `src/cli.ts` + 当检测到本地 CLI 可用时,会把用户从 `anthropic` 迁移到 `claude-code` + +这些方向是正确的,因为此时 GSD 使用的是用户自己本地安装的 Claude Code,作为已认证的 Anthropic surface。 + +### 中高风险部分 —— 已解决 + +所有 Anthropic OAuth 代码路径都已被移除: + +- `packages/pi-ai/src/utils/oauth/anthropic.ts` —— **已删除**,不再实现 Anthropic OAuth 流程 +- `packages/pi-ai/src/utils/oauth/index.ts` —— **已更新**,内置注册表中移除了 `anthropicOAuthProvider` +- `src/web/onboarding-service.ts` —— **已更新**,将 Anthropic 标记为 `supportsOAuth: false` +- `packages/daemon/src/orchestrator.ts` —— **已更新**,去掉 OAuth token refresh,改为要求 `ANTHROPIC_API_KEY` 环境变量 +- `packages/pi-ai/src/providers/anthropic.ts` —— **已更新**,移除 OAuth client 分支,`isOAuthToken` 始终返回 false + +## 针对 GSD2 的建议策略 + +将下面内容作为仓库规则: + +- Claude 订阅认证只允许存在于 Anthropic 自有 surface 中: + - `claude` CLI + - 基于本地已认证 Claude Code 安装的 Claude Code SDK + - 其他 Anthropic 文档明确支持的原生流程 +- GSD2 不得为终端用户实现自己的 Anthropic 订阅 OAuth 流程 +- GSD2 不得持久化 Anthropic 订阅 OAuth token,供后续 API 调用使用 +- GSD2 不得使用由 GSD 获取的订阅 OAuth tokens 来发送 Anthropic API 流量 +- GSD2 可以支持 Anthropic 直接访问,但仅限以下方式: + - `ANTHROPIC_API_KEY` + - 保存在 auth storage 中的 Claude Console API keys + - `apiKeyHelper` + - Bedrock / Vertex / Foundry + - 本地 Claude Code provider + +## 推荐实现方案 + +### 方案 A:安全的最小合规清理 + +1. 从内置 OAuth provider 注册表中移除 Anthropic +2. 把 Web onboarding 中的 Anthropic 改为只支持 API key +3. 当 `claude auth status` 成功时,继续保留 `claude-code` 作为推荐路径 +4. 增加明确的 UI 文案: + - “Claude 订阅用户:请登录本地 Claude Code app / CLI,而不是 GSD。” +5. 阻止任何把 Anthropic OAuth 凭据转换成 GSD 托管请求 API 认证的迁移或代码路径 + +这是让仓库与 Anthropic 当前公开指导对齐的最快路径。 + +### 方案 B:企业级安全的 Anthropic 支持 + +把 Anthropic 支持拆分成三种清晰模式: + +- `claude-code` + 只使用本地已认证的 `claude` 运行时 +- `anthropic-api` + 使用 Console API keys 或 `apiKeyHelper` +- `anthropic-cloud` + 使用 Bedrock、Vertex 或 Foundry + +然后彻底移除任何模糊的 `anthropic` 浏览器登录路径。 + +这是长期最好的 UX,因为它清晰地区分了: + +- 基于订阅的原生使用 +- 基于 API 计费的使用 +- 通过云路由的使用 + +## 具体仓库后续动作 —— 已完成 + +1. ~~删除或禁用 `packages/pi-ai/src/utils/oauth/anthropic.ts`。~~ **已完成** —— 文件已删除 +2. ~~从 `packages/pi-ai/src/utils/oauth/index.ts` 中移除 `anthropicOAuthProvider`。~~ **已完成** +3. ~~修改 `src/web/onboarding-service.ts`,让 Anthropic 不再声称支持 OAuth。~~ **已完成** +4. 
~~审查 `packages/daemon/src/orchestrator.ts` 以及其他把 Anthropic OAuth access token 当作 API 凭据使用的调用方。~~ **已完成** —— daemon 现在要求 `ANTHROPIC_API_KEY` +5. ~~更新文档 / UI 文案:直接 API 使用优先 `anthropic-api`,订阅使用优先 `claude-code`。~~ **已完成** —— `providers.md` 和 `getting-started.md` 已更新 +6. 添加测试,防止 Anthropic 订阅 OAuth 通过 onboarding / provider registry 被重新引入 —— **TODO** + +## 决策规则 + +如果某个拟议中的 GSD2 特性需要访问 Anthropic,先问一个问题: + +“GSD 是以 GSD 的身份调用 Anthropic,还是 GSD 只是把工作委派给用户本地已认证的 Claude Code 运行时?” + +- 如果 GSD 是以 GSD 的身份调用 Anthropic:必须要求 API key 或受支持的云认证 +- 如果 GSD 只是委派给本地 Claude Code:可以接受,前提是 GSD 自身不会拦截、生成或重放订阅凭据 + +## 审查过的来源 + +- Anthropic Help Center: “Logging in to your Claude account” +- Anthropic Help Center: “Using Claude Code with your Pro or Max plan” +- Anthropic Help Center: “Use Claude Code with your Team or Enterprise plan” +- Anthropic Help Center: “Managing API key environment variables in Claude Code” +- Anthropic Help Center: “API Key Best Practices: Keeping Your Keys Safe and Secure” +- Claude Code Docs:getting started / authentication / team / settings / IAM +- Anthropic Commercial Terms of Service +- Anthropic Consumer Terms of Service +- Anthropic Usage Policy diff --git a/docs/zh-CN/user-docs/commands.md b/docs/zh-CN/user-docs/commands.md new file mode 100644 index 000000000..93ba35098 --- /dev/null +++ b/docs/zh-CN/user-docs/commands.md @@ -0,0 +1,308 @@ +# 命令参考 + +## 会话命令 + +| 命令 | 说明 | +|------|------| +| `/gsd` | Step mode:一次执行一个工作单元,并在每步之间暂停 | +| `/gsd next` | 显式 Step mode(与 `/gsd` 相同) | +| `/gsd auto` | 自动模式:research、plan、execute、commit,然后重复 | +| `/gsd quick` | 在不经过完整 planning 开销的情况下,执行一个带 GSD 保证的 quick task(原子提交、状态跟踪) | +| `/gsd stop` | 优雅地停止自动模式 | +| `/gsd pause` | 暂停自动模式(保留状态,可用 `/gsd auto` 恢复) | +| `/gsd steer` | 在执行过程中强制修改 plan 文档 | +| `/gsd discuss` | 讨论架构和决策(可与自动模式并行使用) | +| `/gsd status` | 进度仪表板 | +| `/gsd widget` | 循环切换仪表板组件:full / small / min / off | +| `/gsd queue` | 给未来 milestones 排队和重排(自动模式中也安全) | +| `/gsd capture` | 随手记录一个想法,不打断当前流程(自动模式中可用) | +| `/gsd triage` | 手动触发待处理 captures 的 triage | +| `/gsd dispatch` | 直接派发一个指定阶段(research、plan、execute、complete、reassess、uat、replan) | +| `/gsd history` | 查看执行历史(支持 `--cost`、`--phase`、`--model` 过滤) | +| `/gsd forensics` | 全访问 GSD 调试器:用于分析自动模式失败,支持结构化异常检测、单元追踪和 LLM 引导的根因分析 | +| `/gsd cleanup` | 清理 GSD 状态文件和过期 worktrees | +| `/gsd visualize` | 打开工作流可视化器(进度、依赖、指标、时间线) | +| `/gsd export --html` | 为当前或已完成的 milestone 生成自包含 HTML 报告 | +| `/gsd export --html --all` | 一次性为所有 milestones 生成回顾报告 | +| `/gsd update` | 在会话内更新到最新版本 | +| `/gsd knowledge` | 添加持久化项目知识(规则、模式或经验) | +| `/gsd fast` | 为支持的模型切换 service tier(优先级 API 路由) | +| `/gsd rate` | 评价上一个单元所用模型层级(over / ok / under),帮助改进自适应路由 | +| `/gsd changelog` | 查看分类后的发行说明 | +| `/gsd logs` | 浏览活动日志、调试日志和指标 | +| `/gsd remote` | 控制远程自动模式 | +| `/gsd help` | 查看所有 GSD 子命令的分类参考及说明 | + +## 配置与诊断 + +| 命令 | 说明 | +|------|------| +| `/gsd prefs` | 模型选择、超时和预算上限 | +| `/gsd mode` | 切换工作流模式(solo / team),同时应用与 milestone ID、git 提交行为和文档相关的协调默认值 | +| `/gsd config` | 重新运行 provider 配置向导(LLM provider + 工具 key) | +| `/gsd keys` | API key 管理器:列出、添加、移除、测试、轮换、doctor | +| `/gsd doctor` | 运行时健康检查与自动修复;问题会实时显示在 widget、visualizer 和 HTML reports 中(v2.40) | +| `/gsd inspect` | 查看 SQLite DB 诊断信息 | +| `/gsd init` | 项目初始化向导:检测、配置并 bootstrap `.gsd/` | +| `/gsd setup` | 查看全局 setup 状态和配置 | +| `/gsd skill-health` | 技能生命周期仪表板:使用统计、成功率、token 趋势、过期告警 | +| `/gsd skill-health ` | 查看某个 skill 的详细信息 | +| `/gsd skill-health --declining` | 只显示被标记为表现下降的 skills | +| `/gsd skill-health --stale N` | 显示 N 天以上未使用的 skills | +| `/gsd hooks` | 查看已配置的 post-unit 和 
pre-dispatch hooks | +| `/gsd run-hook` | 手动触发一个指定 hook | +| `/gsd migrate` | 将 v1 的 `.planning` 目录迁移到 `.gsd` 格式 | + +## Milestone 管理 + +| 命令 | 说明 | +|------|------| +| `/gsd new-milestone` | 创建一个新的 milestone | +| `/gsd skip` | 阻止某个工作单元被自动模式派发 | +| `/gsd undo` | 回退上一个已完成单元 | +| `/gsd undo-task` | 重置某个特定 task 的完成状态(DB + markdown) | +| `/gsd reset-slice` | 重置某个 slice 及其所有 tasks(DB + markdown) | +| `/gsd park` | Park 一个 milestone,不删除,只跳过 | +| `/gsd unpark` | 重新激活一个已 park 的 milestone | +| Discard milestone | 在 `/gsd` 向导的 “Milestone actions” → “Discard” 中可用 | + +## 并行编排 + +| 命令 | 说明 | +|------|------| +| `/gsd parallel start` | 分析可并行性、确认后启动 workers | +| `/gsd parallel status` | 显示所有 workers 的状态、进度和成本 | +| `/gsd parallel stop [MID]` | 停止所有 workers,或停止某个指定 milestone 的 worker | +| `/gsd parallel pause [MID]` | 暂停所有 workers,或暂停某个指定 worker | +| `/gsd parallel resume [MID]` | 恢复已暂停的 workers | +| `/gsd parallel merge [MID]` | 把已完成的 milestones 合并回 main | + +完整文档见 [并行编排](./parallel-orchestration.md)。 + +## Workflow Templates(v2.42) + +| 命令 | 说明 | +|------|------| +| `/gsd start` | 启动一个 workflow template(bugfix、spike、feature、hotfix、refactor、security-audit、dep-upgrade、full-project) | +| `/gsd start resume` | 恢复一个进行中的 workflow | +| `/gsd templates` | 列出可用 workflow templates | +| `/gsd templates info ` | 查看某个 template 的详细信息 | + +## 自定义 Workflows(v2.42) + +| 命令 | 说明 | +|------|------| +| `/gsd workflow new` | 创建一个新的 workflow definition(通过 skill) | +| `/gsd workflow run ` | 创建一个 run 并启动自动模式 | +| `/gsd workflow list` | 列出 workflow runs | +| `/gsd workflow validate ` | 校验一个 workflow YAML definition | +| `/gsd workflow pause` | 暂停自定义 workflow 的自动模式 | +| `/gsd workflow resume` | 恢复已暂停的自定义 workflow 自动模式 | + +## 扩展 + +| 命令 | 说明 | +|------|------| +| `/gsd extensions list` | 列出所有扩展及其状态 | +| `/gsd extensions enable ` | 启用一个被禁用的扩展 | +| `/gsd extensions disable ` | 禁用一个扩展 | +| `/gsd extensions info ` | 查看扩展详情 | + +## cmux 集成 + +| 命令 | 说明 | +|------|------| +| `/gsd cmux status` | 显示 cmux 检测结果、prefs 和能力 | +| `/gsd cmux on` | 启用 cmux 集成 | +| `/gsd cmux off` | 禁用 cmux 集成 | +| `/gsd cmux notifications on/off` | 切换 cmux 桌面通知 | +| `/gsd cmux sidebar on/off` | 切换 cmux 侧边栏元数据 | +| `/gsd cmux splits on/off` | 切换 cmux subagent 可视化分屏 | + +## GitHub Sync(v2.39) + +| 命令 | 说明 | +|------|------| +| `/github-sync bootstrap` | 初始配置:根据当前 `.gsd/` 状态创建 GitHub Milestones、Issues 和 draft PRs | +| `/github-sync status` | 显示同步映射数量(milestones、slices、tasks) | + +在偏好设置里启用 `github.enabled: true`。要求已安装并认证 `gh` CLI。同步映射会保存在 `.gsd/.github-sync.json`。 + +## Git 命令 + +| 命令 | 说明 | +|------|------| +| `/worktree`(`/wt`) | Git worktree 生命周期管理:create、switch、merge、remove | + +## 会话管理 + +| 命令 | 说明 | +|------|------| +| `/clear` | 启动一个新会话(`/new` 的别名) | +| `/exit` | 优雅退出,会在退出前保存会话状态 | +| `/kill` | 立即终止 GSD 进程 | +| `/model` | 切换当前 active model | +| `/login` | 登录一个 LLM provider | +| `/thinking` | 在会话中切换 thinking level | +| `/voice` | 切换实时语音转文字(macOS、Linux) | + +## 键盘快捷键 + +| 快捷键 | 动作 | +|--------|------| +| `Ctrl+Alt+G` | 切换 dashboard overlay | +| `Ctrl+Alt+V` | 切换语音转录 | +| `Ctrl+Alt+B` | 显示后台 shell 进程 | +| `Ctrl+V` / `Alt+V` | 从剪贴板粘贴图片(截图 → vision 输入) | +| `Escape` | 暂停自动模式(保留对话) | + +> **注意:** 在不支持 Kitty keyboard protocol 的终端中(如 macOS Terminal.app、JetBrains IDEs),界面会显示 slash-command 形式的回退命令,而不是 `Ctrl+Alt` 快捷键。 +> +> **提示:** 如果 `Ctrl+V` 被终端拦截(例如 Warp),可改用 `Alt+V` 粘贴剪贴板图片。 + +## CLI 参数 + +| 参数 | 说明 | +|------|------| +| `gsd` | 启动新的交互式会话 | +| `gsd --continue`(`-c`) | 恢复当前目录最近一次会话 | +| `gsd --model ` | 为当前会话覆盖默认模型 | +| `gsd --print "msg"`(`-p`) | 单次 prompt 模式(无 
TUI) | +| `gsd --mode ` | 非交互使用时的输出模式 | +| `gsd --list-models [search]` | 列出可用模型并退出 | +| `gsd --web [path]` | 启动基于浏览器的 Web 界面(可选项目路径) | +| `gsd --worktree`(`-w`)[name] | 在 git worktree 中启动会话(未指定时自动生成名称) | +| `gsd --no-session` | 禁用会话持久化 | +| `gsd --extension ` | 加载一个额外扩展(可重复) | +| `gsd --append-system-prompt ` | 向 system prompt 末尾追加文本 | +| `gsd --tools ` | 启用的工具列表,逗号分隔 | +| `gsd --version`(`-v`) | 输出版本并退出 | +| `gsd --help`(`-h`) | 输出帮助并退出 | +| `gsd sessions` | 交互式会话选择器:列出当前目录所有保存的会话并选择一个恢复 | +| `gsd --debug` | 启用结构化 JSONL 诊断日志,用于排查 dispatch 和 state 问题 | +| `gsd config` | 配置搜索和文档工具所需的全局 API keys(保存到 `~/.gsd/agent/auth.json`,对所有项目生效)。见 [Global API Keys](./configuration.md#global-api-keys-gsd-config)。 | +| `gsd update` | 更新到最新版本 | +| `gsd headless new-milestone` | 根据上下文文件创建新的 milestone(headless,无需 TUI) | + +## Headless 模式 + +`gsd headless` 可在无 TUI 的情况下运行 `/gsd` 命令,适合 CI、cron job 和脚本自动化。它会在 RPC 模式下启动一个子进程,自动回应交互式提示、检测完成状态,并用有意义的退出码退出。 + +```bash +# 运行自动模式(默认) +gsd headless + +# 运行一个单元 +gsd headless next + +# 即时 JSON 快照,无需 LLM,约 50ms +gsd headless query + +# 用于 CI 的超时参数 +gsd headless --timeout 600000 auto + +# 强制指定一个 phase +gsd headless dispatch plan + +# 根据上下文文件创建新 milestone,并启动自动模式 +gsd headless new-milestone --context brief.md --auto + +# 用内联文本创建 milestone +gsd headless new-milestone --context-text "Build a REST API with auth" + +# 从 stdin 管道输入上下文 +echo "Build a CLI tool" | gsd headless new-milestone --context - +``` + +| 参数 | 说明 | +|------|------| +| `--timeout N` | 总超时(毫秒),默认 `300000` / 5 分钟 | +| `--max-restarts N` | 崩溃时自动重启并指数退避(默认 3)。设为 0 可关闭 | +| `--json` | 以 JSONL 形式把所有事件流式输出到 stdout | +| `--model ID` | 覆盖 headless 会话使用的模型 | +| `--context ` | 给 `new-milestone` 提供上下文文件(用 `-` 表示 stdin) | +| `--context-text ` | 给 `new-milestone` 提供内联上下文文本 | +| `--auto` | 在创建 milestone 后直接接续自动模式 | + +**退出码:** `0` 表示完成,`1` 表示错误或超时,`2` 表示被阻塞。 + +任何 `/gsd` 子命令都可以作为位置参数使用,例如:`gsd headless status`、`gsd headless doctor`、`gsd headless dispatch execute` 等。 + +### `gsd headless query` + +它会返回单个 JSON 对象,包含完整项目快照,无需 LLM 会话,也无需 RPC 子进程,响应几乎即时(约 50ms)。这是 orchestration 工具和脚本检查 GSD 状态的推荐方式。 + +```bash +gsd headless query | jq '.state.phase' +# "executing" + +gsd headless query | jq '.next' +# {"action":"dispatch","unitType":"execute-task","unitId":"M001/S01/T03"} + +gsd headless query | jq '.cost.total' +# 4.25 +``` + +**输出结构:** + +```json +{ + "state": { + "phase": "executing", + "activeMilestone": { "id": "M001", "title": "..." }, + "activeSlice": { "id": "S01", "title": "..." }, + "activeTask": { "id": "T01", "title": "..." }, + "registry": [{ "id": "M001", "status": "active" }, ...], + "progress": { "milestones": { "done": 0, "total": 2 }, "slices": { "done": 1, "total": 3 } }, + "blockers": [] + }, + "next": { + "action": "dispatch", + "unitType": "execute-task", + "unitId": "M001/S01/T01" + }, + "cost": { + "workers": [{ "milestoneId": "M001", "cost": 1.50, "state": "running", ... }], + "total": 1.50 + } +} +``` + + +## MCP Server 模式 + +`gsd --mode mcp` 会通过 stdin/stdout 将 GSD 作为一个 [Model Context Protocol](https://modelcontextprotocol.io) server 运行。这会把所有 GSD 工具(read、write、edit、bash 等)暴露给外部 AI 客户端,例如 Claude Desktop、VS Code Copilot,以及任何兼容 MCP 的宿主。 + +```bash +# 以 MCP server 模式启动 GSD +gsd --mode mcp +``` + +服务会注册 agent 会话中的全部工具,并把 MCP 的 `tools/list` 与 `tools/call` 请求映射到 GSD 的工具定义上。连接会一直保持,直到底层 transport 关闭。 + +## 会话内更新 + +`/gsd update` 会检查 npm 上是否有更新版本,并在不离开当前会话的情况下完成安装。 + +```bash +/gsd update +# Current version: v2.36.0 +# Checking npm registry... +# Updated to v2.37.0. Restart GSD to use the new version. 
+``` + +如果已经是最新版本,它会给出提示且不做任何操作。 + +## 导出 + +`/gsd export` 用于导出 milestone 工作报告。 + +```bash +# 为当前 active milestone 生成 HTML 报告 +/gsd export --html + +# 一次性为所有 milestones 生成回顾报告 +/gsd export --html --all +``` + +报告会保存到 `.gsd/reports/`,并生成一个可浏览的 `index.html`,链接到所有已生成的快照。 diff --git a/docs/zh-CN/user-docs/configuration.md b/docs/zh-CN/user-docs/configuration.md new file mode 100644 index 000000000..7548e6fa2 --- /dev/null +++ b/docs/zh-CN/user-docs/configuration.md @@ -0,0 +1,852 @@ +# 配置 + +GSD 偏好设置保存在 `~/.gsd/PREFERENCES.md`(全局)或 `.gsd/PREFERENCES.md`(项目级)中。可以通过 `/gsd prefs` 进行交互式管理。 + +## `/gsd prefs` 命令 + +| 命令 | 说明 | +|------|------| +| `/gsd prefs` | 打开全局偏好设置向导(默认) | +| `/gsd prefs global` | 全局偏好设置交互向导(`~/.gsd/PREFERENCES.md`) | +| `/gsd prefs project` | 项目偏好设置交互向导(`.gsd/PREFERENCES.md`) | +| `/gsd prefs status` | 显示当前偏好文件、合并后的值以及 skill 解析状态 | +| `/gsd prefs wizard` | `/gsd prefs global` 的别名 | +| `/gsd prefs setup` | `/gsd prefs wizard` 的别名;若偏好文件不存在会自动创建 | +| `/gsd prefs import-claude` | 将 Claude marketplace plugins 和 skills 以命名空间化的 GSD 组件形式导入 | +| `/gsd prefs import-claude global` | 导入到全局作用域 | +| `/gsd prefs import-claude project` | 导入到项目作用域 | + +## 偏好文件格式 + +偏好设置使用 markdown 文件中的 YAML frontmatter: + +```yaml +--- +version: 1 +models: + research: claude-sonnet-4-6 + planning: claude-opus-4-6 + execution: claude-sonnet-4-6 + completion: claude-sonnet-4-6 +skill_discovery: suggest +auto_supervisor: + soft_timeout_minutes: 20 + idle_timeout_minutes: 10 + hard_timeout_minutes: 30 +budget_ceiling: 50.00 +token_profile: balanced +--- +``` + +## 全局与项目偏好 + +| 作用域 | 路径 | 适用范围 | +|--------|------|----------| +| 全局 | `~/.gsd/PREFERENCES.md` | 所有项目 | +| 项目 | `.gsd/PREFERENCES.md` | 仅当前项目 | + +**合并规则:** + +- **标量字段**(`skill_discovery`、`budget_ceiling`):如果项目级定义了,则项目级优先 +- **数组字段**(`always_use_skills` 等):拼接,顺序为全局在前、项目在后 +- **对象字段**(`models`、`git`、`auto_supervisor`):浅合并,项目级按 key 覆盖 + + +## 全局 API Keys(`/gsd config`) + +工具 API keys 会全局保存在 `~/.gsd/agent/auth.json` 中,并自动应用到所有项目。只需用 `/gsd config` 配置一次,无需在每个项目里维护 `.env`。 + +```bash +/gsd config +``` + +这会打开一个交互式向导,显示哪些 key 已配置、哪些仍缺失。你可以选择一个工具并输入相应的 key。 + +### 支持的 keys + +| 工具 | 环境变量 | 用途 | 获取地址 | +|------|----------|------|----------| +| Tavily Search | `TAVILY_API_KEY` | 为非 Anthropic models 提供 Web 搜索 | [tavily.com/app/api-keys](https://tavily.com/app/api-keys) | +| Brave Search | `BRAVE_API_KEY` | 为非 Anthropic models 提供 Web 搜索 | [brave.com/search/api](https://brave.com/search/api) | +| Context7 Docs | `CONTEXT7_API_KEY` | 库文档检索 | [context7.com/dashboard](https://context7.com/dashboard) | + +### 工作方式 + +1. `/gsd config` 会把 keys 保存到 `~/.gsd/agent/auth.json` +2. 每次会话启动时,`loadToolApiKeys()` 都会读取该文件并设置环境变量 +3. 这些 keys 对所有项目生效,无需单独配置 +4. 环境变量(例如 `export BRAVE_API_KEY=...`)优先级高于保存下来的 keys +5. 
Anthropic models 不需要 Brave/Tavily,因为它们自带 Web 搜索 + +## MCP Servers + +GSD 可以连接配置在项目文件中的外部 MCP servers。这适合接入本地工具、内部 API、自托管服务,或者那些未作为 GSD 原生扩展内置的集成。 + +### 配置文件位置 + +GSD 会从以下项目本地路径读取 MCP client 配置: + +- `.mcp.json` +- `.gsd/mcp.json` + +如果两个文件都存在,会按 server 名称做合并,先找到的定义优先。通常建议: + +- 把你愿意提交到仓库的共享 MCP 配置放在 `.mcp.json` +- 把仅本机使用、不希望共享的 MCP 配置放在 `.gsd/mcp.json` + +### 支持的 transport + +| Transport | 配置形状 | 适用场景 | +|-----------|----------|----------| +| `stdio` | `command` + 可选 `args`、`env`、`cwd` | 启动本地 MCP server 进程 | +| `http` | `url` | 连接到已经运行中的 MCP server | + +### 示例:stdio server + +```json +{ + "mcpServers": { + "my-server": { + "type": "stdio", + "command": "/absolute/path/to/python3", + "args": ["/absolute/path/to/server.py"], + "env": { + "API_URL": "http://localhost:8000" + } + } + } +} +``` + +### 示例:HTTP server + +```json +{ + "mcpServers": { + "my-http-server": { + "url": "http://localhost:8080/mcp" + } + } +} +``` + +### 验证一个 server + +添加配置后,可以在 GSD 会话中这样验证: + +```text +mcp_servers +mcp_discover(server="my-server") +mcp_call(server="my-server", tool="", args={...}) +``` + +推荐验证顺序: + +1. `mcp_servers`:确认 GSD 能看到配置文件并正确解析 server 条目 +2. `mcp_discover`:确认 server 进程能启动,并能响应 `tools/list` +3. `mcp_call`:确认至少有一个真实 tool 可以成功调用 + +### 说明 + +- 尽量为本地可执行文件和脚本使用绝对路径 +- 对于 `stdio` servers,优先在 MCP 配置里显式设置需要的环境变量,而不是依赖交互式 shell profile +- GSD 和 `gsd-mcp-server` 都会自动加载保存在 `~/.gsd/agent/auth.json` 中的 model / tool keys,因此 MCP 配置可以安全地通过 `${ENV_VAR}` 占位符引用这些值,而不必提交原始凭据 +- 如果某个 server 是团队共享且适合提交到仓库,通常更适合放在 `.mcp.json` +- 如果某个 server 依赖本机路径、个人服务或本地 secrets,更适合放在 `.gsd/mcp.json` + +## 环境变量 + +| 变量 | 默认值 | 说明 | +|------|--------|------| +| `GSD_HOME` | `~/.gsd` | 全局 GSD 目录。除非单独覆盖,否则其它路径都从这里派生。影响偏好、skills、sessions 以及项目状态。(v2.39) | +| `GSD_PROJECT_ID` | (自动哈希) | 覆盖自动生成的项目身份哈希。这样项目状态会写入 `$GSD_HOME/projects//`,而不是计算出的哈希目录。适用于 CI/CD 或多个克隆共享状态。(v2.39) | +| `GSD_STATE_DIR` | `$GSD_HOME` | 项目状态根目录。控制 `projects//` 的创建位置。对项目状态的优先级高于 `GSD_HOME`。 | +| `GSD_CODING_AGENT_DIR` | `$GSD_HOME/agent` | agent 目录,包含托管资源、扩展和 auth。对 agent 相关路径的优先级高于 `GSD_HOME`。 | +| `GSD_ALLOWED_COMMAND_PREFIXES` | (内置列表) | 允许用于 `!command` 值解析的命令前缀,逗号分隔。会覆盖 settings.json 中的 `allowedCommandPrefixes`。见 [自定义模型:命令允许列表](custom-models.md#command-allowlist)。 | +| `GSD_FETCH_ALLOWED_URLS` | (无) | 对 `fetch_page` URL block 免检的 hostnames,逗号分隔。会覆盖 settings.json 中的 `fetchAllowedUrls`。见 [URL Blocking](#url-blocking-fetch_page)。 | + +## 全部设置 + +### `models` + +按阶段选择 model。每个 key 都可以是一个 model 字符串,或者是带 fallbacks 的对象。 + +```yaml +models: + research: claude-sonnet-4-6 + planning: + model: claude-opus-4-6 + fallbacks: + - openrouter/z-ai/glm-5 + execution: claude-sonnet-4-6 + execution_simple: claude-haiku-4-5-20250414 + completion: claude-sonnet-4-6 + subagent: claude-sonnet-4-6 +``` + +**阶段键:** `research`、`planning`、`execution`、`execution_simple`、`completion`、`subagent` + +- `execution_simple`:用于被 [complexity router](./token-optimization.md#complexity-based-task-routing) 判断为 “simple” 的 task +- `subagent`:委派给 subagent 的 task 所使用的 model(scout、researcher、worker) +- 指定 provider:使用 `provider/model` 格式(例如 `bedrock/claude-sonnet-4-6`),或者在对象格式里额外写 `provider` 字段 +- 省略某个 key 时,会使用当前 active model + +### 自定义 Model 定义(`models.json`) + +你可以在 `~/.gsd/agent/models.json` 里定义自定义 models 和 providers。这允许你添加默认注册表里没有的 models,适合自托管 endpoints(Ollama、vLLM、LM Studio)、微调模型、代理,或者刚发布的新 provider。 + +GSD 读取 `models.json` 的顺序如下: + +1. `~/.gsd/agent/models.json`:主位置(GSD) +2. `~/.pi/agent/models.json`:回退位置(Pi) +3. 
如果两者都不存在,则创建 `~/.gsd/agent/models.json` + +**本地 models(Ollama)的快速示例:** + +```json +{ + "providers": { + "ollama": { + "baseUrl": "http://localhost:11434/v1", + "api": "openai-completions", + "apiKey": "ollama", + "models": [ + { "id": "llama3.1:8b" }, + { "id": "qwen2.5-coder:7b" } + ] + } + } +} +``` + +每次打开 `/model` 时,这个文件都会重新加载,无需重启。 + +关于 provider 配置、model overrides、OpenAI compatibility 和更多高级示例,见 [自定义模型指南](./custom-models.md)。 + +**带 fallbacks 的示例:** + +```yaml +models: + planning: + model: claude-opus-4-6 + fallbacks: + - openrouter/z-ai/glm-5 + - openrouter/moonshotai/kimi-k2.5 + provider: bedrock # 可选:固定到某个 provider +``` + +当某个 model 切换失败(provider 不可用、被限流、额度耗尽)时,GSD 会自动尝试 `fallbacks` 列表中的下一个 model。 + +### Community Provider Extensions + +对于 GSD 未内置的 providers,社区扩展可以添加完整 provider 支持,包括正确的 model 定义、thinking format 配置以及交互式 API key 设置。 + +| 扩展 | Provider | Models | 安装命令 | +|------|----------|--------|----------| +| [`pi-dashscope`](https://www.npmjs.com/package/pi-dashscope) | Alibaba DashScope(ModelStudio) | Qwen3、GLM-5、MiniMax M2.5、Kimi K2.5 | `gsd install npm:pi-dashscope` | + +对于 DashScope models,更推荐使用社区扩展而不是内置的 `alibaba-coding-plan` provider,因为前者会走正确的 OpenAI-compatible endpoint,并包含适配 thinking mode 的 per-model compatibility flags。 + +### `token_profile` + +负责协调 model 选择、阶段跳过和上下文压缩。详见 [Token 优化](./token-optimization.md)。 + +可选值:`budget`、`balanced`(默认)、`quality` + +| 配置 | 行为 | +|------|------| +| `budget` | 跳过 research + reassessment 阶段,优先使用便宜模型 | +| `balanced` | 默认行为:所有阶段运行,使用标准模型选择 | +| `quality` | 所有阶段运行,优先更高质量模型 | + +### `phases` + +对自动模式中哪些阶段运行做细粒度控制: + +```yaml +phases: + skip_research: false # 跳过 milestone 级 research + skip_reassess: false # 在每个 slice 后跳过 roadmap reassessment + skip_slice_research: true # 跳过每个 slice 的 research + reassess_after_slice: true # 每个 slice 后执行 roadmap reassessment(reassessment 的前提) + require_slice_discussion: false # 每个 slice 前暂停,等待讨论 +``` + +这些值通常由 `token_profile` 自动设置,但也可以显式覆盖。 + +> **注意:** Roadmap reassessment 需要显式设置 `reassess_after_slice: true`。如果没有它,无论 `skip_reassess` 怎么配,reassessment 都不会运行。 + +### `skill_discovery` + +控制 GSD 在自动模式中如何发现并应用 skills。 + +| 值 | 行为 | +|----|------| +| `auto` | 自动查找并应用 skills | +| `suggest` | 在 research 阶段识别到 skills,但不自动安装(默认) | +| `off` | 关闭 skill discovery | + +### `auto_supervisor` + +自动模式监督器使用的超时阈值: + +```yaml +auto_supervisor: + model: claude-sonnet-4-6 # 可选:supervisor 使用的 model(默认当前 active model) + soft_timeout_minutes: 20 # 提醒 LLM 收尾 + idle_timeout_minutes: 10 # 检测停滞 + hard_timeout_minutes: 30 # 暂停自动模式 +``` + +### `budget_ceiling` + +自动模式期间允许消耗的最大美元金额。不需要 `$`,直接填数字: + +```yaml +budget_ceiling: 50.00 +``` + +### `budget_enforcement` + +预算上限的执行方式: + +| 值 | 行为 | +|----|------| +| `warn` | 记录警告,但继续运行 | +| `pause` | 暂停自动模式(设置 ceiling 时的默认值) | +| `halt` | 彻底停止自动模式 | + +### `context_pause_threshold` + +上下文窗口使用率达到多少(0-100)时,自动模式会暂停并进行 checkpoint。设为 `0` 可关闭。 + +```yaml +context_pause_threshold: 80 # 在上下文使用达到 80% 时暂停 +``` + +默认值:`0`(关闭) + +### `uat_dispatch` + +在 slice 完成后自动运行 UAT(User Acceptance Test): + +```yaml +uat_dispatch: true +``` + +### Verification(v2.26) + +配置在每次 task 执行后自动运行的 shell 命令。若失败,会先尝试自动修复重试,再决定是否继续。 + +```yaml +verification_commands: + - npm run lint + - npm run test +verification_auto_fix: true # 失败时自动重试修复(默认:true) +verification_max_retries: 2 # 最大重试次数(默认:2) +``` + +| 字段 | 类型 | 默认值 | 说明 | +|------|------|--------|------| +| `verification_commands` | string[] | `[]` | task 执行后要运行的 shell 命令 | +| `verification_auto_fix` | boolean | `true` | verification 失败时是否自动重试 | +| `verification_max_retries` | number 
| `2` | 自动修复重试的最大次数 | + + +### URL Blocking(`fetch_page`) + +`fetch_page` 工具默认会阻止访问私有网络和内部网络地址,以防 SSRF(server-side request forgery)。这能防止 agent 被诱导去访问内部服务、云 metadata endpoint 或本地文件。 + +**默认会被拦截:** + +| 类别 | 示例 | +|------|------| +| 私有 IP 段 | `10.x.x.x`、`172.16-31.x.x`、`192.168.x.x`、`127.x.x.x` | +| Link-local / 云 metadata | `169.254.x.x`(AWS/GCP instance metadata) | +| 云 metadata hostname | `metadata.google.internal`、`instance-data` | +| Localhost | `localhost`(任意端口) | +| 非 HTTP 协议 | `file://`、`ftp://` | +| IPv6 私有地址段 | `::1`、`fc00:`、`fd`、`fe80:` | + +公共 URL(例如 `https://example.com`、`http://8.8.8.8`)不受影响。 + +**允许特定内部主机:** + +如果你确实需要 agent 访问内网 URL(例如自托管文档、VPN 后的内部 API),可以在全局设置 `~/.gsd/agent/settings.json` 中添加 `fetchAllowedUrls`: + +```json +{ + "fetchAllowedUrls": ["internal-docs.company.com", "192.168.1.50"] +} +``` + +或者设置 `GSD_FETCH_ALLOWED_URLS` 环境变量(逗号分隔)。环境变量优先级高于 settings.json: + +```bash +export GSD_FETCH_ALLOWED_URLS="internal-docs.company.com,192.168.1.50" +``` + +被允许的 hostname 会绕过 blocklist 检查。但协议限制依然有效,也就是说 `file://` 和 `ftp://` 仍然不能加入 allowlist。 + +> **注意:** 这是一个仅全局生效的设置。项目级 settings.json 不能覆盖 URL allowlist,以防克隆下来的仓库把 `fetch_page` 指向内部基础设施。 + +### `auto_report`(v2.26) + +在 milestone 完成后自动生成 HTML 报告: + +```yaml +auto_report: true # 默认:true +``` + +报告会以自包含 HTML 文件的形式写入 `.gsd/reports/`,所有 CSS / JS 都内嵌。 + +### `unique_milestone_ids` + +为 milestone IDs 添加随机后缀,以避免团队协作中的 ID 冲突: + +```yaml +unique_milestone_ids: true +# 输出示例:M001-eh88as,而不是 M001 +``` + +### `git` + +Git 行为配置。所有字段都是可选的: + +```yaml +git: + auto_push: false # 提交后推送到远程 + push_branches: false # 推送 milestone 分支到远程 + remote: origin # git remote 名称 + snapshots: true # 长 task 执行期间做 WIP snapshot commits + pre_merge_check: auto # worktree merge 前执行检查(true / false / "auto") + commit_type: feat # 覆盖 conventional commit 前缀 + main_branch: main # 主分支名称 + merge_strategy: squash # worktree 分支合并方式:"squash" 或 "merge" + isolation: worktree # git isolation:"worktree"、"branch" 或 "none" + commit_docs: true # 是否把 .gsd/ 产物提交到 git(设为 false 时仅保留本地) + manage_gitignore: true # 设为 false 时,GSD 不再修改 .gitignore + worktree_post_create: .gsd/hooks/post-worktree-create # worktree 创建后执行的脚本 + auto_pr: false # milestone 完成时自动创建 PR(要求 push_branches) + pr_target_branch: develop # 自动创建 PR 的目标分支(默认:main branch) +``` + +| 字段 | 类型 | 默认值 | 说明 | +|------|------|--------|------| +| `auto_push` | boolean | `false` | 提交后推送到远程 | +| `push_branches` | boolean | `false` | 把 milestone 分支推送到远程 | +| `remote` | string | `"origin"` | Git remote 名称 | +| `snapshots` | boolean | `true` | 长 task 期间做 WIP snapshot commits | +| `pre_merge_check` | bool/string | `"auto"` | merge 前是否执行检查(`true` / `false` / `"auto"`) | +| `commit_type` | string | (自动推断) | 覆盖 conventional commit 前缀(`feat`、`fix`、`refactor`、`docs`、`test`、`chore`、`perf`、`ci`、`build`、`style`) | +| `main_branch` | string | `"main"` | 主分支名称 | +| `merge_strategy` | string | `"squash"` | worktree 分支合并方式:`"squash"`(合并为单个提交)或 `"merge"`(保留单独提交) | +| `isolation` | string | `"worktree"` | 自动模式隔离方式:`"worktree"`(独立目录)、`"branch"`(直接在项目根目录工作,适合子模块多的仓库)、`"none"`(无隔离,直接提交到当前分支) | +| `commit_docs` | boolean | `true` | 是否把 `.gsd/` planning 产物提交到 git。设为 `false` 则仅保留本地 | +| `manage_gitignore` | boolean | `true` | 设为 `false` 后,GSD 将完全不修改 `.gitignore`,不会添加基础规则,也不会做自愈 | +| `worktree_post_create` | string | (无) | worktree 创建后执行的脚本。环境变量中会传入 `SOURCE_DIR` 和 `WORKTREE_DIR` | +| `auto_pr` | boolean | `false` | milestone 完成时自动创建 pull request。要求 `auto_push: true` 且已安装认证 `gh` CLI | +| `pr_target_branch` | string | (main branch) | 自动创建 PR 的目标分支,例如 
`develop`、`qa`。未设置时默认回退到 `main_branch` | + +#### `git.worktree_post_create` + +在 worktree 创建后执行脚本(自动模式和手动 `/worktree` 都适用)。适合复制 `.env`、建立资源目录软链,或者执行那些 worktree 不会继承的 setup 步骤。 + +```yaml +git: + worktree_post_create: .gsd/hooks/post-worktree-create +``` + +脚本会收到两个环境变量: + +- `SOURCE_DIR`:原始项目根目录 +- `WORKTREE_DIR`:新创建的 worktree 路径 + +示例 hook(`.gsd/hooks/post-worktree-create`): + +```bash +#!/bin/bash +# Copy environment files and symlink assets into the new worktree +cp "$SOURCE_DIR/.env" "$WORKTREE_DIR/.env" +cp "$SOURCE_DIR/.env.local" "$WORKTREE_DIR/.env.local" 2>/dev/null || true +ln -sf "$SOURCE_DIR/assets" "$WORKTREE_DIR/assets" +``` + +路径既可以是绝对路径,也可以相对项目根目录。脚本有 30 秒超时限制。失败不会中断流程,GSD 会记录告警后继续。 + + +#### `git.auto_pr` + +在 milestone 完成时自动创建 pull request。适用于 Gitflow 或分支工作流团队,在合并到目标分支前通过 PR 做审查。 + +```yaml +git: + auto_push: true + auto_pr: true + pr_target_branch: develop # 或 qa、staging 等 +``` + +**要求:** + +- `auto_push: true`:创建 PR 前必须先把 milestone 分支推送到远程 +- 已安装并认证 [`gh` CLI](https://cli.github.com/)(`gh auth login`) + +**工作方式:** + +1. milestone 完成后,GSD 先把 worktree squash merge 回主分支 +2. 如果 `auto_push: true`,把主分支推送到远程 +3. 把 milestone 分支推送到远程 +4. 通过 `gh pr create` 从 milestone 分支向 `pr_target_branch` 创建 PR + +如果没有设置 `pr_target_branch`,PR 会默认指向 `main_branch`(或者自动检测出的主分支)。PR 创建失败不会中断流程,GSD 会记录日志后继续。 + +### `github`(v2.39) + +GitHub 同步配置。启用后,GSD 会自动把 milestones、slices 和 tasks 同步到 GitHub Issues、PRs 和 Milestones。 + +```yaml +github: + enabled: true + repo: "owner/repo" # 省略时从 git remote 自动检测 + labels: [gsd, auto-generated] # 应用到创建出的 issues / PRs 的标签 + project: "Project ID" # 可选的 GitHub Project board +``` + +| 字段 | 类型 | 默认值 | 说明 | +|------|------|--------|------| +| `enabled` | boolean | `false` | 是否启用 GitHub 同步 | +| `repo` | string | (自动检测) | `owner/repo` 格式的 GitHub 仓库名 | +| `labels` | string[] | `[]` | 创建的 issues / PRs 要附加的标签 | +| `project` | string | (无) | GitHub Project ID,用于接入 Project board | + +**要求:** + +- 已安装并认证 `gh` CLI(`gh auth login`) +- 同步映射会保存在 `.gsd/.github-sync.json` +- 具备速率限制感知:当 GitHub API rate limit 偏低时会跳过同步 + +**命令:** + +- `/github-sync bootstrap`:初始化配置并执行同步 +- `/github-sync status`:显示同步映射数量 + +### `notifications` + +控制 GSD 在自动模式中发出哪些通知: + +```yaml +notifications: + enabled: true + on_complete: true # 单元完成时通知 + on_error: true # 出错时通知 + on_budget: true # 预算阈值通知 + on_milestone: true # milestone 完成时通知 + on_attention: true # 需要人工介入时通知 +``` + +**macOS 通知方式:** GSD 会优先使用 [`terminal-notifier`](https://github.com/julienXX/terminal-notifier),不可用时回退到 `osascript`。建议安装 `terminal-notifier`,获得更稳定的通知体验: + +```bash +brew install terminal-notifier +``` + +原因:`osascript display notification` 的通知权限是算在你的终端应用(Ghostty、iTerm2 等)上的,而这些应用在 System Settings → Notifications 中未必被允许。`terminal-notifier` 会注册成独立 App,并在首次使用时主动请求通知权限。如果通知异常,见 [故障排查:macOS 上通知不显示](troubleshooting.md#notifications-not-appearing-on-macos)。 + +### `remote_questions` + +把交互式问题路由到 Slack 或 Discord,以支持 headless 自动模式: + +```yaml +remote_questions: + channel: slack # 或 discord + channel_id: "C1234567890" + timeout_minutes: 15 # 问题超时(1-30 分钟) + poll_interval_seconds: 10 # 轮询间隔(2-30 秒) +``` + +### `post_unit_hooks` + +在特定单元完成后触发的自定义 hooks: + +```yaml +post_unit_hooks: + - name: code-review + after: [execute-task] + prompt: "Review the code changes for quality and security issues." 
+ model: claude-opus-4-6 # 可选:覆盖 model + max_cycles: 1 # 每次触发最多执行几轮(1-10,默认 1) + artifact: REVIEW.md # 可选:若该文件已存在则跳过 + retry_on: NEEDS-REWORK.md # 可选:若生成该文件,则回退并重跑触发单元 + agent: review-agent # 可选:指定使用哪个 agent 定义 + enabled: true # 可选:保留配置但临时禁用 +``` + +`after` 可识别的 unit types 包括:`research-milestone`、`plan-milestone`、`research-slice`、`plan-slice`、`execute-task`、`complete-slice`、`replan-slice`、`reassess-roadmap`、`run-uat` + +**Prompt 占位符:** `{milestoneId}`、`{sliceId}`、`{taskId}` 会自动替换成当前上下文值。 + +### `pre_dispatch_hooks` + +在 dispatch 前拦截某个单元。支持三种动作: + +**Modify**:在单元 prompt 前后拼接文本 + +```yaml +pre_dispatch_hooks: + - name: add-standards + before: [execute-task] + action: modify + prepend: "Follow our coding standards document." + append: "Run linting after changes." +``` + +**Skip**:完全跳过该单元 + +```yaml +pre_dispatch_hooks: + - name: skip-research + before: [research-slice] + action: skip + skip_if: RESEARCH.md # 可选:仅当该文件存在时才跳过 +``` + +**Replace**:完全替换该单元 prompt + +```yaml +pre_dispatch_hooks: + - name: custom-execute + before: [execute-task] + action: replace + prompt: "Execute the task using TDD methodology." + unit_type: execute-task-tdd # 可选:覆盖 unit type 标签 + model: claude-opus-4-6 # 可选:覆盖 model +``` + +所有 pre-dispatch hooks 都支持 `enabled: true/false`,用于开关而不删除配置。 + +### `always_use_skills` / `prefer_skills` / `avoid_skills` + +Skill 路由偏好: + +```yaml +always_use_skills: + - debug-like-expert +prefer_skills: + - frontend-design +avoid_skills: [] +``` + +Skills 既可以写裸名称(去 `~/.agents/skills/` 和 `.agents/skills/` 查找),也可以写绝对路径。 + +### `skill_rules` + +基于人类可读触发条件的情景化 skill 路由: + +```yaml +skill_rules: + - when: task involves authentication + use: [clerk] + - when: frontend styling work + prefer: [frontend-design] + - when: working with legacy code + avoid: [aggressive-refactor] +``` + +### `custom_instructions` + +附加到每个会话上的持久指令: + +```yaml +custom_instructions: + - "Always use TypeScript strict mode" + - "Prefer functional patterns over classes" +``` + +如果是项目特有知识(模式、坑点、经验),请优先放到 `.gsd/KNOWLEDGE.md` 中,因为它会自动注入每个 agent prompt。你也可以通过 `/gsd knowledge rule|pattern|lesson ` 添加。 + +### `RUNTIME.md`:运行时上下文(v2.39) + +你可以在 `.gsd/RUNTIME.md` 中声明项目级运行时上下文。这个文件会内联进 task execution prompt,让 agent 能准确知道运行环境,而不必靠猜测路径或 URL。 + +**位置:** `.gsd/RUNTIME.md` + +**示例:** + +```markdown +# Runtime Context + +## API Endpoints +- Main API: https://api.example.com +- Cache: redis://localhost:6379 + +## Environment Variables +- DEPLOYMENT_ENV: staging +- DB_POOL_SIZE: 20 + +## Local Services +- PostgreSQL: localhost:5432 +- Redis: localhost:6379 +``` + +适合放在这里的信息,是那些执行时需要知道、但又不属于 `DECISIONS.md`(架构)或 `KNOWLEDGE.md`(规则 / 模式)的内容。典型例子包括:API base URL、服务端口、部署目标,以及环境特有配置。 + +### `dynamic_routing` + +基于复杂度的 model 路由。详见 [动态模型路由](./dynamic-model-routing.md)。 + +```yaml +dynamic_routing: + enabled: true + capability_routing: true # 按 task capability 评分 models(v2.59) + tier_models: + light: claude-haiku-4-5 + standard: claude-sonnet-4-6 + heavy: claude-opus-4-6 + escalate_on_failure: true + budget_pressure: true + cross_provider: true +``` + +### `context_management`(v2.59) + +控制自动模式会话中的 observation masking 和 tool result truncation。可在不增加 LLM 开销的前提下,减少 compaction 之间的上下文膨胀。 + +```yaml +context_management: + observation_masking: true # 用占位符替换旧 tool result(默认:true) + observation_mask_turns: 8 # 保留最近 N 个 user turn 的结果(1-50,默认:8) + compaction_threshold_percent: 0.70 # 在 70% 上下文使用率处触发 compaction(0.5-0.95,默认:0.70) + tool_result_max_chars: 800 # 单个 tool result 的最大字符数(200-10000,默认:800) +``` + +### `service_tier`(v2.42) + +OpenAI 支持模型的 service tier 偏好。可通过 
`/gsd fast` 切换。 + +| 值 | 行为 | +|----|------| +| `"priority"` | Priority tier:2 倍成本,更快响应 | +| `"flex"` | Flex tier:0.5 倍成本,更慢响应 | +| (未设置) | 默认 tier | + +```yaml +service_tier: priority +``` + +### `forensics_dedup`(v2.43) + +可选启用:在 `/gsd forensics` 提交 issue 之前,先搜索现有 issues 和 PRs。会额外消耗一些 AI tokens。 + +```yaml +forensics_dedup: true # 默认:false +``` + +### `show_token_cost`(v2.44) + +可选启用:在 footer 中显示每次 prompt 和累计会话的 token 成本。 + +```yaml +show_token_cost: true # 默认:false +``` + +### `auto_visualize` + +在 milestone 完成后自动显示工作流可视化器: + +```yaml +auto_visualize: true +``` + +详见 [工作流可视化器](./visualizer.md)。 + +### `parallel` + +同时运行多个 milestones。默认关闭。 + +```yaml +parallel: + enabled: false # 总开关 + max_workers: 2 # 并发 workers 数(1-4) + budget_ceiling: 50.00 # 聚合成本上限(美元) + merge_strategy: "per-milestone" # "per-slice" 或 "per-milestone" + auto_merge: "confirm" # "auto"、"confirm" 或 "manual" +``` + +完整细节见 [并行编排](./parallel-orchestration.md)。 + +## 完整示例 + +```yaml +--- +version: 1 + +# Model selection +models: + research: openrouter/deepseek/deepseek-r1 + planning: + model: claude-opus-4-6 + fallbacks: + - openrouter/z-ai/glm-5 + execution: claude-sonnet-4-6 + execution_simple: claude-haiku-4-5-20250414 + completion: claude-sonnet-4-6 + +# Token optimization +token_profile: balanced + +# Dynamic model routing +dynamic_routing: + enabled: true + escalate_on_failure: true + budget_pressure: true + +# Budget +budget_ceiling: 25.00 +budget_enforcement: pause +context_pause_threshold: 80 + +# Supervision +auto_supervisor: + soft_timeout_minutes: 15 + hard_timeout_minutes: 25 + +# Git +git: + auto_push: true + merge_strategy: squash + isolation: worktree # "worktree", "branch", or "none" + commit_docs: true + +# Skills +skill_discovery: suggest +skill_staleness_days: 60 # Skills unused for N days get deprioritized (0 = disabled) +always_use_skills: + - debug-like-expert +skill_rules: + - when: task involves authentication + use: [clerk] + +# Notifications +notifications: + on_complete: false + on_milestone: true + on_attention: true + +# Visualizer +auto_visualize: true + +# Service tier +service_tier: priority # "priority" or "flex" (for /gsd fast) + +# Diagnostics +forensics_dedup: true # deduplicate before filing forensics issues +show_token_cost: true # show per-prompt cost in footer + +# Hooks +post_unit_hooks: + - name: code-review + after: [execute-task] + prompt: "Review {sliceId}/{taskId} for quality and security." 
+ artifact: REVIEW.md +--- +``` diff --git a/docs/zh-CN/user-docs/cost-management.md b/docs/zh-CN/user-docs/cost-management.md new file mode 100644 index 000000000..2b047c6ea --- /dev/null +++ b/docs/zh-CN/user-docs/cost-management.md @@ -0,0 +1,94 @@ +# 成本管理 + +GSD 会跟踪自动模式中每个派发工作单元的 Token 使用量和成本。这些数据会驱动仪表板、预算约束以及成本预测。 + +## 成本跟踪 + +每个工作单元的指标都会被自动记录: + +- **Token 数量**:input、output、cache read、cache write、total +- **成本**:每个单元的美元成本 +- **耗时**:真实墙钟时间 +- **工具调用数**:工具调用次数 +- **消息数量**:assistant 与 user 消息数 + +数据保存在 `.gsd/metrics.json` 中,并且可跨会话持续存在。 + +### 查看成本 + +**仪表板**:按 `Ctrl+Alt+G` 或执行 `/gsd status` 可查看实时成本拆分。 + +**可用聚合维度:** + +- 按阶段(research、planning、execution、completion、reassessment) +- 按 slice(M001/S01、M001/S02 等) +- 按模型(哪些模型最耗预算) +- 项目总计 + +## 预算上限 + +可以为单个项目设置最大支出: + +```yaml +--- +version: 1 +budget_ceiling: 50.00 +--- +``` + +### 执行模式 + +控制触达预算上限后会发生什么: + +```yaml +budget_enforcement: pause # 设置 ceiling 后的默认值 +``` + +| 模式 | 行为 | +|------|------| +| `warn` | 记录警告,但继续执行 | +| `pause` | 暂停自动模式,等待用户动作 | +| `halt` | 直接停止自动模式 | + +## 成本预测 + +当至少完成两个 slices 后,GSD 会预测剩余成本: + +``` +Projected remaining: $12.40 ($6.20/slice avg × 2 remaining) +``` + +预测基于已完成工作的每-slice 平均成本。如果预算上限已触达,结果中还会附带一条警告。 + +## 预算压力与模型降级 + +当预算接近上限时,[复杂度路由器](./token-optimization.md#budget-pressure)会自动把模型分配降到更便宜的层级。这是一个渐进过程: + +- **已使用 < 50%**:不调整 +- **已使用 50-75%**:standard task 降为 light +- **已使用 75-90%**:同样降级,但更激进 +- **已使用 > 90%**:几乎所有 task 都降级,只有 heavy task 仍保留在 standard + +这样可以把预算尽量均匀地分摊到剩余工作中,而不是过早在几个复杂 task 上耗尽。 + +## Token 配置与成本 + +`token_profile` 偏好会直接影响成本: + +| 配置 | 常见节省幅度 | 方式 | +|------|--------------|------| +| `budget` | 40-60% | 更便宜的模型、跳过部分阶段、最小上下文 | +| `balanced` | 10-20% | 默认模型、跳过 slice research、标准上下文 | +| `quality` | 0%(基线) | 完整模型、完整阶段、完整上下文 | + +更多细节见 [Token 优化](./token-optimization.md)。 + +## 建议 + +- 先用 `balanced` 配置,并设置一个较宽松的 `budget_ceiling` 来建立成本基线 +- 完成几个 slices 后查看 `/gsd status`,确认每个 slice 的平均成本 +- 对于已知流程、重复性高的工作,切换到 `budget` 配置 +- 只有在做架构决策时才建议使用 `quality` +- 可以通过按阶段选模型,只在 planning 使用 Opus,而在 execution 保持 Sonnet +- 开启 `dynamic_routing`,让简单 task 自动下沉到更便宜的模型,详见 [动态模型路由](./dynamic-model-routing.md) +- 使用 `/gsd visualize` 的 Metrics 标签页查看预算具体花在了哪里 diff --git a/docs/zh-CN/user-docs/custom-models.md b/docs/zh-CN/user-docs/custom-models.md new file mode 100644 index 000000000..24346fd05 --- /dev/null +++ b/docs/zh-CN/user-docs/custom-models.md @@ -0,0 +1,378 @@ +# 自定义模型 + +通过 `~/.gsd/agent/models.json` 添加自定义 providers 和 models(Ollama、vLLM、LM Studio、代理等)。 + +## 目录 + +- [最小示例](#minimal-example) +- [完整示例](#full-example) +- [支持的 API](#supported-apis) +- [Provider 配置](#provider-configuration) +- [Model 配置](#model-configuration) +- [覆盖内置 Providers](#overriding-built-in-providers) +- [按 model 覆盖](#per-model-overrides) +- [OpenAI 兼容性](#openai-compatibility) + + +## 最小示例 + +对于本地 models(Ollama、LM Studio、vLLM),每个 model 只要求提供 `id`: + +```json +{ + "providers": { + "ollama": { + "baseUrl": "http://localhost:11434/v1", + "api": "openai-completions", + "apiKey": "ollama", + "models": [ + { "id": "llama3.1:8b" }, + { "id": "qwen2.5-coder:7b" } + ] + } + } +} +``` + +`apiKey` 在 schema 中是必填,但 Ollama 会忽略它,因此任意值都可以。 + +有些 OpenAI-compatible server 不支持推理模型使用的 `developer` role。对于这类 provider,需要把 `compat.supportsDeveloperRole` 设为 `false`,这样 GSD 会改用 `system` message 发送 system prompt。如果该 server 同时也不支持 `reasoning_effort`,还应把 `compat.supportsReasoningEffort` 也设为 `false`。 + +你可以在 provider 级别设置 `compat`,让它应用到该 provider 下的所有 models;也可以在 model 级别单独覆盖某个 model。这个设置常见于 Ollama、vLLM、SGLang 以及类似的 OpenAI-compatible server。 + +```json +{ + "providers": 
{ + "ollama": { + "baseUrl": "http://localhost:11434/v1", + "api": "openai-completions", + "apiKey": "ollama", + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false + }, + "models": [ + { + "id": "gpt-oss:20b", + "reasoning": true + } + ] + } + } +} +``` + + +## 完整示例 + +当你需要显式覆盖默认值时,可以写成更完整的配置: + +```json +{ + "providers": { + "ollama": { + "baseUrl": "http://localhost:11434/v1", + "api": "openai-completions", + "apiKey": "ollama", + "models": [ + { + "id": "llama3.1:8b", + "name": "Llama 3.1 8B (Local)", + "reasoning": false, + "input": ["text"], + "contextWindow": 128000, + "maxTokens": 32000, + "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 } + } + ] + } + } +} +``` + +每次打开 `/model` 时,这个文件都会重新加载。可以在会话过程中直接编辑,无需重启。 + + +## 支持的 API + +| API | 说明 | +|-----|------| +| `openai-completions` | OpenAI Chat Completions(兼容性最好) | +| `openai-responses` | OpenAI Responses API | +| `anthropic-messages` | Anthropic Messages API | +| `google-generative-ai` | Google Generative AI | + +`api` 可以设置在 provider 级别(作为该 provider 下所有 models 的默认值),也可以设置在 model 级别(覆盖单个 model)。 + + +## Provider 配置 + +| 字段 | 说明 | +|------|------| +| `baseUrl` | API endpoint URL | +| `api` | API 类型(见上) | +| `apiKey` | API key(见下方值解析) | +| `headers` | 自定义请求头(见下方值解析) | +| `authHeader` | 设为 `true` 时,自动添加 `Authorization: Bearer ` | +| `models` | model 配置数组 | +| `modelOverrides` | 针对该 provider 的内置 models 做按 model 覆盖 | + + +### 值解析 + +`apiKey` 和 `headers` 支持三种写法: + +- **Shell 命令:** `"!command"`,执行后读取 stdout + ```json + "apiKey": "!security find-generic-password -ws 'anthropic'" + "apiKey": "!op read 'op://vault/item/credential'" + ``` +- **环境变量:** 取对应环境变量的值 + ```json + "apiKey": "MY_API_KEY" + ``` +- **字面量:** 直接使用 + ```json + "apiKey": "sk-..." + ``` + + +#### 命令允许列表 + +Shell 命令(`!command`)只能执行一组已知的凭据工具。只有以下前缀开头的命令才会被允许: + +`pass`、`op`、`aws`、`gcloud`、`vault`、`security`、`gpg`、`bw`、`gopass`、`lpass` + +不在列表中的命令会被阻止,最终该值会解析为 `undefined`。同时会向 stderr 输出一条警告。 + +为了防止注入,命令参数中的 shell 操作符(`;`、`|`、`&`、`` ` ``、`$`、`>`、`<`)同样会被阻止。 + +**自定义允许列表:** + +如果你使用的凭据工具不在默认列表中,可以在全局设置(`~/.gsd/agent/settings.json`)里覆盖: + +```json +{ + "allowedCommandPrefixes": ["pass", "op", "sops", "doppler", "mycli"] +} +``` + +这会完全替换默认列表,因此如果你还想保留默认命令,需要一起写进去。 + +你也可以设置 `GSD_ALLOWED_COMMAND_PREFIXES` 环境变量(逗号分隔)。环境变量优先级高于 settings.json: + +```bash +export GSD_ALLOWED_COMMAND_PREFIXES="pass,op,sops,doppler" +``` + +> **注意:** 这是一个仅全局生效的设置。项目级 settings.json(`/.gsd/settings.json`)不能覆盖命令 allowlist,以防克隆下来的仓库提升命令执行权限。 + +### 自定义 Headers + +```json +{ + "providers": { + "custom-proxy": { + "baseUrl": "https://proxy.example.com/v1", + "apiKey": "MY_API_KEY", + "api": "anthropic-messages", + "headers": { + "x-portkey-api-key": "PORTKEY_API_KEY", + "x-secret": "!op read 'op://vault/item/secret'" + }, + "models": [...] 
+ } + } +} +``` + + +## Model 配置 + +| 字段 | 必填 | 默认值 | 说明 | +|------|------|--------|------| +| `id` | 是 | — | Model 标识符(会原样传给 API) | +| `name` | 否 | `id` | 可读的 model 标签,用于匹配(例如 `--model` 模糊匹配)并显示在详情 / 状态文字里 | +| `api` | 否 | provider 的 `api` | 为这个 model 覆盖 provider 的 API 类型 | +| `reasoning` | 否 | `false` | 是否支持扩展 thinking | +| `input` | 否 | `["text"]` | 输入类型:`["text"]` 或 `["text", "image"]` | +| `contextWindow` | 否 | `128000` | 上下文窗口大小(tokens) | +| `maxTokens` | 否 | `16384` | 最大输出 tokens | +| `cost` | 否 | 全为 0 | `{"input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0}`(每百万 tokens) | +| `compat` | 否 | provider 的 `compat` | OpenAI 兼容性覆盖项。如果 provider 和 model 两边都配置了,会合并 | + +当前行为: + +- `/model` 与 `--list-models` 都是按 model `id` 列出条目 +- 配置里的 `name` 会用于 model 匹配,以及详情 / 状态文本展示 + + +## 覆盖内置 Providers + +如果你想把某个内置 provider 经由代理路由出去,但又不想重新定义全部 models,可以这样写: + +```json +{ + "providers": { + "anthropic": { + "baseUrl": "https://my-proxy.example.com/v1" + } + } +} +``` + +这样所有内置 Anthropic models 仍然可用。已有的 OAuth 或 API key 认证也会继续生效。 + +如果你想把自定义 models 合并进某个内置 provider,就同时提供 `models` 数组: + +```json +{ + "providers": { + "anthropic": { + "baseUrl": "https://my-proxy.example.com/v1", + "apiKey": "ANTHROPIC_API_KEY", + "api": "anthropic-messages", + "models": [...] + } + } +} +``` + +合并规则如下: + +- 内置 models 会保留 +- 自定义 models 会按 `id` 在该 provider 下执行 upsert +- 如果某个自定义 model 的 `id` 与内置 model 相同,自定义 model 会替换那个内置 model +- 如果某个自定义 model 的 `id` 是新的,它会作为新增条目并列出现 + + +## 按 model 覆盖 + +如果你只想修改某些特定的内置 model,而不想替换整个 provider 的 model 列表,可以使用 `modelOverrides`。 + +```json +{ + "providers": { + "openrouter": { + "modelOverrides": { + "anthropic/claude-sonnet-4": { + "name": "Claude Sonnet 4 (Bedrock Route)", + "compat": { + "openRouterRouting": { + "only": ["amazon-bedrock"] + } + } + } + } + } + } +} +``` + +`modelOverrides` 支持的字段包括:`name`、`reasoning`、`input`、`cost`(可部分覆盖)、`contextWindow`、`maxTokens`、`headers`、`compat`。 + +行为说明: + +- `modelOverrides` 只会应用到内置 provider 的 models 上 +- 未知的 model ID 会被忽略 +- 可以把 provider 级别的 `baseUrl` / `headers` 与 `modelOverrides` 组合使用 +- 如果某个 provider 同时定义了 `models`,那么自定义 models 会在应用完内置覆盖后再合并;如果它的 `id` 与已覆盖的内置 model 相同,最终会以自定义 model 为准 + + +## OpenAI 兼容性 + +对于只部分兼容 OpenAI 的 providers,可通过 `compat` 字段修正行为。 + +- provider 级别的 `compat` 会作为该 provider 下所有 models 的默认值 +- model 级别的 `compat` 会覆盖该 model 的 provider 级别设置 + +```json +{ + "providers": { + "local-llm": { + "baseUrl": "http://localhost:8080/v1", + "api": "openai-completions", + "compat": { + "supportsUsageInStreaming": false, + "maxTokensField": "max_tokens" + }, + "models": [...] 
+ } + } +} +``` + +| 字段 | 说明 | +|------|------| +| `supportsStore` | Provider 是否支持 `store` 字段 | +| `supportsDeveloperRole` | 是否使用 `developer` 而非 `system` role | +| `supportsReasoningEffort` | 是否支持 `reasoning_effort` 参数 | +| `reasoningEffortMap` | 把 GSD 的 thinking levels 映射到 provider 专属 `reasoning_effort` 值 | +| `supportsUsageInStreaming` | 是否支持 `stream_options: { include_usage: true }`(默认 `true`) | +| `maxTokensField` | 使用 `max_completion_tokens` 还是 `max_tokens` | +| `requiresToolResultName` | tool result message 中是否必须包含 `name` | +| `requiresAssistantAfterToolResult` | tool result 之后、user message 之前是否需要插入 assistant message | +| `requiresThinkingAsText` | 是否把 thinking block 转成纯文本 | +| `thinkingFormat` | 使用 `reasoning_effort`、`zai`、`qwen` 或 `qwen-chat-template` 的 thinking 参数格式 | +| `supportsStrictMode` | 是否在 tool definitions 中包含 `strict` 字段 | +| `openRouterRouting` | 传给 OpenRouter 的路由配置,用于 model/provider 选择 | +| `vercelGatewayRouting` | Vercel AI Gateway 的路由配置,用于 provider 选择(`only`、`order`) | + +`qwen` 使用顶层 `enable_thinking`。对于要求 `chat_template_kwargs.enable_thinking` 的本地 Qwen-compatible server,请使用 `qwen-chat-template`。 + +示例: + +```json +{ + "providers": { + "openrouter": { + "baseUrl": "https://openrouter.ai/api/v1", + "apiKey": "OPENROUTER_API_KEY", + "api": "openai-completions", + "models": [ + { + "id": "openrouter/anthropic/claude-3.5-sonnet", + "name": "OpenRouter Claude 3.5 Sonnet", + "compat": { + "openRouterRouting": { + "order": ["anthropic"], + "fallbacks": ["openai"] + } + } + } + ] + } + } +} +``` + +Vercel AI Gateway 示例: + +```json +{ + "providers": { + "vercel-ai-gateway": { + "baseUrl": "https://ai-gateway.vercel.sh/v1", + "apiKey": "AI_GATEWAY_API_KEY", + "api": "openai-completions", + "models": [ + { + "id": "moonshotai/kimi-k2.5", + "name": "Kimi K2.5 (Fireworks via Vercel)", + "reasoning": true, + "input": ["text", "image"], + "cost": { "input": 0.6, "output": 3, "cacheRead": 0, "cacheWrite": 0 }, + "contextWindow": 262144, + "maxTokens": 262144, + "compat": { + "vercelGatewayRouting": { + "only": ["fireworks", "novita"], + "order": ["fireworks", "novita"] + } + } + } + ] + } + } +} +``` diff --git a/docs/zh-CN/user-docs/dynamic-model-routing.md b/docs/zh-CN/user-docs/dynamic-model-routing.md new file mode 100644 index 000000000..6d0d90a3e --- /dev/null +++ b/docs/zh-CN/user-docs/dynamic-model-routing.md @@ -0,0 +1,287 @@ +# 动态模型路由 + +*引入于 v2.19.0。Capability scoring 引入于 v2.52.0。* + +动态模型路由会为简单工作自动选择更便宜的模型,并把昂贵模型留给复杂 task。这样在有成本上限的套餐下,通常可以减少 20-50% 的 token 消耗,同时在关键位置保持质量。 + +从 v2.52.0 开始,router 使用 **capability-aware scoring**,为每个 task 选择最合适的 model,而不只是简单挑选该 tier 里最便宜的。 + +## 工作原理 + +自动模式派发的每个工作单元都会经过一个两阶段流水线: + +**阶段 1:复杂度分类**:先把工作划分到某个 tier(light / standard / heavy)。 + +**阶段 2:能力评分**:在符合该 tier 的候选 models 里,根据它们的能力和 task 需求的匹配程度进行排序。 + +核心规则是:**只允许降级,不允许升级**。用户在偏好设置中配置的 model 始终是上限,router 不会把它升级到比你配置更强的 model。 + +| Tier | 典型工作 | 默认模型级别 | +|------|----------|--------------| +| **Light** | slice completion、UAT、hooks | Haiku 级 | +| **Standard** | research、planning、execution、milestone completion | Sonnet 级 | +| **Heavy** | replan、roadmap reassessment、复杂 execution | Opus 级 | + +## 启用方式 + +动态路由默认关闭。可在偏好设置中开启: + +```yaml +--- +version: 1 +dynamic_routing: + enabled: true +--- +``` + +## 配置 + +```yaml +dynamic_routing: + enabled: true + tier_models: # 可选:为每个 tier 显式指定 model + light: claude-haiku-4-5 + standard: claude-sonnet-4-6 + heavy: claude-opus-4-6 + escalate_on_failure: true # task 失败时提升 tier(默认:true) + budget_pressure: true # 接近预算上限时自动降级(默认:true) + cross_provider: true # 
可跨 provider 选择 model(默认:true) + hooks: true # 是否对 post-unit hooks 也应用路由(默认:true) + capability_routing: true # 在 tier 内启用 capability scoring(默认:true) +``` + +### `tier_models` + +覆盖每个 tier 默认使用的 model。如果省略,router 会使用内置 capability mapping,它已经知道一些常见 model 家族的大致定位: + +- **Light:** `claude-haiku-4-5`、`gpt-4o-mini`、`gemini-2.0-flash` +- **Standard:** `claude-sonnet-4-6`、`gpt-4o`、`gemini-2.5-pro` +- **Heavy:** `claude-opus-4-6`、`gpt-4.5-preview`、`gemini-2.5-pro` + +### `escalate_on_failure` + +当 task 在某个 tier 上失败时,router 会在重试时提升到下一层:Light → Standard → Heavy。这样可以避免便宜模型在其实需要更强推理能力的工作上浪费重试次数。 + +### `budget_pressure` + +当预算接近上限时,router 会逐步降低 tier: + +| 已使用预算 | 影响 | +|------------|------| +| < 50% | 不调整 | +| 50-75% | Standard → Light | +| 75-90% | 更激进地降级 | +| > 90% | 几乎所有工作都 → Light;只有 Heavy 保持在 Standard | + +### `cross_provider` + +开启后,router 可以从你的主 provider 之外选择 model。它会使用内置成本表,在每个 tier 里找到最便宜的 model。要求目标 provider 已经正确配置。 + +### `capability_routing` + +开启后(默认:true),router 会通过 capability scoring 在某个 tier 内选出“最适合”的 model,而不是永远只选最便宜的那个。设为 `false` 可恢复到纯 cheapest-in-tier 行为: + +```yaml +dynamic_routing: + enabled: true + capability_routing: false # 关闭评分,改用 tier 内最便宜的 model +``` + +## Capability Profiles + +每个 model 都有一个内置的 **capability profile**,它是一个 7 维评分(0-100),表示该 model 在不同 task 类型下的能力强弱: + +| 维度 | 含义 | +|------|------| +| `coding` | 代码生成和实现准确性 | +| `debugging` | 诊断与修复错误的能力 | +| `research` | 信息综合与主题探索能力 | +| `reasoning` | 多步逻辑推理能力 | +| `speed` | 延迟与吞吐(可视为能力深度的反向维度) | +| `longContext` | 处理大代码库和长文档的能力 | +| `instruction` | 精确遵循结构化指令的能力 | + +目前 9 个 models 带有内置 profile:`claude-opus-4-6`、`claude-sonnet-4-6`、`claude-haiku-4-5`、`gpt-4o`、`gpt-4o-mini`、`gemini-2.5-pro`、`gemini-2.0-flash`、`deepseek-chat`、`o3`。 + +没有内置 profile 的 models 会收到**全维度均为 50** 的默认分数。这是一个冷启动策略:未知模型可以参与竞争,但不会凭空占优。从用户角度看,这类模型的路由行为和 capability scoring 引入前保持一致。 + +**这些 profiles 是启发式排序,不是 benchmark。** 它们表达的是大致的相对优势,而不是经过严格验证的 benchmark 结果。如果你很了解某个 model,可通过用户覆盖项(见下文)修正这些分值。 + +## 评分方式 + +tier 内的路由流程如下: + +``` +classify complexity tier + ↓ +filter eligible models for tier + ↓ +fire before_model_select hook (optional override) + ↓ +capability score eligible models + ↓ +select winner (or first eligible if scoring is disabled) +``` + +**评分公式:** 各能力维度的加权平均 + +``` +score = Σ(weight × capability) / Σ(weights) +``` + +**Task requirements** 是动态的,不同 unit types 对维度的权重不同: + +| Unit Type | 核心维度 | +|-----------|----------| +| `execute-task` | coding (0.9)、instruction (0.7)、speed (0.3) | +| `research-*` | research (0.9)、longContext (0.7)、reasoning (0.5) | +| `plan-*` | reasoning (0.9)、coding (0.5) | +| `replan-slice` | reasoning (0.9)、debugging (0.6)、coding (0.5) | +| `complete-slice`、`run-uat` | instruction (0.8)、speed (0.7) | + +对于 `execute-task`,router 还会进一步根据 task metadata 微调需求: + +- 带有 `docs`、`config`、`readme` 等 tag:提高 instruction 权重 +- 包含 `concurrency`、`compatibility` 等关键词:提高 debugging 和 reasoning 权重 +- 包含 `migration`、`architecture` 等关键词:提高 reasoning 和 coding 权重 +- 文件数较多(≥6)或估计行数较大(≥500):提高 coding 和 reasoning 权重 + +**平分时的决策:** 当两个 models 的得分相差不超过 2 分时,优先选择更便宜的那个。如果成本也相同,则按 model ID 字典序打破平局(确定性结果)。 + +## 用户覆盖 + +如果你对某个 model 的能力认知比内置 profile 更准确,可以通过 `models` 配置里的 `modelOverrides` 修正: + +```json +{ + "providers": { + "anthropic": { + "modelOverrides": { + "claude-sonnet-4-6": { + "capabilities": { + "debugging": 90, + "research": 85 + } + } + } + } + } +} +``` + +这些覆盖会与内置默认值进行**深度合并**:你只需覆盖指定维度,未指定的维度仍保留内置值。 + +**典型用法:** 如果你发现某个 model 在某一类工作上持续优于内置 profile,就覆盖对应维度,把 router 更积极地引导到该 model。 + +## 详细输出 + +开启 verbose mode 时,router 会把自己的路由决策打印出来。如果使用了 
capability scoring,日志会包含完整评分拆分: + +``` +Dynamic routing [S]: claude-sonnet-4-6 (capability-scored) — claude-sonnet-4-6: 82.3, gpt-4o: 78.1, deepseek-chat: 72.0 +``` + +如果只使用了 tier 级路由(例如评分被禁用、只有一个符合条件的 model,或命中了路由守卫): + +``` +Dynamic routing [S]: claude-sonnet-4-6 (standard complexity, multiple steps) +``` + +路由决策中的 `selectionMethod` 字段会说明采用了哪种路径: + +- `"capability-scored"`:使用 capability scoring 选出了最终 model +- `"tier-only"`:使用了 tier 内最便宜的 model(或显式固定值) + +## 扩展 Hook + +扩展可以通过 `before_model_select` hook 拦截并覆盖 model 选择。 + +Hook 触发时机在 **tier 过滤之后**(已知符合条件的 models),但在 **capability scoring 之前**(尚未计算分数)。Hook 可以完全接管选择,也可以返回 `undefined`,让 scoring 按默认逻辑继续。 + +**注册处理器:** + +```typescript +pi.on("before_model_select", async (event) => { + const { unitType, unitId, classification, taskMetadata, eligibleModels, phaseConfig } = event; + + // 自定义路由策略:research 一律优先用 gemini + if (unitType.startsWith("research-")) { + const gemini = eligibleModels.find(id => id.includes("gemini")); + if (gemini) return { modelId: gemini }; + } + + // 返回 undefined,让 capability scoring 继续 + return undefined; +}); +``` + +**事件负载:** + +| 字段 | 类型 | 说明 | +|------|------|------| +| `unitType` | `string` | 当前派发单元类型(例如 `"execute-task"`) | +| `unitId` | `string` | 此次单元派发的唯一标识符 | +| `classification` | `{ tier, reason, downgraded }` | 复杂度分类结果 | +| `taskMetadata` | `Record \| undefined` | 从单元 plan 中提取出的 task 元数据 | +| `eligibleModels` | `string[]` | 符合该 tier 的 models | +| `phaseConfig` | `{ primary, fallbacks } \| undefined` | 用户为该 phase 配置的 model | + +**返回值:** `{ modelId: string }` 表示覆盖默认选择;返回 `undefined` 表示交给 capability scoring。 + +**第一个覆盖者生效:** 如果多个扩展都注册了处理器,第一个返回非 `undefined` 的处理器获胜,后续处理器不会再被调用。 + +## 复杂度分类 + +工作单元通过纯启发式规则分类,不涉及 LLM 调用,耗时通常低于 1ms。 + +### Unit Type 默认值 + +| Unit Type | 默认 Tier | +|-----------|-----------| +| `complete-slice`、`run-uat` | Light | +| `research-*`、`plan-*`、`complete-milestone` | Standard | +| `execute-task` | Standard(可被 task 分析升级) | +| `replan-slice`、`reassess-roadmap` | Heavy | +| `hook/*` | Light | + +### Task Plan 分析 + +对于 `execute-task` 单元,分类器会分析 task plan: + +| 信号 | 简单 → Light | 复杂 → Heavy | +|------|--------------|--------------| +| Step 数量 | ≤ 3 | ≥ 8 | +| 文件数 | ≤ 3 | ≥ 8 | +| 描述长度 | < 500 chars | > 2000 chars | +| 代码块数 | — | ≥ 5 | +| 复杂度关键词 | 无 | 有 | + +**复杂度关键词:** `research`、`investigate`、`refactor`、`migrate`、`integrate`、`complex`、`architect`、`redesign`、`security`、`performance`、`concurrent`、`parallel`、`distributed`、`backward compat` + +### 自适应学习 + +路由历史(`.gsd/routing-history.json`)会按 unit type 和 tier 记录成功 / 失败情况。如果某种模式下某个 tier 的失败率超过 20%,未来相似分类会自动上调一个 tier。用户反馈(`over` / `under` / `ok`)的权重是自动结果的 2 倍。 + +## 与 Token Profile 的关系 + +动态路由和 token profile 是互补的: + +- **Token profiles**(`budget` / `balanced` / `quality`)控制阶段跳过和上下文压缩 +- **Dynamic routing** 控制每个工作单元在对应 phase 内的 model 选择 + +两者同时开启时,token profile 负责给出基础模型集,dynamic routing 再在这些基础之上做进一步优化。`budget` token profile + dynamic routing 组合能带来最大的成本节省。 + +## 成本表 + +Router 内置了一张常见 models 的成本表,用于跨 provider 成本比较。成本单位都是每百万 tokens(input / output): + +| Model | Input | Output | +|-------|-------|--------| +| claude-haiku-4-5 | $0.80 | $4.00 | +| claude-sonnet-4-6 | $3.00 | $15.00 | +| claude-opus-4-6 | $15.00 | $75.00 | +| gpt-4o-mini | $0.15 | $0.60 | +| gpt-4o | $2.50 | $10.00 | +| gemini-2.0-flash | $0.10 | $0.40 | + +这张成本表仅用于比较,实际计费仍然来自你所使用的 provider。 diff --git a/docs/zh-CN/user-docs/getting-started.md b/docs/zh-CN/user-docs/getting-started.md new file mode 100644 index 000000000..b2e725b6a --- /dev/null +++ b/docs/zh-CN/user-docs/getting-started.md @@ 
-0,0 +1,473 @@ +# GSD 快速开始 + +GSD 是一个 AI 编程代理,负责规划、执行、验证和交付,让你可以把注意力放在“要构建什么”上。本指南会带你完成 macOS、Windows 和 Linux 的安装,并启动你的第一个会话。 + +--- + +## 前置条件 + +| 要求 | 最低版本 | 推荐版本 | +|------|----------|----------| +| **[Node.js](https://nodejs.org/)** | 22.0.0 | 24 LTS | +| **[Git](https://git-scm.com/)** | 2.20+ | 最新版 | +| **LLM API key** | 任意受支持提供商 | Anthropic(Claude) | + +如果你还没有安装 Node.js 或 Git,请按下面对应操作系统的步骤进行。 + +--- + +## 按操作系统安装 + +### macOS + +> **下载链接:** [Node.js](https://nodejs.org/) | [Git](https://git-scm.com/download/mac) | [Homebrew](https://brew.sh/) + +**第 1 步:安装 Homebrew**(如果已安装可跳过): + +```bash +/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" +``` + +**第 2 步:安装 Node.js 和 Git:** + +```bash +brew install node git +``` + +**第 3 步:验证依赖已安装:** + +```bash +node --version # 应输出 v22.x 或更高 +git --version # 应输出 2.20+ +``` + +**第 4 步:安装 GSD:** + +```bash +npm install -g gsd-pi +``` + +**第 5 步:设置你的 LLM provider:** + +```bash +# 选项 A:设置环境变量(推荐 Anthropic) +export ANTHROPIC_API_KEY="sk-ant-..." + +# 选项 B:使用内置配置向导 +gsd config +``` + +如果想永久保存这个 key,把 export 语句写入 `~/.zshrc`: + +```bash +echo 'export ANTHROPIC_API_KEY="sk-ant-..."' >> ~/.zshrc +source ~/.zshrc +``` + +所有 20+ provider 的完整配置方式请见 [提供商设置指南](./providers.md)。 + +**第 6 步:启动 GSD:** + +```bash +cd ~/my-project # 进入任意项目目录 +gsd # 启动一个会话 +``` + +**第 7 步:确认一切正常:** + +```bash +gsd --version # 输出已安装版本 +``` + +进入会话后,输入 `/model` 以确认你的 LLM 已成功连接。 + +> **Apple Silicon PATH 修复:** 如果安装后找不到 `gsd`,可能是 npm 的全局 bin 目录没有加入 PATH: +> ```bash +> echo 'export PATH="$(npm prefix -g)/bin:$PATH"' >> ~/.zshrc +> source ~/.zshrc +> ``` + +> **oh-my-zsh 冲突:** oh-my-zsh 的 git 插件定义了 `alias gsd='git svn dcommit'`。可在 `~/.zshrc` 中加入 `unalias gsd 2>/dev/null`,或者改用 `gsd-cli`。 + +--- + +### Windows + +> **下载链接:** [Node.js](https://nodejs.org/) | [Git for Windows](https://git-scm.com/download/win) | [Windows Terminal](https://aka.ms/terminal) + +#### 选项 A:使用 winget(推荐 Windows 10/11) + +**第 1 步:安装 Node.js 和 Git:** + +```powershell +winget install OpenJS.NodeJS.LTS +winget install Git.Git +``` + +**第 2 步:重启终端**(关闭并重新打开 PowerShell 或 Windows Terminal)。 + +**第 3 步:验证依赖已安装:** + +```powershell +node --version # 应输出 v22.x 或更高 +git --version # 应输出 2.20+ +``` + +**第 4 步:安装 GSD:** + +```powershell +npm install -g gsd-pi +``` + +**第 5 步:设置你的 LLM provider:** + +```powershell +# 选项 A:设置环境变量(仅当前会话) +$env:ANTHROPIC_API_KEY = "sk-ant-..." + +# 选项 B:使用内置配置向导 +gsd config +``` + +如果要永久保存该 key,可在系统设置的环境变量中添加,或者执行: + +```powershell +[System.Environment]::SetEnvironmentVariable("ANTHROPIC_API_KEY", "sk-ant-...", "User") +``` + +所有 20+ provider 的完整配置方式请见 [提供商设置指南](./providers.md)。 + +**第 6 步:启动 GSD:** + +```powershell +cd C:\Users\you\my-project # 进入任意项目目录 +gsd # 启动一个会话 +``` + +**第 7 步:确认一切正常:** + +```powershell +gsd --version # 输出已安装版本 +``` + +进入会话后,输入 `/model` 以确认你的 LLM 已成功连接。 + +#### 选项 B:手动安装 + +1. 下载并安装 [Node.js LTS](https://nodejs.org/),安装时勾选 **“Add to PATH”** +2. 下载并安装 [Git for Windows](https://git-scm.com/download/win),使用默认选项 +3. 
打开一个**新的**终端,然后继续执行上面的第 3-7 步 + +> **Windows 提示:** +> - 建议使用 **Windows Terminal** 或 **PowerShell**,体验最佳。Command Prompt 也能用,但颜色支持较弱。 +> - 如果 `gsd` 无法识别,先重启终端。Windows 需要新开终端才能读取更新后的 PATH。 +> - **WSL2** 也可用,安装 WSL 后,在发行版内部按 Linux 说明继续。 + +--- + +### Linux + +> **下载链接:** [Node.js](https://nodejs.org/) | [Git](https://git-scm.com/download/linux) | [nvm](https://github.com/nvm-sh/nvm) + +先确认你的发行版,然后按对应步骤安装。 + +#### Ubuntu / Debian + +**第 1 步:安装 Node.js 和 Git:** + +```bash +curl -fsSL https://deb.nodesource.com/setup_24.x | sudo -E bash - +sudo apt-get install -y nodejs git +``` + +#### Fedora / RHEL / CentOS + +**第 1 步:安装 Node.js 和 Git:** + +```bash +curl -fsSL https://rpm.nodesource.com/setup_24.x | sudo bash - +sudo dnf install -y nodejs git +``` + +#### Arch Linux + +**第 1 步:安装 Node.js 和 Git:** + +```bash +sudo pacman -S nodejs npm git +``` + +#### 使用 nvm(任意发行版) + +**第 1 步:先安装 nvm,再安装 Node.js:** + +```bash +curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.0/install.sh | bash +source ~/.bashrc # 或 ~/.zshrc +nvm install 24 +nvm use 24 +``` + +#### 所有发行版:第 2-7 步 + +**第 2 步:验证依赖已安装:** + +```bash +node --version # 应输出 v22.x 或更高 +git --version # 应输出 2.20+ +``` + +**第 3 步:安装 GSD:** + +```bash +npm install -g gsd-pi +``` + +**第 4 步:设置你的 LLM provider:** + +```bash +# 选项 A:设置环境变量(推荐 Anthropic) +export ANTHROPIC_API_KEY="sk-ant-..." + +# 选项 B:使用内置配置向导 +gsd config +``` + +如果想永久保存这个 key,把 export 语句写到 `~/.bashrc`(或 `~/.zshrc`)中: + +```bash +echo 'export ANTHROPIC_API_KEY="sk-ant-..."' >> ~/.bashrc +source ~/.bashrc +``` + +所有 20+ provider 的完整配置方式请见 [提供商设置指南](./providers.md)。 + +**第 5 步:启动 GSD:** + +```bash +cd ~/my-project # 进入任意项目目录 +gsd # 启动一个会话 +``` + +**第 6 步:确认一切正常:** + +```bash +gsd --version # 输出已安装版本 +``` + +进入会话后,输入 `/model` 以确认你的 LLM 已成功连接。 + +> **`npm install -g` 遇到权限错误?** 不要用 `sudo npm`。应改为修复 npm 的全局目录: +> ```bash +> mkdir -p ~/.npm-global +> npm config set prefix '~/.npm-global' +> echo 'export PATH="$HOME/.npm-global/bin:$PATH"' >> ~/.bashrc +> source ~/.bashrc +> npm install -g gsd-pi +> ``` + +--- + +### Docker(任意操作系统) + +> **下载链接:** [Docker Desktop](https://www.docker.com/products/docker-desktop/) + +如果你不想在宿主机安装 Node.js,可以在隔离沙箱中运行 GSD。 + +**第 1 步:安装 Docker Desktop**(要求 4.58+)。 + +**第 2 步:克隆 GSD 仓库:** + +```bash +git clone https://github.com/gsd-build/gsd-2.git +cd gsd-2/docker +``` + +**第 3 步:创建并进入沙箱:** + +```bash +docker sandbox create --template . --name gsd-sandbox +docker sandbox exec -it gsd-sandbox bash +``` + +**第 4 步:设置 API key 并运行 GSD:** + +```bash +export ANTHROPIC_API_KEY="sk-ant-..." 
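+
+# 说明:export 设置的变量只在当前沙箱 shell 会话内有效;
+# 退出后重新 exec 进入沙箱时,需要重新设置该变量。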
+gsd auto "implement the feature described in issue #42" +``` + +完整的配置、资源限制和 compose 文件请见 [Docker Sandbox 文档](../../../docker/README.md)。 + +--- + +## 安装之后 + +### 选择模型 + +完成 provider 设置后,GSD 会自动选择一个默认模型。你可以在会话中随时切换: + +``` +/model +``` + +也可以在偏好设置中按阶段配置模型,详见 [配置](./configuration.md)。 + +--- + +## 两种工作方式 + +### 步骤模式 — `/gsd` + +在会话内输入 `/gsd`。GSD 会一次执行一个工作单元,并在每一步之间暂停,通过向导展示刚完成了什么、下一步是什么。 + +- **没有 `.gsd/` 目录**:启动讨论流程,先收集你的项目愿景 +- **已有 milestone,但没有 roadmap**:讨论或研究该 milestone +- **roadmap 已存在,仍有待完成的 slices**:规划下一个 slice 或执行一个 task +- **进行到一半的 task**:从上次停下的地方继续 + +步骤模式会让你始终留在回路中,在每一步之间查看和确认输出。 + +### 自动模式 — `/gsd auto` + +输入 `/gsd auto` 后就可以离开。GSD 会自主完成 research、planning、execution、verification、commit,并持续推进每个 slice,直到 milestone 完成。 + +``` +/gsd auto +``` + +完整细节请见 [自动模式](./auto-mode.md)。 + +--- + +## 推荐工作流:两个终端 + +一个终端跑自动模式,另一个终端负责引导和干预。 + +**终端 1:让它构建** + +```bash +gsd +/gsd auto +``` + +**终端 2:在它工作时进行引导** + +```bash +gsd +/gsd discuss # 讨论架构决策 +/gsd status # 查看进度 +/gsd queue # 排队下一个 milestone +``` + +两个终端都会读写同一套 `.gsd/` 文件。你在终端 2 里做出的决策,会在下一个阶段边界被自动拾取。 + +--- + +## GSD 如何组织工作 + +``` +Milestone → 一个可交付版本(4-10 个 slice) + Slice → 一个可演示的垂直能力(1-7 个 task) + Task → 一个适合单个上下文窗口的工作单元 +``` + +铁律是:**一个 task 必须能装进一个上下文窗口。** 装不下,就说明它应该拆成两个 task。 + +所有状态都保存在 `.gsd/` 中: + +``` +.gsd/ + PROJECT.md — 项目当前是什么 + REQUIREMENTS.md — 需求契约 + DECISIONS.md — 追加式架构决策记录 + KNOWLEDGE.md — 跨会话规则与模式 + STATE.md — 一眼可见的状态摘要 + milestones/ + M001/ + M001-ROADMAP.md — 带依赖关系的 slice 计划 + slices/ + S01/ + S01-PLAN.md — task 拆解 + S01-SUMMARY.md — 实际发生了什么 +``` + +--- + +## VS Code 扩展 + +GSD 也提供 VS Code 扩展。你可以从扩展市场安装(publisher: FluxLabs),或者在 VS Code 扩展面板中直接搜索 “GSD”: + +- **`@gsd` 聊天参与者**:在 VS Code Chat 中直接与 agent 对话 +- **侧边栏仪表板**:显示连接状态、模型信息、Token 使用量 +- **完整命令面板**:启动 / 停止 agent、切换模型、导出会话 + +CLI(`gsd-pi`)需要先安装好,扩展会通过 RPC 与其连接。 + +--- + +## Web 界面 + +GSD 也提供一个基于浏览器的可视化项目管理界面: + +```bash +gsd --web +``` + +详见 [Web 界面](./web-interface.md)。 + +--- + +## 恢复会话 + +```bash +gsd --continue # 或 gsd -c +``` + +会恢复当前目录最近一次会话。 + +浏览所有保存过的会话: + +```bash +gsd sessions +``` + +--- + +## 更新 GSD + +GSD 每 24 小时检查一次更新,并在启动时提示。你也可以手动更新: + +```bash +npm update -g gsd-pi +``` + +或者在会话中执行: + +``` +/gsd update +``` + +--- + +## 快速排障 + +| 问题 | 解决方式 | +|------|----------| +| `command not found: gsd` | 把 npm 全局 bin 目录加入 PATH(见上面的系统说明) | +| `gsd` 实际执行了 `git svn dcommit` | oh-my-zsh 冲突,执行 `unalias gsd` 或改用 `gsd-cli` | +| `npm install -g gsd-pi` 权限错误 | 修复 npm prefix(见 Linux 说明)或改用 nvm | +| 无法连接到 LLM | 用 `gsd config` 检查 API key,并确认网络可用 | +| `gsd` 启动时卡住 | 检查 Node.js 版本:`node --version`(需要 22+) | + +更多问题见 [故障排查](./troubleshooting.md)。 + +--- + +## 下一步 + +- [自动模式](./auto-mode.md):深入理解自主执行 +- [配置](./configuration.md):模型选择、超时和预算 +- [命令参考](./commands.md):所有命令和快捷键 +- [提供商设置](./providers.md):每个 provider 的详细配置 +- [团队协作](./working-in-teams.md):多开发者工作流 diff --git a/docs/zh-CN/user-docs/git-strategy.md b/docs/zh-CN/user-docs/git-strategy.md new file mode 100644 index 000000000..5f713610a --- /dev/null +++ b/docs/zh-CN/user-docs/git-strategy.md @@ -0,0 +1,186 @@ +# Git 策略 + +GSD 使用 git 来实现 milestone 隔离,以及每个 milestone 内部的顺序提交。你可以通过 **isolation mode** 控制工作发生在哪里。整个策略是自动化的,你不需要手工管理分支。 + +## 隔离模式 + +GSD 支持三种隔离模式,通过 `git.isolation` 偏好设置: + +| 模式 | 工作目录 | 分支 | 适用场景 | +|------|----------|------|----------| +| `worktree`(默认) | `.gsd/worktrees//` | `milestone/` | 大多数项目,milestones 之间文件完全隔离 | +| `branch` | 项目根目录 | `milestone/` | 子模块较多、worktree 表现不佳的仓库 | +| `none` | 项目根目录 | 当前分支(不建 milestone 分支) | 热重载工作流中,文件隔离会破坏开发工具的场景 | + +### `worktree` 模式(默认) + +每个 milestone 都会在 `.gsd/worktrees//` 
下拥有自己的 git worktree,对应一个 `milestone/<milestone-id>` 分支。所有执行都发生在该 worktree 中。完成后,worktree 会被 squash merge 回主分支,形成一个干净的提交,然后清理对应 worktree 和分支。
+
+这提供了完整的文件隔离,某个 milestone 的变更不会干扰你的主工作副本。
+
+### `branch` 模式
+
+工作直接在项目根目录中的 `milestone/<milestone-id>` 分支上进行,不会创建 worktree。完成后,该分支会被合并回主分支(是 squash merge 还是普通 merge 由 `merge_strategy` 控制)。
+
+当 worktree 会带来问题时使用它,例如:子模块较多的仓库、包含硬编码路径的仓库、或者 worktree symlink 表现异常的环境。
+
+### `none` 模式
+
+工作直接发生在当前分支。没有 worktree,也没有 milestone 分支。GSD 依然会按顺序提交,并使用 conventional commit message,但不会提供分支级隔离。
+
+适用于热重载工作流中“文件隔离会破坏开发工具”的情况(例如只能监视项目根目录的文件监听器),或者很小的项目里不值得承担分支开销的情况。
+
+## 分支模型(worktree 模式)
+
+```
+main ─────────────────────────────────────────────────────────
+  │                                                     ↑
+  └── milestone/M001 (worktree) ────────────────────────┘
+        commit: feat: core types
+        commit: feat: markdown parser
+        commit: feat: file writer
+        commit: docs: workflow docs
+        ...
+        → squash-merged to main as single commit
+```
+
+在 **branch 模式** 下,流程相同,只是工作发生在项目根目录而不是独立的 worktree 目录。
+
+在 **none 模式** 下,提交直接落到当前分支,不会创建 milestone 分支,也不需要合并步骤。
+
+### 并行 worktrees
+
+如果启用了 [并行编排](./parallel-orchestration.md),多个 milestones 可以同时运行在各自独立的 worktree 中:
+
+```
+main ──────────────────────────────────────────────────────────
+  │                                     ↑                     ↑
+  ├── milestone/M002 (worktree) ────────┘                     │
+  │     commit: feat: auth types                              │
+  │     commit: feat: JWT middleware                          │
+  │     → squash-merged first                                 │
+  │                                                           │
+  └── milestone/M003 (worktree) ──────────────────────────────┘
+        commit: feat: dashboard layout
+        commit: feat: chart components
+        → squash-merged second
+```
+
+每个 worktree 都工作在自己的分支和自己的提交历史上。为了避免冲突,合并会顺序进行。
+
+### 关键特性
+
+- **单分支顺序提交**:没有按 slice 单独分支,也不会在单个 milestone 内产生合并冲突
+- **Squash merge 到主分支**:在 worktree 和 branch 模式下,所有提交最终都会以一个干净的提交压缩到主分支(可通过 `merge_strategy` 配置)
+
+### 提交格式
+
+提交使用 conventional commit 格式,并在 trailer 中带上 GSD 元数据:
+
+```
+feat: core type definitions
+
+GSD-Task: M001/S01/T01
+
+feat: markdown parser for plan files
+
+GSD-Task: M001/S01/T02
+```
+
+## Worktree 管理
+
+以下特性仅适用于 **worktree 模式**。
+
+### 自动(自动模式)
+
+自动模式会自动创建并管理 worktrees:
+
+1. milestone 启动时,在 `.gsd/worktrees/<milestone-id>/` 创建 worktree,并切到 `milestone/<milestone-id>` 分支
+2. 将 `.gsd/milestones/` 下的规划产物复制到该 worktree
+3. 所有执行都发生在 worktree 内部
+4. milestone 完成后,把该 worktree squash merge 回集成分支
+5. 删除 worktree 和对应分支(等价的 git 操作见下方示意)
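+
+下面用原生 git 命令示意这一自动化生命周期(仅为等价示意,并非 GSD 的实际实现;目录与分支名以 M001 为例,集成分支假定为 main):
+
+```bash
+# 1. 创建 worktree,并新建对应的 milestone 分支
+git worktree add .gsd/worktrees/M001 -b milestone/M001
+
+# 2-3. 规划产物复制与所有执行都发生在该 worktree 内,期间正常产生顺序提交
+
+# 4. 完成后 squash merge 回集成分支,形成单个干净提交
+git checkout main
+git merge --squash milestone/M001
+git commit -m "feat: M001 milestone"
+
+# 5. 清理 worktree 与对应分支
+git worktree remove .gsd/worktrees/M001
+git branch -D milestone/M001
+```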
+
+### 手动
+
+使用 `/worktree`(或 `/wt`)命令手动管理 worktree:
+
+```
+/worktree create <name>
+/worktree switch <name>
+/worktree merge <name>
+/worktree remove <name>
+```
+
+## 工作流模式
+
+如果不想逐个配置 git 设置,可以通过 `mode` 获得一组更合理的默认值:
+
+```yaml
+mode: solo # 个人项目:自动推送、squash、简单 ID
+mode: team # 共享仓库:唯一 ID、推送分支、预合并检查
+```
+
+| 设置 | `solo` | `team` |
+|---|---|---|
+| `git.auto_push` | `true` | `false` |
+| `git.push_branches` | `false` | `true` |
+| `git.pre_merge_check` | `false` | `true` |
+| `git.merge_strategy` | `"squash"` | `"squash"` |
+| `git.isolation` | `"worktree"` | `"worktree"` |
+| `git.commit_docs` | `true` | `true` |
+| `unique_milestone_ids` | `false` | `true` |
+
+Mode 默认值的优先级最低,任何显式偏好设置都会覆盖它们。例如,`mode: solo` 配合 `git.auto_push: false`,就表示除了自动推送以外,其它行为都沿用 solo 的默认配置。
+
+已有但未设置 `mode` 的配置会保持原样,不会被自动注入新默认值。
+
+## Git 偏好设置
+
+可以在偏好设置中配置 git 行为:
+
+```yaml
+git:
+  auto_push: false          # 提交后推送
+  push_branches: false      # 推送 milestone 分支
+  remote: origin
+  snapshots: false          # WIP 快照提交
+  pre_merge_check: false    # 合并前校验
+  commit_type: feat         # 覆盖提交类型前缀
+  main_branch: main         # 主分支名称
+  commit_docs: true         # 将 .gsd/ 提交到 git
+  isolation: worktree       # "worktree"、"branch" 或 "none"
+  auto_pr: false            # milestone 完成时自动创建 PR
+  pr_target_branch: develop # PR 目标分支(默认 main)
+```
+
+### 自动创建 Pull Request
+
+对于使用 Gitflow 或分支工作流的团队,GSD 可以在 milestone 完成时自动创建 pull request:
+
+```yaml
+git:
+  auto_push: true
+  auto_pr: true
+  pr_target_branch: develop
+```
+
+这样会把 milestone 分支推送到远程,并创建一个目标分支为 `develop`(或你指定的其它分支)的 PR。要求已安装并认证 `gh` CLI。详见 [git.auto_pr](./configuration.md#gitauto_pr)。
+
+### `commit_docs: false`
+
+当设置为 `false` 时,GSD 会把 `.gsd/` 添加到 `.gitignore`,所有规划产物只保留在本地。适合只有部分成员使用 GSD 的团队,或者公司要求仓库保持干净的场景。
+
+## 自愈能力
+
+GSD 内置了对常见 git 问题的自动恢复:
+
+- **Detached HEAD**:自动重新附着到正确分支
+- **过期锁文件**:移除崩溃进程残留的 `index.lock`
+- **孤儿 worktree**:检测并提供清理废弃 worktree 的选项(仅 worktree 模式)
+
+可通过 `/gsd doctor` 手动检查 git 健康状态。
+
+## 原生 Git 操作
+
+从 v2.16 起,GSD 在派发热路径中的读密集 git 操作改用 libgit2 原生绑定。这消除了每次派发周期中约 70 次进程拉起,从而提升了自动模式吞吐量。
diff --git a/docs/zh-CN/user-docs/migration.md b/docs/zh-CN/user-docs/migration.md
new file mode 100644
index 000000000..56dd61d7f
--- /dev/null
+++ b/docs/zh-CN/user-docs/migration.md
@@ -0,0 +1,48 @@
+# 从 v1 迁移
+
+如果你有仍在使用原始 Get Shit Done(v1)`.planning` 目录结构的项目,可以把它们迁移到 GSD-2 的 `.gsd` 格式。
+
+## 运行迁移
+
+```bash
+# 在项目目录内执行
+/gsd migrate
+
+# 或者显式指定路径
+/gsd migrate ~/projects/my-old-project
+```
+
+## 会迁移什么
+
+迁移工具会:
+
+- 解析旧版的 `PROJECT.md`、`ROADMAP.md`、`REQUIREMENTS.md`、phase 目录、计划、总结和研究文档
+- 将 phases 映射为 slices、plans 映射为 tasks、milestones 映射为 milestones
+- 保留完成状态(`[x]` 阶段保持已完成,原有 summary 会被带过来)
+- 将研究文件整合进新的目录结构
+- 在真正写入前先展示预览
+- 可选运行一次由 agent 驱动的结果审查,以做质量保证
+
+## 支持的格式
+
+迁移器可处理多种 v1 文档变体:
+
+- 按 milestone 分段、带 `
` 块的 roadmap +- 粗体 phase 条目 +- 列表格式的 requirements +- 十进制 phase 编号 +- 跨不同 milestones 重复的 phase 编号 + +## 前提条件 + +如果项目有 `ROADMAP.md` 来描述 milestone 结构,迁移效果最好。没有的话,系统会根据 `phases/` 目录推断 milestones。 + +## 迁移后 + +迁移完成后,用下面的命令检查输出结果: + +```bash +/gsd doctor +``` + +它会检查 `.gsd/` 的完整性,并标出任何结构性问题。 diff --git a/docs/zh-CN/user-docs/node-lts-macos.md b/docs/zh-CN/user-docs/node-lts-macos.md new file mode 100644 index 000000000..f23b19ff0 --- /dev/null +++ b/docs/zh-CN/user-docs/node-lts-macos.md @@ -0,0 +1,75 @@ +# 在 macOS 上通过 Homebrew 固定 Node.js LTS 版本 + +如果你是通过 Homebrew 安装 Node.js(`brew install node`),那你跟踪的是**当前最新正式版本**,其中可能包含奇数版本的开发分支(例如 23.x、25.x)。这些版本并不是 LTS,可能带来破坏性变更或稳定性问题。 + +GSD 要求 Node.js **v22 或更高版本**,并且在 **LTS(偶数版本)** 上运行效果最好。本指南展示如何用 Homebrew 固定到 Node 24 LTS。 + +## 检查当前版本 + +```bash +node --version +``` + +如果输出的是奇数主版本号(例如 `v23.x`、`v25.x`),说明你当前使用的是开发版。 + +## 安装 Node 24 LTS + +Homebrew 为 LTS 版本提供了带版本号的 formula: + +```bash +# 取消当前版本(可能不是 LTS)的链接 +brew unlink node + +# 安装 Node 24 LTS +brew install node@24 + +# 将它设为默认版本 +brew link --overwrite node@24 +``` + +验证: + +```bash +node --version +# 应显示 v24.x.x +``` + +## 为什么要固定到 LTS? + +- **稳定性**:LTS 版本会在 30 个月内持续收到 bug 修复和安全更新 +- **兼容性**:包括 GSD 在内的 npm 包通常都会优先测试 LTS 版本 +- **可预期**:`brew upgrade` 不会把你突然升级到不稳定的开发版 + +## 防止误升级 + +默认情况下,`brew upgrade` 会升级所有包,这可能让你离开固定版本。可以把对应 formula pin 住: + +```bash +brew pin node@24 +``` + +如果以后想取消固定: + +```bash +brew unpin node@24 +``` + +## 在多个版本之间切换 + +如果你需要同时使用多个 Node 版本(例如 22 和 24),更推荐使用版本管理器: + +- **[nvm](https://github.com/nvm-sh/nvm)**:`nvm install 24 && nvm use 24` +- **[fnm](https://github.com/Schniz/fnm)**:`fnm install 24 && fnm use 24`(更快,基于 Rust) +- **[mise](https://mise.jdx.dev/)**:`mise use node@24`(多语言版本管理器) + +这些工具允许你通过 `.node-version` 或 `.nvmrc` 为不同项目设置独立的 Node 版本。 + +## 验证 GSD 是否正常工作 + +固定版本后,执行: + +```bash +node --version # v24.x.x +npm install -g gsd-pi +gsd --version +``` diff --git a/docs/zh-CN/user-docs/parallel-orchestration.md b/docs/zh-CN/user-docs/parallel-orchestration.md new file mode 100644 index 000000000..0b75c0275 --- /dev/null +++ b/docs/zh-CN/user-docs/parallel-orchestration.md @@ -0,0 +1,310 @@ +# 并行 Milestone 编排 + +在隔离的 git worktrees 中同时运行多个 milestones。每个 milestone 都拥有自己的 worker 进程、自己的分支和自己的上下文窗口;同时还会有一个 coordinator 跟踪进度、执行预算限制并保持整体同步。 + +> **状态:** 该功能默认处于 `parallel.enabled: false`。属于显式 opt-in,对现有用户零影响。 + +## 快速开始 + +1. 在偏好设置中开启并行模式: + +```yaml +--- +parallel: + enabled: true + max_workers: 2 +--- +``` + +2. 启动并行执行: + +``` +/gsd parallel start +``` + +GSD 会扫描所有 milestones,检查依赖与文件重叠,给出一份可并行性报告,并为符合条件的 milestones 启动 workers。 + +3. 监控进度: + +``` +/gsd parallel status +``` + +4. 完成后停止: + +``` +/gsd parallel stop +``` + +## 工作原理 + +### 架构 + +``` +┌─────────────────────────────────────────────────────────┐ +│ Coordinator(你的 GSD 会话) │ +│ │ +│ 职责: │ +│ - 可并行性分析(依赖 + 文件重叠) │ +│ - Worker 启动与生命周期管理 │ +│ - 全部 workers 的预算跟踪 │ +│ - 派发控制信号(pause / resume / stop) │ +│ - 会话状态监控 │ +│ - Merge 对账 │ +│ │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │ Worker 1 │ │ Worker 2 │ │ Worker 3 │ ... 
│ +│ │ M001 │ │ M003 │ │ M005 │ │ +│ └──────────┘ └──────────┘ └──────────┘ │ +│ │ │ │ │ +│ ▼ ▼ ▼ │ +│ .gsd/worktrees/ .gsd/worktrees/ .gsd/worktrees/ │ +│ M001/ M003/ M005/ │ +│ (milestone/ (milestone/ (milestone/ │ +│ M001 branch) M003 branch) M005 branch) │ +└─────────────────────────────────────────────────────────┘ +``` + +### Worker 隔离 + +每个 worker 都是一个完全隔离的独立 `gsd` 进程: + +| 资源 | 隔离方式 | +|------|----------| +| **文件系统** | Git worktree:每个 worker 都有自己的 checkout | +| **Git 分支** | `milestone/`:每个 milestone 一条分支 | +| **状态推导** | 通过 `GSD_MILESTONE_LOCK` 环境变量,让 `deriveState()` 只看到被分配的 milestone | +| **上下文窗口** | 独立进程:每个 worker 都有自己的 agent sessions | +| **指标** | 每个 worktree 都有自己的 `.gsd/metrics.json` | +| **崩溃恢复** | 每个 worktree 都有自己的 `.gsd/auto.lock` | + +### 协调方式 + +Workers 和 coordinator 通过基于文件的 IPC 通信: + +- **会话状态文件**(`.gsd/parallel/.status.json`):worker 写入 heartbeat,coordinator 读取 +- **信号文件**(`.gsd/parallel/.signal.json`):coordinator 写信号,worker 消费 +- **原子写入**:使用写临时文件再 rename 的方式,避免读到半成品 + +## 可并行性分析 + +在真正启动并行执行之前,GSD 会先检查哪些 milestones 可以安全并发运行。 + +### 规则 + +1. **未完成**:已完成的 milestones 会被跳过 +2. **依赖满足**:所有 `dependsOn` 指向的 milestones 都必须已处于 `complete` +3. **文件重叠检查**:如果多个 milestones 会触碰同一批文件,会给出警告(但仍可执行) + +### 示例报告 + +``` +# Parallel Eligibility Report + +## Eligible for Parallel Execution (2) + +- **M002** — Auth System + All dependencies satisfied. +- **M003** — Dashboard UI + All dependencies satisfied. + +## Ineligible (2) + +- **M001** — Core Types + Already complete. +- **M004** — API Integration + Blocked by incomplete dependencies: M002. + +## File Overlap Warnings (1) + +- **M002** <-> **M003** — 2 shared file(s): + - `src/types.ts` + - `src/middleware.ts` +``` + +文件重叠只是警告,不是阻断条件。因为两个 milestones 会运行在各自独立的 worktree 中,它们不会在文件系统层面互相干扰。真正的冲突会在 merge 阶段被检测和处理。 + +## 配置 + +把下面内容加到 `~/.gsd/PREFERENCES.md` 或 `.gsd/PREFERENCES.md`: + +```yaml +--- +parallel: + enabled: false # 总开关(默认:false) + max_workers: 2 # 并发 workers 数(1-4,默认:2) + budget_ceiling: 50.00 # 聚合成本上限(美元,可选) + merge_strategy: "per-milestone" # 何时 merge:"per-slice" 或 "per-milestone" + auto_merge: "confirm" # "auto"、"confirm" 或 "manual" +--- +``` + +### 配置参考 + +| Key | 类型 | 默认值 | 说明 | +|-----|------|--------|------| +| `enabled` | boolean | `false` | 总开关。只有设为 `true`,`/gsd parallel` 命令才可用。 | +| `max_workers` | number(1-4) | `2` | 最大并发 worker 进程数。值越高,内存与 API 预算消耗也越高。 | +| `budget_ceiling` | number | 无 | 所有 workers 的聚合美元预算上限。达到后不会再派发新单元。 | +| `merge_strategy` | `"per-slice"` 或 `"per-milestone"` | `"per-milestone"` | worktree 变更何时回合并到主分支。Per-milestone 会等整个 milestone 完成后再合并。 | +| `auto_merge` | `"auto"`、`"confirm"`、`"manual"` | `"confirm"` | merge-back 策略。`confirm` 会在合并前询问;`manual` 要求显式执行 `/gsd parallel merge`。 | + +## 命令 + +| 命令 | 说明 | +|------|------| +| `/gsd parallel start` | 分析可并行性、确认并启动 workers | +| `/gsd parallel status` | 显示所有 workers 的状态、已完成单元和成本 | +| `/gsd parallel stop` | 停止所有 workers(发送 SIGTERM) | +| `/gsd parallel stop M002` | 停止某个指定 milestone 的 worker | +| `/gsd parallel pause` | 暂停所有 workers(完成当前单元后等待) | +| `/gsd parallel pause M002` | 暂停某个指定 worker | +| `/gsd parallel resume` | 恢复所有已暂停 workers | +| `/gsd parallel resume M002` | 恢复某个指定 worker | +| `/gsd parallel merge` | 把所有已完成 milestones 合并回 main | +| `/gsd parallel merge M002` | 只把某个指定 milestone 合并回 main | + +## 信号生命周期 + +Coordinator 通过信号和 workers 通信: + +``` +Coordinator Worker + │ │ + ├── sendSignal("pause") ──→ │ + │ ├── consumeSignal() + │ ├── pauseAuto() + │ │ (完成当前单元后等待) + │ │ + ├── sendSignal("resume") ─→ │ + │ ├── consumeSignal() + │ ├── 继续 dispatch loop + │ │ + 
├── sendSignal("stop") ───→ │ + │ + SIGTERM ────────────→ │ + │ ├── consumeSignal() or SIGTERM handler + │ ├── stopAuto() + │ └── 进程退出 +``` + +Workers 会在单元之间检查信号(位于 `handleAgentEnd`)。在 stop 场景下,coordinator 还会额外发送 `SIGTERM` 来提高响应速度。 + +## Merge 对账 + +当 milestones 完成后,它们在 worktree 中的改动需要 merge 回主分支。 + +### Merge 顺序 + +- **顺序合并**(默认):按 milestone ID 顺序合并(M001 在 M002 之前) +- **按完成顺序合并**:按照 milestones 实际完成的先后顺序合并 + +### 冲突处理 + +1. `.gsd/` 状态文件(如 `STATE.md`、`metrics.json`)会**自动解决**,默认接受 milestone 分支版本 +2. 代码冲突则会**停止并报告**。合并会暂停,并显示哪些文件冲突。你需要手动解决后,再执行 `/gsd parallel merge ` 重试 + +### 示例 + +``` +/gsd parallel merge + +# Merge Results + +- **M002** — merged successfully (pushed) +- **M003** — CONFLICT (2 file(s)): + - `src/types.ts` + - `src/middleware.ts` + Resolve conflicts manually and run `/gsd parallel merge M003` to retry. +``` + +## 预算管理 + +当设置了 `budget_ceiling` 时,coordinator 会跟踪所有 workers 的聚合成本: + +- 成本会从每个 worker 的 session status 中汇总 +- 达到上限后,coordinator 会向 workers 发出停止信号 +- 每个 worker 仍会独立遵守项目级 `budget_ceiling` 偏好 + +## 健康监控 + +### Doctor 集成 + +`/gsd doctor` 能检测并行会话相关问题: + +- **过期的并行会话**:worker 进程已经死亡,但没有清理干净。Doctor 会检查 `.gsd/parallel/*.status.json` 中记录的 PID 和 heartbeat,发现失效后自动清理。 + +可以执行 `/gsd doctor --fix` 自动清理。 + +### 过期检测 + +满足以下任一条件时,会话会被视为 stale: + +- Worker PID 已经不存在(通过 `process.kill(pid, 0)` 检查) +- 最近一次 heartbeat 超过 30 秒 + +Coordinator 会在 `refreshWorkerStatuses()` 中执行 stale detection,并自动移除已经死亡的会话。 + +## 安全模型 + +| 安全层 | 保护内容 | +|--------|----------| +| **Feature flag** | 默认 `parallel.enabled: false`,不影响现有用户 | +| **可并行性分析** | 启动前检查依赖和文件重叠 | +| **Worker 隔离** | 独立进程、worktrees、分支、上下文窗口 | +| **`GSD_MILESTONE_LOCK`** | 每个 worker 在状态推导时只能看到自己的 milestone | +| **`GSD_PARALLEL_WORKER`** | Worker 不能再嵌套启动新的并行会话 | +| **预算上限** | 跨所有 workers 执行聚合成本限制 | +| **信号式关闭** | 通过文件信号 + SIGTERM 优雅停止 | +| **Doctor 集成** | 检测并清理孤儿会话 | +| **冲突感知 merge** | 遇到代码冲突时停止;`.gsd/` 状态冲突自动解决 | + +## 文件布局 + +``` +.gsd/ +├── parallel/ # Coordinator ↔ worker IPC +│ ├── M002.status.json # Worker heartbeat + progress +│ ├── M002.signal.json # Coordinator → worker signals +│ ├── M003.status.json +│ └── M003.signal.json +├── worktrees/ # Git worktrees(每个 milestone 一个) +│ ├── M002/ # M002 的隔离 checkout +│ │ ├── .gsd/ # M002 自己的状态文件 +│ │ │ ├── auto.lock +│ │ │ ├── metrics.json +│ │ │ └── milestones/ +│ │ └── src/ # M002 的工作副本 +│ └── M003/ +│ └── ... +└── ... +``` + +`.gsd/parallel/` 和 `.gsd/worktrees/` 都会被 gitignore,因为它们只是运行时协调文件,永远不会提交。 + +## 故障排查 + +### “Parallel mode is not enabled” + +在偏好设置里加入 `parallel.enabled: true`。 + +### “No milestones are eligible for parallel execution” + +说明所有 milestones 要么已完成,要么被依赖阻塞。可通过 `/gsd queue` 查看 milestone 状态和依赖链。 + +### Worker 崩溃后如何恢复 + +Workers 会自动把状态持久化到磁盘。如果某个 worker 进程死亡,coordinator 会通过 heartbeat 超时检测到死掉的 PID,并把该 worker 标记为 crashed。重启后,worker 会从磁盘状态继续:崩溃恢复、worktree 重入和 completed-unit 跟踪都会延续之前的状态。 + +1. 执行 `/gsd doctor --fix` 清理 stale sessions +2. 执行 `/gsd parallel status` 查看当前状态 +3. 重新执行 `/gsd parallel start`,为剩余 milestones 启动新的 workers + +### 并行执行完成后发生 merge 冲突 + +1. 执行 `/gsd parallel merge` 查看哪些 milestones 存在冲突 +2. 在 `.gsd/worktrees//` 对应的 worktree 中手动解决冲突 +3. 
执行 `/gsd parallel merge ` 重试 + +### Workers 看起来卡住了 + +先检查是否触达了预算上限:`/gsd parallel status` 会显示每个 worker 的成本。继续执行的话,提升 `parallel.budget_ceiling` 或直接移除它。 diff --git a/docs/zh-CN/user-docs/providers.md b/docs/zh-CN/user-docs/providers.md new file mode 100644 index 000000000..e19a5e910 --- /dev/null +++ b/docs/zh-CN/user-docs/providers.md @@ -0,0 +1,677 @@ +# Provider 设置指南 + +这是一份覆盖 GSD 所有受支持 LLM providers 的分步配置指南。如果你已经运行过 onboarding 向导(`gsd config`)并选择了 provider,很可能已经配置完成,可以在会话中用 `/model` 检查。 + +## 目录 + +- [快速参考](#quick-reference) +- [内置 Providers](#built-in-providers) + - [Anthropic(Claude)](#anthropic-claude) + - [OpenAI](#openai) + - [Google Gemini](#google-gemini) + - [OpenRouter](#openrouter) + - [Groq](#groq) + - [xAI(Grok)](#xai-grok) + - [Mistral](#mistral) + - [GitHub Copilot](#github-copilot) + - [Amazon Bedrock](#amazon-bedrock) + - [Vertex AI 上的 Anthropic](#anthropic-on-vertex-ai) + - [Azure OpenAI](#azure-openai) +- [本地 Providers](#local-providers) + - [Ollama](#ollama) + - [LM Studio](#lm-studio) + - [vLLM](#vllm) + - [SGLang](#sglang) +- [自定义 OpenAI-Compatible Endpoints](#custom-openai-compatible-endpoints) +- [常见坑点](#common-pitfalls) +- [验证你的配置](#verifying-your-setup) + + +## 快速参考 + +| Provider | 认证方式 | 环境变量 | 配置文件 | +|----------|----------|----------|----------| +| Anthropic | API key | `ANTHROPIC_API_KEY` | — | +| OpenAI | API key | `OPENAI_API_KEY` | — | +| Google Gemini | API key | `GEMINI_API_KEY` | — | +| OpenRouter | API key | `OPENROUTER_API_KEY` | 可选 `models.json` | +| Groq | API key | `GROQ_API_KEY` | — | +| xAI | API key | `XAI_API_KEY` | — | +| Mistral | API key | `MISTRAL_API_KEY` | — | +| GitHub Copilot | OAuth | `GH_TOKEN` | — | +| Amazon Bedrock | IAM credentials | `AWS_PROFILE` 或 `AWS_ACCESS_KEY_ID` | — | +| Vertex AI | ADC | `GOOGLE_APPLICATION_CREDENTIALS` | — | +| Azure OpenAI | API key | `AZURE_OPENAI_API_KEY` | — | +| Ollama | 无(本地) | — | 需要 `models.json` | +| LM Studio | 无(本地) | — | 需要 `models.json` | +| vLLM / SGLang | 无(本地) | — | 需要 `models.json` | + +--- + + +## 内置 Providers + +内置 providers 的 models 已经预注册在 GSD 里。你只需要提供认证信息。 + + +### Anthropic(Claude) + +**推荐。** Anthropic models 集成最深,支持内置 Web 搜索、extended thinking 和 prompt caching。 + +**选项 A:API key(推荐)** + +```bash +export ANTHROPIC_API_KEY="sk-ant-..." 
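+
+# 可先确认变量已生效(只打印前几位,避免泄露完整 key):
+echo "${ANTHROPIC_API_KEY:0:10}..."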
+``` + +或者运行 `gsd config`,在提示时粘贴 key。 + +**获取 key:** [console.anthropic.com/settings/keys](https://console.anthropic.com/settings/keys) + +**选项 B:Claude Code CLI** + +如果你有 Claude Pro 或 Max 订阅,可以通过 Anthropic 官方的 Claude Code CLI 完成认证。安装后执行 `claude` 登录,随后 GSD 会自动检测并经由该通道路由: + +```bash +# 安装 Claude Code CLI(见 https://docs.anthropic.com/en/docs/claude-code) +claude +# 按提示登录,然后启动 GSD +gsd +``` + +GSD 会检测你本地的 Claude Code 安装,并把它作为已认证的 Anthropic surface 使用。这是 Anthropic 订阅用户符合 TOS 的方式,GSD 不会直接处理你的订阅凭据。 + +> **注意:** GSD 不支持 Anthropic 的浏览器 OAuth 登录。请改用 API key 或 Claude Code CLI。 + +**选项 C:在 Claude Code 里直接用 Claude Pro / Max 订阅跑 GSD** + +如果你已经有 Claude Pro / Max 订阅,并希望直接在 Claude Code 里使用 GSD 的 planning、execution 和 milestone orchestration,而不是切到单独终端,那么可以把 GSD 接成一个 MCP server。这样 Claude Code 就能通过 [Model Context Protocol](https://modelcontextprotocol.io) 使用 GSD 的完整 workflow 工具集,在你现有 Claude plan 的驱动下获得 GSD 的结构化项目管理能力。 + +**自动配置(推荐)** + +当 GSD 在启动时检测到 Claude Code model,它会自动在项目根目录写入一个带有 GSD workflow MCP server 配置的 `.mcp.json` 文件。无需手动步骤,只要以 Claude Code 作为 provider 启动一次 GSD,配置就会自动生成。 + +你也可以在 GSD 会话中手动触发: + +```bash +/gsd mcp init +``` + +这会在项目的 `.mcp.json` 中写入(或更新)`gsd-workflow` 条目。Claude Code 会在下一次启动会话时自动发现这个文件。 + +**手动配置** + +如果你更希望自己配置,可以把 GSD 加到项目的 `.mcp.json` 中: + +```json +{ + "mcpServers": { + "gsd": { + "command": "npx", + "args": ["gsd-mcp-server"], + "env": { + "GSD_CLI_PATH": "/path/to/gsd" + } + } + } +} +``` + +如果 `gsd-mcp-server` 已经全局安装: + +```json +{ + "mcpServers": { + "gsd": { + "command": "gsd-mcp-server" + } + } +} +``` + +你也可以把这段配置写到 `~/.claude/settings.json` 的 `mcpServers` 中,让 GSD 在所有项目中都可用。 + +**暴露了什么** + +MCP server 会暴露 GSD 的完整 workflow 工具面:milestone planning、task completion、slice 管理、roadmap reassessment、journal 查询等。会话管理工具(`gsd_execute`、`gsd_status`、`gsd_result`、`gsd_cancel`)允许 Claude Code 启动并监控 GSD 自动模式会话。完整工具列表见 [命令 → MCP Server 模式](./commands.md#mcp-server-mode)。 + +**验证连接** + +在 GSD 会话里检查 MCP server 是否可达: + +```bash +/gsd mcp status +``` + + +### OpenAI + +```bash +export OPENAI_API_KEY="sk-..." +``` + +或者运行 `gsd config`,选择 “Paste an API key” 然后选择 “OpenAI”。 + +**获取 key:** [platform.openai.com/api-keys](https://platform.openai.com/api-keys) + + +### Google Gemini + +```bash +export GEMINI_API_KEY="..." +``` + +**获取 key:** [aistudio.google.com/app/apikey](https://aistudio.google.com/app/apikey) + + +### OpenRouter + +OpenRouter 通过单个 API key 聚合了多个 providers 的 200+ models。 + +**第 1 步:获取 API key** + +访问 [openrouter.ai/keys](https://openrouter.ai/keys) 创建一个 key。 + +**第 2 步:设置 key** + +```bash +export OPENROUTER_API_KEY="sk-or-..." 
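+
+# 可选:通过 OpenAI 兼容的 /models 端点快速自检(curl 命令仅为示例)
+curl -s https://openrouter.ai/api/v1/models \
+  -H "Authorization: Bearer $OPENROUTER_API_KEY" | head -c 200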
+``` + +或者运行 `gsd config`,选择 “Paste an API key” 然后选择 “OpenRouter”。 + +**第 3 步:切换到 OpenRouter model** + +在 GSD 会话中输入 `/model` 并选择一个 OpenRouter model。OpenRouter models 都以 `openrouter/` 为前缀(例如 `openrouter/anthropic/claude-sonnet-4`)。 + +**可选:通过 `models.json` 添加自定义 OpenRouter models** + +如果你想使用不在内置列表中的 model,可把它写进 `~/.gsd/agent/models.json`: + +```json +{ + "providers": { + "openrouter": { + "baseUrl": "https://openrouter.ai/api/v1", + "apiKey": "OPENROUTER_API_KEY", + "api": "openai-completions", + "models": [ + { + "id": "meta-llama/llama-3.3-70b", + "name": "Llama 3.3 70B (OpenRouter)", + "reasoning": false, + "input": ["text"], + "contextWindow": 131072, + "maxTokens": 32768, + "cost": { "input": 0.3, "output": 0.3, "cacheRead": 0, "cacheWrite": 0 } + } + ] + } + } +} +``` + +注意:这里的 `apiKey` 字段写的是**环境变量名**,不是字面 key。GSD 会自动解析它。你也可以改用字面值或 shell 命令(见 [值解析](./custom-models.md#value-resolution))。 + +**可选:路由到指定上游 provider** + +你可以通过 `modelOverrides` 控制 OpenRouter 实际选用哪个上游 provider: + +```json +{ + "providers": { + "openrouter": { + "modelOverrides": { + "anthropic/claude-sonnet-4": { + "compat": { + "openRouterRouting": { + "only": ["amazon-bedrock"] + } + } + } + } + } + } +} +``` + + +### Groq + +```bash +export GROQ_API_KEY="gsk_..." +``` + +**获取 key:** [console.groq.com/keys](https://console.groq.com/keys) + + +### xAI(Grok) + +```bash +export XAI_API_KEY="xai-..." +``` + +**获取 key:** [console.x.ai](https://console.x.ai) + + +### Mistral + +```bash +export MISTRAL_API_KEY="..." +``` + +**获取 key:** [console.mistral.ai/api-keys](https://console.mistral.ai/api-keys) + + +### GitHub Copilot + +使用 OAuth,通过浏览器登录: + +```bash +gsd config +# 选择 "Sign in with your browser" → "GitHub Copilot" +``` + +要求你拥有有效的 GitHub Copilot 订阅。 + + +### Amazon Bedrock + +Bedrock 使用 AWS IAM 凭据,而不是 API key。下面任意一种都可以: + +```bash +# 选项 1:命名 profile +export AWS_PROFILE="my-profile" + +# 选项 2:IAM keys +export AWS_ACCESS_KEY_ID="AKIA..." +export AWS_SECRET_ACCESS_KEY="..." +export AWS_REGION="us-east-1" + +# 选项 3:Bedrock API key(bearer token) +export AWS_BEARER_TOKEN_BEDROCK="..." +``` + +ECS task roles 和 IRSA(Kubernetes)也会被自动检测。 + + +### Vertex AI 上的 Anthropic + +使用 Google Cloud Application Default Credentials: + +```bash +gcloud auth application-default login +export ANTHROPIC_VERTEX_PROJECT_ID="my-project-id" +``` + +或者设置 `GOOGLE_CLOUD_PROJECT`,并确保 ADC 凭据存在于 `~/.config/gcloud/application_default_credentials.json`。 + + +### Azure OpenAI + +```bash +export AZURE_OPENAI_API_KEY="..." 
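+
+# 提示:Azure OpenAI 通常还需要资源 endpoint 与 deployment 名称;
+# 如果设置 key 后 /model 中看不到对应 models,可运行 `gsd config` 补全剩余配置。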
+``` + +--- + + +## 本地 Providers + +本地 providers 运行在你的机器上。因为 GSD 需要知道 endpoint URL 和可用 models,所以它们都要求配置 `models.json`。 + +**配置文件位置:** `~/.gsd/agent/models.json` + +每次打开 `/model` 时,这个文件都会自动重新加载,无需重启。 + + +### Ollama + +**第 1 步:安装并启动 Ollama** + +```bash +# macOS +brew install ollama +ollama serve + +# 或前往 https://ollama.com 下载 +``` + +**第 2 步:拉取一个 model** + +```bash +ollama pull llama3.1:8b +ollama pull qwen2.5-coder:7b +``` + +**第 3 步:创建 `~/.gsd/agent/models.json`** + +```json +{ + "providers": { + "ollama": { + "baseUrl": "http://localhost:11434/v1", + "api": "openai-completions", + "apiKey": "ollama", + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false + }, + "models": [ + { "id": "llama3.1:8b" }, + { "id": "qwen2.5-coder:7b" } + ] + } + } +} +``` + +`apiKey` 是 schema 的必填字段,但 Ollama 会忽略它,因此任意值都可以。 + +**第 4 步:选择 model** + +在 GSD 里输入 `/model`,然后选择你的 Ollama model。 + +**Ollama 提示:** + +- Ollama 不支持 `developer` role,也不支持 `reasoning_effort`,因此请始终设置 `compat.supportsDeveloperRole: false` 和 `compat.supportsReasoningEffort: false` +- 如果得到空响应,先检查 `ollama serve` 是否正在运行,以及 model 是否已经 pull 下来 +- 如果未显式指定,`contextWindow` 和 `maxTokens` 默认分别为 128K / 16K。若模型能力不同,请手动覆盖 + + +### LM Studio + +**第 1 步:安装 LM Studio** + +访问 [lmstudio.ai](https://lmstudio.ai) 下载。 + +**第 2 步:启动本地 server** + +在 LM Studio 中进入 “Local Server” 标签页,加载一个 model,然后点击 “Start Server”。默认端口为 1234。 + +**第 3 步:创建 `~/.gsd/agent/models.json`** + +```json +{ + "providers": { + "lm-studio": { + "baseUrl": "http://localhost:1234/v1", + "api": "openai-completions", + "apiKey": "lm-studio", + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false + }, + "models": [ + { + "id": "your-model-name", + "name": "My Local Model", + "contextWindow": 32768, + "maxTokens": 4096 + } + ] + } + } +} +``` + +把 `your-model-name` 替换成 LM Studio server 标签页中显示的 model 标识符。 + +**LM Studio 提示:** + +- `models.json` 里的 model `id` 必须与 LM Studio server API 返回的值完全一致 +- LM Studio 默认端口是 1234;如果你改了端口,也要同步修改 `baseUrl` +- 如果模型支持更大的上下文,记得上调 `contextWindow` 和 `maxTokens` + + +### vLLM + +```json +{ + "providers": { + "vllm": { + "baseUrl": "http://localhost:8000/v1", + "api": "openai-completions", + "apiKey": "vllm", + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false, + "supportsUsageInStreaming": false + }, + "models": [ + { + "id": "meta-llama/Llama-3.1-8B-Instruct", + "contextWindow": 128000, + "maxTokens": 16384 + } + ] + } + } +} +``` + +model `id` 必须与 `vllm serve` 启动时传入的 `--model` 参数完全一致。 + + +### SGLang + +```json +{ + "providers": { + "sglang": { + "baseUrl": "http://localhost:30000/v1", + "api": "openai-completions", + "apiKey": "sglang", + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false + }, + "models": [ + { + "id": "meta-llama/Llama-3.1-8B-Instruct" + } + ] + } + } +} +``` + +--- + + +## 自定义 OpenAI-Compatible Endpoints + +任何实现了 OpenAI Chat Completions API 的 server 都可以和 GSD 配合使用。这包括代理(LiteLLM、Portkey、Helicone)、自托管推理服务,以及新出现的 providers。 + +**最快路径:使用 onboarding 向导** + +```bash +gsd config +# 选择 "Paste an API key" → "Custom (OpenAI-compatible)" +# 输入:base URL、API key、model ID +``` + +这会自动帮你写好 `~/.gsd/agent/models.json`。 + +**手动配置:** + +```json +{ + "providers": { + "my-provider": { + "baseUrl": "https://my-endpoint.example.com/v1", + "apiKey": "MY_PROVIDER_API_KEY", + "api": "openai-completions", + "models": [ + { + "id": "model-id-here", + "name": "Friendly Model Name", + "reasoning": false, + "input": ["text"], + "contextWindow": 128000, + "maxTokens": 
16384, + "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 } + } + ] + } + } +} +``` + +**添加自定义 headers(常见于代理)** + +```json +{ + "providers": { + "litellm-proxy": { + "baseUrl": "https://litellm.example.com/v1", + "apiKey": "MY_API_KEY", + "api": "openai-completions", + "headers": { + "x-custom-header": "value" + }, + "models": [...] + } + } +} +``` + +**支持 thinking mode 的 Qwen models** + +对于 Qwen-compatible servers,可用 `thinkingFormat` 打开 thinking mode: + +```json +{ + "compat": { + "thinkingFormat": "qwen", + "supportsDeveloperRole": false + } +} +``` + +如果该 server 要求 `chat_template_kwargs.enable_thinking`,请改用 `"qwen-chat-template"`。 + +关于 `compat` 字段、`modelOverrides`、值解析和高级配置的完整说明,见 [自定义模型](./custom-models.md)。 + +--- + + +## 常见坑点 + +### 使用有效 key 仍提示 “Authentication failed” + +**原因:** key 虽然设在 shell 中,但 GSD 看不到。 + +**解决:** 确认你是在同一个终端里 `export` 了该环境变量并运行 `gsd`。或者直接用 `gsd config` 把 key 保存进 `~/.gsd/agent/auth.json`,这样就能跨会话持久化。 + +### OpenRouter models 没出现在 `/model` + +**原因:** 没有设置 `OPENROUTER_API_KEY`,因此 GSD 会隐藏 OpenRouter models。 + +**解决:** 设置 key 并重启 GSD: + +```bash +export OPENROUTER_API_KEY="sk-or-..." +gsd +``` + +### Ollama 返回空响应 + +**原因:** Ollama server 没有运行,或者对应 model 尚未 pull。 + +**解决:** + +```bash +# 确认 server 正在运行 +curl http://localhost:11434/v1/models + +# 如果 model 缺失则先 pull +ollama pull llama3.1:8b +``` + +### LM Studio model ID 不匹配 + +**原因:** `models.json` 中的 `id` 和 LM Studio 实际通过 API 暴露的值不一致。 + +**解决:** 去 LM Studio 的 server 标签页查看精确的 model 标识符。它通常会包含文件名或量化后缀(例如 `lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF`)。 + +### 本地 models 报 `developer` role 错误 + +**原因:** 大多数本地推理 server 不支持 OpenAI 的 `developer` message role。 + +**解决:** 在 provider 配置里添加 `compat.supportsDeveloperRole: false`。这样 GSD 会改用 `system` message: + +```json +{ + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false + } +} +``` + +### 本地 models 报 `stream_options` 错误 + +**原因:** 部分 server 不支持 `stream_options: { include_usage: true }`。 + +**解决:** 添加 `compat.supportsUsageInStreaming: false`: + +```json +{ + "compat": { + "supportsUsageInStreaming": false + } +} +``` + +### 报 “apiKey is required” 校验错误 + +**原因:** `models.json` schema 规定:只要定义了 `models`,就必须存在 `apiKey`。 + +**解决:** 对于不需要认证的本地 server,填一个占位值即可: + +```json +"apiKey": "not-needed" +``` + +### 自定义 models 的成本显示为 `$0.00` + +这是**预期行为**。GSD 对自定义 models 的默认成本就是 0。如果你想获得准确的成本跟踪,需要自己填写 `cost` 字段: + +```json +"cost": { "input": 0.15, "output": 0.60, "cacheRead": 0.015, "cacheWrite": 0.19 } +``` + +这些值的单位都是每百万 tokens。 + +--- + + +## 验证你的配置 + +完成 provider 配置后: + +1. **启动 GSD:** + ```bash + gsd + ``` + +2. **检查可用 models:** + ``` + /model + ``` + 列表里应该能看到该 provider 的 models。 + +3. **切换到对应 model:** + 在 `/model` 选择器中选中它。 + +4. **发送一条测试消息:** + 输入任意内容,确认 model 可以正常响应。 + +如果 model 没有出现,请检查: + +- 当前 shell 中是否设置了对应环境变量 +- `models.json` 是否是合法 JSON(可执行 `cat ~/.gsd/agent/models.json | python3 -m json.tool`) +- 本地 providers 的 server 是否已经运行 + +如果还需要更多帮助,请查看 [故障排查](./troubleshooting.md),或者在会话中运行 `/gsd doctor`。 diff --git a/docs/zh-CN/user-docs/remote-questions.md b/docs/zh-CN/user-docs/remote-questions.md new file mode 100644 index 000000000..bdf534bf6 --- /dev/null +++ b/docs/zh-CN/user-docs/remote-questions.md @@ -0,0 +1,161 @@ +# 远程提问 + +在无头自动模式下运行时,远程提问允许 GSD 通过 Slack、Discord 或 Telegram 请求用户输入。当 GSD 遇到需要人工判断的决策点时,它会把问题发到你配置好的频道,并轮询等待响应。 + +## 设置 + +### Discord + +``` +/gsd remote discord +``` + +配置向导会: + +1. 询问你的 Discord bot token +2. 通过 Discord API 验证该 token +3. 列出 bot 当前加入的服务器(或让你选择) +4. 列出所选服务器中的文本频道 +5. 发送一条测试消息以确认权限 +6. 
把配置保存到 `~/.gsd/PREFERENCES.md` + +**Bot 要求:** + +- 需要一个带 token 的 Discord bot application(来自 [Discord Developer Portal](https://discord.com/developers/applications)) +- Bot 必须以以下权限加入目标服务器: + - Send Messages + - Read Message History + - Add Reactions + - View Channel +- 必须设置 `DISCORD_BOT_TOKEN` 环境变量(配置向导会帮你处理) + +### Slack + +``` +/gsd remote slack +``` + +配置向导会: + +1. 询问你的 Slack bot token(`xoxb-...`) +2. 验证该 token +3. 列出 bot 可访问的频道(也支持手动输入 ID) +4. 发送一条测试消息确认权限 +5. 保存配置 + +**Bot 要求:** + +- 需要一个带 bot token 的 Slack app(来自 [Slack API](https://api.slack.com/apps)) +- Bot 必须已加入目标频道 +- 公共 / 私有频道常见需要的 scope:`chat:write`、`reactions:read`、`reactions:write`、`channels:read`、`groups:read`、`channels:history`、`groups:history` + +### Telegram + +``` +/gsd remote telegram +``` + +配置向导会: + +1. 询问你的 Telegram bot token(来自 [@BotFather](https://t.me/BotFather)) +2. 通过 Telegram API 验证该 token +3. 询问 chat ID(群聊或私聊) +4. 发送测试消息以确认权限 +5. 保存配置 + +**Bot 要求:** + +- 需要一个来自 [@BotFather](https://t.me/BotFather) 的 Telegram bot token +- Bot 必须已加入目标群聊(或者直接与 bot 私聊) +- 必须设置 `TELEGRAM_BOT_TOKEN` 环境变量 + +## 配置 + +远程提问配置保存在 `~/.gsd/PREFERENCES.md`: + +```yaml +remote_questions: + channel: discord # 或 slack 或 telegram + channel_id: "1234567890123456789" + timeout_minutes: 5 # 1-30,默认 5 + poll_interval_seconds: 5 # 2-30,默认 5 +``` + +## 工作原理 + +1. GSD 在自动模式过程中遇到一个决策点 +2. 问题会以富文本 embed(Discord)或 Block Kit 消息(Slack)的形式发送到你配置的频道 +3. GSD 按设定的间隔轮询响应 +4. 你可以通过以下方式回复: + - **添加数字表情回应**(1️⃣、2️⃣ 等),适用于单问题提示 + - **回复消息内容**,可以是数字(`1`)、逗号分隔数字(`1,3`)或自由文本 +5. GSD 读取到响应后继续执行 +6. 提示消息上会追加一个 ✅ 反应,表示已收到 + +### 响应格式 + +**单个问题:** + +- 用数字表情回应(适用于单问题提示) +- 回复一个数字:`2` +- 回复自由文本(会作为用户备注记录) + +**多个问题:** + +- 用分号回复:`1;2;custom text` +- 用换行回复(每行一个答案) + +### 超时 + +如果在 `timeout_minutes` 内没有收到响应,提示会超时,GSD 将带着超时结果继续执行。LLM 会根据当前上下文处理超时,通常是做一个保守默认选择,或者暂停自动模式。 + +## 命令 + +| 命令 | 说明 | +|------|------| +| `/gsd remote` | 显示远程提问菜单和当前状态 | +| `/gsd remote slack` | 配置 Slack 集成 | +| `/gsd remote discord` | 配置 Discord 集成 | +| `/gsd remote status` | 显示当前配置和最近一次提示状态 | +| `/gsd remote disconnect` | 移除远程提问配置 | + +## Discord 与 Slack 功能对比 + +| 功能 | Discord | Slack | +|------|---------|-------| +| 富文本消息格式 | Embeds with fields | Block Kit | +| 用 reaction 回答 | ✅(单问题) | ✅(单问题) | +| 线程式回复 | Message replies | Thread replies | +| 日志中的消息 URL | ✅ | ✅ | +| 已收到应答的确认 | ✅ 收到后加 reaction | ✅ 收到后加 reaction | +| 多问题支持 | 文本回复(分号 / 换行) | 文本回复(分号 / 换行) | +| 提示中的上下文来源 | ✅(footer) | ✅(context block) | +| 服务器 / 频道选择器 | ✅(交互式) | ✅(交互式 + 手动兜底) | +| Token 验证 | ✅ | ✅ | +| 配置阶段测试消息 | ✅ | ✅ | + +## 故障排查 + +### “Remote auth failed” + +- 确认 bot token 正确且未过期 +- 对 Discord:确认 bot 仍然在目标服务器内 +- 对 Slack:确认 bot token 以 `xoxb-` 开头 + +### “Could not send to channel” + +- 确认 bot 在目标频道拥有 Send Messages 权限 +- 对 Discord:检查 Server Settings 中 bot 对应角色的权限 +- 对 Slack:确认 bot 已加入频道(`/invite @botname`) + +### 未检测到响应 + +- 确认你是在**回复该提示消息**,而不是单独发了一条新消息 +- 对 reactions:只有单问题提示上的数字表情(1️⃣-5️⃣)会被识别 +- 检查 `timeout_minutes` 是否足够长,能覆盖你的响应时间 + +### 频道 ID 格式 + +- **Slack**:9-12 位大写字母数字字符(例如 `C0123456789`) +- **Discord**:17-20 位纯数字 snowflake ID(例如 `1234567890123456789`) +- 在 Discord 中开启 Developer Mode(Settings → Advanced)后可以复制频道 ID diff --git a/docs/zh-CN/user-docs/skills.md b/docs/zh-CN/user-docs/skills.md new file mode 100644 index 000000000..63ce71778 --- /dev/null +++ b/docs/zh-CN/user-docs/skills.md @@ -0,0 +1,195 @@ +# 技能 + +技能(Skills)是当当前 task 匹配时由 GSD 加载的专用指令集。它们为 LLM 提供领域化指导,例如编码模式、框架惯用法、测试策略和工具使用方式。 + +Skills 遵循开放的 [Agent Skills 标准](https://agentskills.io/),并且**不是 GSD 专属格式**。它们同样适用于 Claude Code、OpenAI Codex、Cursor、GitHub 
Copilot、Windsurf 以及其他 40+ agent。 + +## 技能目录 + +GSD 会按优先级顺序从两个位置读取技能: + +| 位置 | 范围 | 说明 | +|------|------|------| +| `~/.agents/skills/` | 全局 | 对所有项目和所有兼容 agent 共享 | +| `.agents/skills/`(项目根目录) | 项目级 | 项目专用技能,可提交到版本控制 | + +如果出现同名技能,全局技能优先于项目技能。 + +> **从 `~/.gsd/agent/skills/` 迁移:** 升级后首次启动时,GSD 会自动把旧版 `~/.gsd/agent/skills/` 中的技能复制到 `~/.agents/skills/`。旧目录会保留,以兼容旧流程。 + +## 安装技能 + +技能通过 [skills.sh CLI](https://skills.sh) 安装: + +```bash +# 交互式:选择要安装的技能以及目标 agent +npx skills add dpearson2699/swift-ios-skills + +# 非交互方式安装指定技能 +npx skills add dpearson2699/swift-ios-skills --skill swift-concurrency --skill swiftui-patterns -y + +# 安装仓库中的全部技能 +npx skills add dpearson2699/swift-ios-skills --all + +# 检查更新 +npx skills check + +# 更新已安装技能 +npx skills update +``` + +### 入门技能目录 + +在执行 `gsd init` 时,GSD 会检测项目技术栈并推荐合适的技能包。对于 brownfield 项目,检测是自动的;对于 greenfield 项目,则由用户选择技术栈。 + +这个精选目录维护在 `src/resources/extensions/gsd/skill-catalog.ts`。每一条目都会把一个技术栈映射到一个 skills.sh 仓库,以及其中的具体技能名称。 + +#### 可用技能包 + +**Swift(检测到任意 Swift 项目,例如 `Package.swift` 或 `.xcodeproj`):** + +- **SwiftUI**:布局、导航、动画、手势、Liquid Glass +- **Swift Core**:Swift 语言、并发、Codable、Charts、Testing、SwiftData + +**iOS(仅当 `.xcodeproj` 目标通过 `SDKROOT` 指向 `iphoneos` 时):** + +- **iOS App Frameworks**:App Intents、Widgets、StoreKit、MapKit、Live Activities +- **iOS Data Frameworks**:CloudKit、HealthKit、MusicKit、WeatherKit、Contacts +- **iOS AI & ML**:Core ML、Vision、端侧 AI、语音识别 +- **iOS Engineering**:网络、安全、可访问性、本地化、Instruments +- **iOS Hardware**:Bluetooth、CoreMotion、NFC、PencilKit、RealityKit +- **iOS Platform**:CallKit、EnergyKit、HomeKit、SharePlay、PermissionKit + +**Web:** + +- **React & Web Frontend**:React 最佳实践、Web 设计、组合模式 +- **React Native**:跨平台移动开发模式 +- **Frontend Design & UX**:前端设计与可访问性 + +**语言:** + +- **Rust**:Rust 模式与最佳实践 +- **Python**:Python 模式与最佳实践 +- **Go**:Go 模式与最佳实践 + +**通用:** + +- **Document Handling**:PDF、DOCX、XLSX、PPTX 的创建和处理 + +### 维护目录 + +技能目录定义位于 [`src/resources/extensions/gsd/skill-catalog.ts`](../../../src/resources/extensions/gsd/skill-catalog.ts)。新增或更新一个技能包时: + +1. 在 `SKILL_CATALOG` 数组中新增一个 `SkillPack` 条目,包含 `repo`、`skills` 和匹配条件 +2. 基于语言检测做匹配时,使用 `matchLanguages`(取值来自 `detection.ts` 中的 `LANGUAGE_MAP`) +3. 基于 Xcode 平台做匹配时,使用 `matchXcodePlatforms`(例如 `["iphoneos"]`,取自 `project.pbxproj` 中的 `SDKROOT`) +4. 基于文件存在与否做匹配时,使用 `matchFiles`(对照 `detection.ts` 中的 `PROJECT_FILES`) +5. 如果这个技能包需要在 greenfield 选项中出现,把它加入 `GREENFIELD_STACKS` +6. 如果多个技能包共享同一个 `repo`,它们会被合并为一次 `npx skills add` 调用 + +## 技能发现 + +`skill_discovery` 偏好控制 GSD 在自动模式中如何发现技能: + +| 模式 | 行为 | +|------|------| +| `auto` | 自动查找并应用技能 | +| `suggest` | 识别技能,但需要确认(默认) | +| `off` | 关闭技能发现 | + +## 技能偏好 + +你可以通过偏好设置控制使用哪些技能: + +```yaml +--- +version: 1 +always_use_skills: + - debug-like-expert +prefer_skills: + - frontend-design +avoid_skills: + - security-docker +skill_rules: + - when: task involves Clerk authentication + use: [clerk] + - when: frontend styling work + prefer: [frontend-design] +--- +``` + +### 解析顺序 + +技能可以通过以下几种方式引用: + +1. **裸名称**:例如 `frontend-design`,会扫描 `~/.agents/skills/` 和项目内的 `.agents/skills/` +2. **绝对路径**:例如 `/Users/you/.agents/skills/my-skill/SKILL.md` +3. 
**目录路径**:例如 `~/custom-skills/my-skill`,会在其中查找 `SKILL.md` + +全局技能(`~/.agents/skills/`)优先于项目技能(`.agents/skills/`)。 + +## 自定义技能 + +你可以通过新增一个包含 `SKILL.md` 的目录来创建自己的技能: + +``` +~/.agents/skills/my-skill/ + SKILL.md — 给 LLM 的指令 + references/ — 可选参考文件 +``` + +`SKILL.md` 中写的是当技能启用时,LLM 应遵循的指令。参考文件可由技能按需加载。 + +### 项目本地技能 + +如果想为某个项目提供专用指导,可以把技能放在项目里: + +``` +.agents/skills/my-project-skill/ + SKILL.md +``` + +项目本地技能可以提交到版本控制中,让团队成员共享同一套技能。 + +## 技能生命周期管理 + +GSD 会跨自动模式会话跟踪技能表现,并提供健康度数据,帮助你持续维护技能质量。 + +### 技能遥测 + +每个自动模式工作单元都会记录哪些技能可用、哪些技能实际加载。这些数据和现有的 token / 成本数据一起存入 `metrics.json`。 + +### 技能健康度面板 + +通过 `/gsd skill-health` 查看技能表现: + +``` +/gsd skill-health # 总览表:名称、使用次数、成功率、token、趋势、最近使用时间 +/gsd skill-health rust-core # 查看单个技能的详细信息 +/gsd skill-health --stale 30 # 查看 30+ 天未使用的技能 +/gsd skill-health --declining # 查看成功率在下降的技能 +``` + +该面板会标出可能需要关注的技能: + +- **最近 10 次使用的成功率低于 70%** +- **Token 使用量比上一个窗口上升 20% 以上** +- **过期技能**:超过设定阈值未使用 + +### 过期检测 + +长时间未使用的技能会被标记为 stale,并可自动降低优先级: + +```yaml +--- +skill_staleness_days: 60 # 默认 60;设为 0 表示关闭 +--- +``` + +过期技能会被排除在自动匹配之外,但仍然可以通过 `read` 显式调用。 + +### Heal-Skill(单元后分析) + +如果把它配置为 post-unit hook,GSD 可以分析 agent 在执行中是否偏离了某个技能的指令。如果检测到明显漂移(例如 API 模式过时、指导错误),它会把建议修复写到 `.gsd/skill-review-queue.md`,供人工审核。 + +一个关键设计原则是:技能**永远不会被自动修改**。研究表明,人工策展的技能明显优于自动生成技能,因此保留人工审核是必要的。 diff --git a/docs/zh-CN/user-docs/token-optimization.md b/docs/zh-CN/user-docs/token-optimization.md new file mode 100644 index 000000000..a68836fcb --- /dev/null +++ b/docs/zh-CN/user-docs/token-optimization.md @@ -0,0 +1,373 @@ +# Token 优化 + +*引入于 v2.17.0* + +GSD 2.17 引入了一套协同工作的 token 优化系统,在大多数工作负载下可以在不牺牲输出质量的前提下,将 token 使用降低 40-60%。这套系统由三部分构成:**token profiles**、**context compression** 和 **基于复杂度的 task 路由**。 + +## Token Profiles + +Token profile 是一个单一偏好项,用来统一协调 model 选择、阶段跳过和上下文压缩级别。在偏好设置中这样配置: + +```yaml +--- +version: 1 +token_profile: balanced +--- +``` + +可用的 profile 有三个: + +### `budget`:最大节省(降低 40-60%) + +面向成本敏感型工作流。它会使用更便宜的 models,跳过可选阶段,并把 dispatch 上下文压缩到最低必要程度。 + +| 维度 | 设置 | +|------|------| +| Planning model | Sonnet | +| Execution model | Sonnet | +| Simple task model | Haiku | +| Completion model | Haiku | +| Subagent model | Haiku | +| Milestone research | **跳过** | +| Slice research | **跳过** | +| Roadmap reassessment | **跳过** | +| Context inline level | **Minimal**:丢弃 decisions、requirements、额外 templates | + +适合:原型开发、小项目、已充分理解的代码库、强调成本控制的迭代。 + +### `balanced`:智能默认值(默认) + +默认 profile。保留关键阶段,跳过那些对大多数项目边际收益不高的阶段,并采用标准级别的上下文压缩。 + +| 维度 | 设置 | +|------|------| +| Planning model | 用户默认值 | +| Execution model | 用户默认值 | +| Simple task model | 用户默认值 | +| Completion model | 用户默认值 | +| Subagent model | Sonnet | +| Milestone research | 执行 | +| Slice research | **跳过** | +| Roadmap reassessment | 执行 | +| Context inline level | **Standard**:保留关键上下文,丢弃低信号附加内容 | + +适合:大多数项目、日常开发。 + +### `quality`:完整上下文(不压缩) + +所有阶段都会运行。所有上下文产物都会被内联。没有捷径。 + +| 维度 | 设置 | +|------|------| +| 所有 models | 用户配置的默认值 | +| 所有阶段 | 执行 | +| Context inline level | **Full**:全部内联 | + +适合:复杂架构、需要深度 research 的 greenfield 项目、关键生产环境工作。 + +## Context Compression + +每个 token profile 都会映射到一个 **inline level**,它控制在 dispatch prompt 里预加载多少上下文: + +| Profile | Inline Level | 包含内容 | +|---------|--------------|----------| +| `budget` | `minimal` | Task plan、关键历史 summaries(截断)。不包含 decisions register、requirements、UAT template、secrets manifest。 | +| `balanced` | `standard` | Task plan、历史 summaries、slice plan、roadmap 摘要。不包含部分辅助 templates。 | +| `quality` | `full` | 全部内容:所有 plans、summaries、decisions、requirements、templates 和根文件。 | + +### 压缩如何工作 + +Dispatch 
prompt builder 接受一个 `inlineLevel` 参数。在不同级别下,特定产物会被按规则裁剪: + +**Minimal 级别的裁剪:** + +- `buildExecuteTaskPrompt`:丢弃 decisions template,并把历史 summaries 截断到只保留最近一个 +- `buildPlanMilestonePrompt`:丢弃 `PROJECT.md`、`REQUIREMENTS.md`、decisions 以及 `secrets-manifest` 等补充 templates +- `buildCompleteSlicePrompt`:丢弃 requirements 和 UAT template 的内联 +- `buildCompleteMilestonePrompt`:丢弃根级 GSD 文件内联 +- `buildReassessRoadmapPrompt`:丢弃 project、requirements 和 decisions 文件 + +这些裁剪是累积式的:`standard` 会丢掉一部分,`minimal` 会丢掉更多;`full` 则保留全部上下文(也就是 v2.17 之前的行为)。 + +### 覆盖 Inline Level + +Inline level 由 `token_profile` 推导而来。如果你想独立于 profile 控制阶段行为,请使用 `phases` 偏好设置: + +```yaml +--- +version: 1 +token_profile: budget +phases: + skip_research: false # 覆盖:即使是 budget,也执行 research +--- +``` + +显式设置的 `phases` 总是优先于 profile 默认值。 + + +## 基于复杂度的 Task 路由 + +当启用 dynamic routing 时,GSD 会根据复杂度对每个 task 做分类,并将其路由到合适的 model tier。简单的文档修复会使用更便宜的模型,而复杂的架构工作会获得所需的推理能力。 + +> **前提条件:** Dynamic routing 需要在偏好设置里显式配置 `models`。如果没有 `models` 段,routing 会被跳过,所有 phases 都会使用会话启动时的 model。Token profiles 会自动设置 `models`。 + +> **上限行为:** 当 dynamic routing 启用时,每个 phase 中配置的 model 充当的是**上限**,而不是固定绑定。Router 可以为更简单的工作降级到更便宜的 model,但绝不会超过你配置的 model。 + +### 分类如何工作 + +Tasks 会通过分析 task plan 来分类: + +| 信号 | Simple | Standard | Complex | +|------|--------|----------|---------| +| Step 数量 | ≤ 3 | 4-7 | ≥ 8 | +| 文件数 | ≤ 3 | 4-7 | ≥ 8 | +| 描述长度 | < 500 chars | 500-2000 | > 2000 chars | +| 代码块数 | — | — | ≥ 5 | +| 信号词 | 无 | 任意出现 | — | + +**会阻止判定为 simple 的信号词:** `research`、`investigate`、`refactor`、`migrate`、`integrate`、`complex`、`architect`、`redesign`、`security`、`performance`、`concurrent`、`parallel`、`distributed`、`backward compat`、`migration`、`architecture`、`concurrency`、`compatibility`。 + +空 plan 或格式错误的 plan 会默认归类到 `standard`(偏保守的选择)。 + +### Unit Type 默认值 + +非 task 单元也有内置的 tier 分配: + +| Unit Type | 默认 Tier | +|-----------|-----------| +| `complete-slice`、`run-uat` | Light | +| `research-*`、`plan-*`、`execute-task`、`complete-milestone` | Standard | +| `replan-slice`、`reassess-roadmap` | Heavy | +| `hook/*` | Light | + +### Model 路由 + +每个 tier 会映射到某类 model 配置: + +| Tier | 对应 Model Phase Key | 常见 Model | +|------|----------------------|------------| +| Light | `completion` | Haiku(budget)/ 用户默认值 | +| Standard | `execution` | Sonnet / 用户默认值 | +| Heavy | `execution` | Opus / 用户默认值 | + +如果配置了 `execution_simple`,simple tasks 会优先使用它。`budget` profile 会自动把该键设为 Haiku。 + + +### 预算压力 + +当接近预算上限时,分类器会自动降低 tier: + +| 已使用预算 | 影响 | +|------------|------| +| < 50% | 不调整 | +| 50-75% | Standard → Light | +| 75-90% | Standard → Light | +| > 90% | 除 Heavy 之外全部 → Light;Heavy → Standard | + +这种逐步降级方式能尽量把最复杂工作的模型质量保留下来,同时随着预算逼近上限逐步降低成本。 + +## 自适应学习(Routing History) + +GSD 会随着时间推移记录每个 tier 分配的成功 / 失败情况,并据此调整未来的分类。它默认自动生效,并持久化在 `.gsd/routing-history.json` 中。 + +### 工作方式 + +1. 每个工作单元完成后,系统会把结果(成功 / 失败)记录到对应的 unit type 和 tier 上 +2. 结果会按 pattern 跟踪,例如 `execute-task` 或 `execute-task:docs`,并维护最近 50 条的滚动窗口 +3. 如果某个 pattern 下某个 tier 的失败率超过 20%,未来相同 pattern 的分类会自动上调一个 tier +4. 
系统也支持更细粒度的 tag pattern,例如 `execute-task:test` 和 `execute-task:frontend` + +### 用户反馈 + +你可以通过 `/gsd rate` 为最近完成的工作单元提交反馈: + +``` +/gsd rate over # model 太强了,下次更倾向便宜一点 +/gsd rate ok # model 选得合适,不调整 +/gsd rate under # model 太弱了,下次更倾向强一点 +``` + +这些反馈的权重是自动结果的 2 倍。要求 dynamic routing 已启用(最近完成的单元必须带有 tier 数据)。 + +### 数据管理 + +```bash +# Routing history 按项目存储 +.gsd/routing-history.json + +# 清空历史以重置自适应学习 +# (通过 routing-history 模块 API 完成) +``` + +反馈数组最多保留 200 条。每个 pattern 的结果统计使用 50 条滚动窗口,以防陈旧数据长期主导判断。 + +## 配置示例 + +### 成本优先配置 + +```yaml +--- +version: 1 +token_profile: budget +budget_ceiling: 25.00 +models: + execution_simple: claude-haiku-4-5-20250414 +--- +``` + +### 使用自定义 Models 的平衡配置 + +```yaml +--- +version: 1 +token_profile: balanced +models: + planning: + model: claude-opus-4-6 + fallbacks: + - openrouter/z-ai/glm-5 + execution: claude-sonnet-4-6 +--- +``` + +### 面向关键工作的高质量配置 + +```yaml +--- +version: 1 +token_profile: quality +models: + planning: claude-opus-4-6 + execution: claude-opus-4-6 +--- +``` + +### 按阶段覆盖 + +`token_profile` 会设置默认值,但显式偏好始终优先: + +```yaml +--- +version: 1 +token_profile: budget +phases: + skip_research: false # 覆盖:保留 milestone research +models: + planning: claude-opus-4-6 # 覆盖:即使是 budget profile,planning 也用 Opus +--- +``` + +## 这些机制如何协同 + +``` +PREFERENCES.md + └─ token_profile: balanced + ├─ resolveProfileDefaults() → model 默认值 + phase 跳过默认值 + ├─ resolveInlineLevel() → standard + │ └─ prompt builders 根据 level 决定纳入哪些上下文 + ├─ classifyUnitComplexity() → 路由到 execution / execution_simple model + │ ├─ task plan 分析(steps、files、signals) + │ ├─ unit type 默认值 + │ ├─ budget pressure 调整 + │ ├─ 从 routing-history.json 做自适应学习 + │ └─ capability scoring(当 `capability_routing: true` 时) + │ └─ 7 维 model profile × task requirement vectors + └─ context_management + ├─ observation masking(before_provider_request hook) + ├─ tool result truncation(tool_result_max_chars) + └─ phase handoff anchors(注入 prompt builders) +``` + +Profile 会在 dispatch pipeline 的起点解析一次,并一路向下流动。每一层上,显式偏好都优先于 profile 默认值。 + +## Observation Masking + +*引入于 v2.59.0* + +在自动模式会话中,tool results 会不断堆积在会话历史里并占用上下文窗口。Observation masking 会在每次 LLM 调用前,把早于最近 N 个 user turns 的 tool result 内容替换成轻量占位符。这样可以在**不增加任何 LLM 开销**的前提下减少 token 使用:不需要额外总结调用,也不会带来额外延迟。 + +Observation masking 在自动模式中默认开启。可通过偏好设置控制: + +```yaml +context_management: + observation_masking: true # 默认:true(设为 false 可关闭) + observation_mask_turns: 8 # 保留最近 8 个 user turns 内的结果(范围:1-50) + tool_result_max_chars: 800 # 单个 tool result 超过该长度时进行截断 +``` + +### 工作方式 + +1. 每次 provider request 之前,`before_provider_request` hook 会检查 messages 数组 +2. 早于阈值的 tool results(`toolResult`、`bashExecution`)会被替换成 `[result masked — within summarized history]` +3. 最近的 tool results(仍在保留窗口内)会完整保留 +4. 
所有 assistant 和 user messages 始终保留,只有 tool result 内容会被 masking + +它与现有的 compaction 系统配套:masking 负责减少两次 compaction 之间的上下文压力,而 compaction 负责在窗口填满时执行完整上下文重置。 + +### Tool Result Truncation + +单个 tool result 如果超过 `tool_result_max_chars`(默认 800),会被加上 `…[truncated]` 标记后截断。这可以防止某一次特别大的工具输出独占上下文窗口。 + +## Phase Handoff Anchors + +*引入于 v2.59.0* + +当自动模式在 phases 之间切换(research → planning → execution)时,系统会把结构化 JSON anchors 写到 `.gsd/milestones//anchors/.json`。下游 prompt builders 会自动注入这些 anchors,让下一阶段继承前一阶段的意图、决策、阻塞点和下一步,而不必重新从 artifact 文件里推断。 + +这能减少上下文漂移,也就是企业级 agent 失败案例中最常见的一类问题:agent 在 phase 边界上丢失了之前的决策脉络。 + +Anchors 会在 `research-milestone`、`research-slice`、`plan-milestone` 和 `plan-slice` 成功完成后自动写入,不需要任何配置。 + +## Prompt Compression + +*引入于 v2.29.0* + +GSD 可以在退回到 section-boundary truncation 之前,先做确定性的 prompt compression。这样在上下文超预算时,可以保留更多信息。 + +### 压缩策略 + +在偏好设置中配置: + +```yaml +--- +version: 1 +compression_strategy: compress +--- +``` + +可用策略有两个: + +| 策略 | 行为 | 默认适用对象 | +|------|------|--------------| +| `truncate` | 在边界处整段丢弃 section(v2.29 之前的行为) | `quality` profile | +| `compress` | 先做启发式文本压缩,如果仍超预算,再截断 | `budget` 和 `balanced` profiles | + +Compression 会确定性地去掉冗余空白、缩短啰嗦表达、去重重复内容并删除低信息量样板文本,不涉及任何 LLM 调用。 + +### 上下文选择 + +控制文件如何内联进 prompt: + +```yaml +--- +version: 1 +context_selection: smart +--- +``` + +| 模式 | 行为 | 默认适用对象 | +|------|------|--------------| +| `full` | 内联完整文件 | `balanced` 和 `quality` profiles | +| `smart` | 对大文件(>3KB)使用 TF-IDF 语义分块,只纳入相关部分 | `budget` profile | + +### 结构化数据压缩 + +在 `budget` 和 `balanced` 的 inline level 下,decisions 和 requirements 会被格式化成更紧凑的表示方式,相比完整 markdown tables 可节省 30-50% tokens。 + +### Summary Distillation + +如果某个 slice 有 3 个以上依赖 summary,且总量超过 summary 预算,GSD 会先提取结构化核心数据(`provides`、`requires`、`key_files`、`key_decisions`),丢弃冗长 prose 段落,然后才会退回到 section-boundary truncation。 + +### Cache Hit Rate Tracking + +指标账本现在会为每个工作单元记录 `cacheHitRate`(输入 tokens 中来自缓存的比例),并提供 `aggregateCacheHitRate()` 用于统计整场会话的缓存表现。 diff --git a/docs/zh-CN/user-docs/troubleshooting.md b/docs/zh-CN/user-docs/troubleshooting.md new file mode 100644 index 000000000..e5526f6ff --- /dev/null +++ b/docs/zh-CN/user-docs/troubleshooting.md @@ -0,0 +1,434 @@ +# 故障排查 + +## `/gsd doctor` + +内置诊断工具会校验 `.gsd/` 的完整性: + +``` +/gsd doctor +``` + +它会检查: + +- 文件结构和命名约定 +- roadmap ↔ slice ↔ task 的引用完整性 +- 完成状态是否一致 +- Git worktree 健康状态(仅 worktree 和 branch 模式;none 模式跳过) +- 过期锁文件和孤儿运行时记录 + +## 常见问题 + +### 自动模式在同一个单元上循环 + +**症状:** 同一个工作单元(例如 `research-slice` 或 `plan-slice`)被反复派发,直到触发 dispatch 上限。 + +**原因:** + +- 崩溃后的缓存过期:内存中的文件列表没有反映新产物 +- LLM 没有生成预期的 artifact 文件 + +**解决:** 先运行 `/gsd doctor` 修复状态,然后执行 `/gsd auto` 恢复。如果问题持续存在,检查预期 artifact 文件是否确实已经写到磁盘。 + +### 自动模式因 “Loop detected” 停止 + +**原因:** 同一个单元连续两次没有生成预期 artifact。 + +**解决:** 检查 task plan 是否足够清晰。如果 plan 存在歧义,先手动澄清,再执行 `/gsd auto` 恢复。 + +### Worktree 中出现了错误文件 + +**症状:** Planning 产物或代码被写到了错误目录。 + +**原因:** LLM 把内容写回了主仓库,而不是 worktree。 + +**解决:** 该问题已在 v2.14+ 修复。如果你仍在旧版本,请更新。现在 dispatch prompt 已包含明确的工作目录指令。 + +### 安装后出现 `command not found: gsd` + +**症状:** `npm install -g gsd-pi` 成功,但系统找不到 `gsd`。 + +**原因:** npm 的全局 bin 目录没有加入 shell 的 `$PATH`。 + +**解决:** + +```bash +# 找出 npm 安装二进制的目录 +npm prefix -g +# 输出:/opt/homebrew(Apple Silicon)或 /usr/local(Intel Mac) + +# 如果缺失,把 bin 目录加入 PATH +echo 'export PATH="$(npm prefix -g)/bin:$PATH"' >> ~/.zshrc +source ~/.zshrc +``` + +**临时方案:** 直接执行 `npx gsd-pi`,或使用 `$(npm prefix -g)/bin/gsd`。 + +**常见原因:** + +- **Homebrew Node**:理论上 `/opt/homebrew/bin` 应该在 PATH 里,但如果 shell profile 没有初始化 Homebrew,就可能缺失 +- **版本管理器(nvm、fnm、mise)**:全局 bin 
路径是按版本区分的,需确保版本管理器正确初始化 +- **oh-my-zsh**:`gitfast` 插件会把 `gsd` alias 到 `git svn dcommit`。可通过 `alias gsd` 检查,并在需要时取消 alias + +### `npm install -g gsd-pi` 失败 + +**常见原因:** + +- 缺少 workspace packages:已在 v2.10.4+ 修复 +- Linux 上 `postinstall` 卡住(Playwright `--with-deps` 触发 sudo):已在 v2.3.6+ 修复 +- Node.js 版本过低:要求 ≥ 22.0.0 + +### 自动模式中的 provider 错误 + +**症状:** 自动模式因为 provider 错误暂停(限流、服务端错误、认证失败)。 + +**GSD 的处理方式(v2.26):** + +| 错误类型 | 自动恢复? | 延迟 | +|----------|------------|------| +| Rate limit(429、`too many requests`) | ✅ 是 | `retry-after` 头或默认 60 秒 | +| Server error(500、502、503、`overloaded`) | ✅ 是 | 30 秒 | +| Auth / billing(`unauthorized`、`invalid key`) | ❌ 否 | 需要手动恢复 | + +对于瞬时错误,GSD 会短暂停顿后自动继续。对于永久性错误,建议配置 fallback models: + +```yaml +models: + execution: + model: claude-sonnet-4-6 + fallbacks: + - openrouter/minimax/minimax-m2.5 +``` + +**Headless 模式:** `gsd headless auto` 在进程崩溃时会自动重启整个进程(默认 3 次,带指数退避)。与 provider 错误自动恢复配合后,能支持真正的夜间无人值守运行。 + +常见的 provider 配置问题(role 错误、streaming 错误、model ID 不匹配)见 [Provider 设置指南:常见坑点](./providers.md#common-pitfalls)。 + +### 达到预算上限 + +**症状:** 自动模式因 “Budget ceiling reached” 暂停。 + +**解决:** 提高偏好设置中的 `budget_ceiling`,或者切换到 `budget` token profile 降低每个工作单元成本,然后再执行 `/gsd auto` 恢复。 + +### 过期锁文件 + +**症状:** 自动模式无法启动,提示另一个会话正在运行。 + +**解决:** GSD 会自动检测过期锁:如果持有锁的 PID 已死亡,则在下次 `/gsd auto` 时清理并重新获取锁。它也会处理 `proper-lockfile` 崩溃后遗留的 `.gsd.lock/` 目录。如果自动恢复失败,可手动删除 `.gsd/auto.lock` 和 `.gsd.lock/`: + +```bash +rm -f .gsd/auto.lock +rm -rf "$(dirname .gsd)/.gsd.lock" +``` + +### Git merge 冲突 + +**症状:** Worktree merge 在 `.gsd/` 文件上失败。 + +**解决:** GSD 会自动解决 `.gsd/` 运行时文件上的冲突。对于代码文件的内容冲突,LLM 会先获得一次 fix-merge 会话进行自动修复;若失败,则需要手动解决。 + +### Pre-dispatch 提示 milestone integration branch 已不存在 + +**症状:** 自动模式或 `/gsd doctor` 报告某个 milestone 记录的 integration branch 已经不在 git 中。 + +**这意味着什么:** 该 milestone 的 `.gsd/milestones//-META.json` 里仍然记录着启动时的 branch,但该 branch 之后被重命名或删除了。 + +**当前行为:** + +- 如果 GSD 能确定性地恢复到一个安全 branch,就不会再直接 hard-stop 自动模式 +- 安全回退的顺序是: + - 显式配置且存在的 `git.main_branch` + - 仓库自动检测到的默认 integration branch(例如 `main` 或 `master`) +- 在这种情况下,`/gsd doctor` 会给出 warning,而 `/gsd doctor fix` 会把过期的 metadata 改写为当前有效 branch +- 如果无法确定安全回退 branch,GSD 仍会阻止继续运行 + +**解决:** + +- 先执行 `/gsd doctor fix`,在安全回退很明显时自动改写过期 metadata +- 如果 GSD 仍然阻塞,则请重新创建缺失 branch,或更新 git 偏好设置,让 `git.main_branch` 指向一个真实存在的 branch + +### 写 `.gsd/` 文件时出现瞬时 `EBUSY` / `EPERM` / `EACCES` + +**症状:** 在 Windows 上,自动模式或 doctor 在更新 `.gsd/` 文件时偶发 `EBUSY`、`EPERM` 或 `EACCES`。 + +**原因:** 杀毒软件、索引器、编辑器或文件监视器可能会在 GSD 执行原子 rename 的瞬间,短暂锁住目标文件或临时文件。 + +**当前行为:** GSD 现在会对这类瞬时 rename 失败做短时、有上界的退避重试;这样既能覆盖短暂锁竞争,也不会因为真正的文件系统问题而无限挂起。 + +**解决:** + +- 重新执行操作;大多数瞬时锁竞争会很快自行解除 +- 如果错误持续,关闭可能占用该文件的工具后再试 +- 如果反复失败,运行 `/gsd doctor`,确认仓库状态依旧健康,并记录具体路径与错误码 + +### Node v24 Web 启动失败 + +**症状:** 在 Node v24 上执行 `gsd --web` 时,报 `ERR_UNSUPPORTED_NODE_MODULES_TYPE_STRIPPING`。 + +**原因:** Node v24 修改了对 `node_modules` 的 type stripping 行为,导致 Next.js Web 构建失败。 + +**解决:** 已在 v2.42.0+ 修复(#1864)。升级到最新版本。 + +### 孤儿 Web server 进程 + +**症状:** `gsd --web` 因端口 3000 已被占用而失败,但实际上并没有运行中的 GSD 会话。 + +**原因:** 上一次 Web server 退出时未能清理进程。 + +**解决:** 已在 v2.42.0+ 修复。现在 GSD 会自动清理过期的 Web server 进程。如果你还在旧版本,可手动终止孤儿进程:`lsof -ti:3000 | xargs kill`。 + +### 非 JS 项目被 worktree health check 阻挡 + +**症状:** 在不使用 Node.js 的项目(例如 Rust、Go、Python)中,worktree health check 失败或阻塞自动模式。 + +**原因:** 在 v2.42.0 之前,worktree health check 只识别 JavaScript 生态。 + +**解决:** 已在 v2.42.0+ 修复(#1860)。现在 health check 已支持 17+ 生态。升级到最新版本。 + +### 德语 / 非英语 locale 下的 git 错误 + +**症状:** 当系统 locale 不是英语(例如德语)时,Git 命令失败或输出异常。 + +**原因:** GSD 之前假设 git 输出永远是英文。 
+
+**解决:** 已在 v2.42.0+ 修复。现在所有 git 命令都会强制 `LC_ALL=C`,从而无论系统 locale 如何,都保证 git 输出一致为英文。
+
+## MCP Client 问题
+
+### `mcp_servers` 显示没有已配置 servers
+
+**症状:** `mcp_servers` 报告没有配置任何 server。
+
+**常见原因:**
+
+- 当前项目里不存在 `.mcp.json` 或 `.gsd/mcp.json`
+- 配置文件不是合法 JSON
+- 你是在另一个项目目录中配置的 server,但当前启动 GSD 的目录不同
+
+**解决:**
+
+- 把 server 配置加到 `.mcp.json` 或 `.gsd/mcp.json`
+- 确认文件能被正常解析为 JSON
+- 重新执行 `mcp_servers(refresh=true)`
+
+### `mcp_discover` 超时
+
+**症状:** `mcp_discover` 因超时失败。
+
+**常见原因:**
+
+- Server 进程启动了,但没有完成 MCP 握手
+- 配置的命令指向一个启动时会卡住的脚本
+- Server 正在等待某个不可用的依赖或后端服务
+
+**解决:**
+
+- 在 GSD 外部直接运行该命令,确认 server 能真正启动
+- 检查后端 URL 或依赖服务是否可达
+- 如果是本地自定义 server,确认它使用的是 MCP SDK 或正确的 stdio 协议实现
+
+### `mcp_discover` 报 connection closed
+
+**症状:** `mcp_discover` 立即失败,并提示连接被关闭。
+
+**常见原因:**
+
+- 可执行文件路径错误
+- 脚本路径错误
+- 缺失运行时依赖
+- Server 在响应前就崩溃了
+
+**解决:**
+
+- 确认 `command` 和 `args` 的路径正确,并尽量使用绝对路径
+- 手动运行命令,查看导入 / 运行时错误
+- 检查配置中的解释器或运行时在当前机器上是否存在
+
+### `mcp_call` 因缺少必填参数失败
+
+**症状:** MCP tool 已成功发现,但调用时因缺少必填字段而校验失败。
+
+**常见原因:**
+
+- 调用时传入的参数结构写错了
+- 目标 server 的 tool schema 已更新
+- 你调用的是旧 server 定义或旧分支构建
+
+**解决:**
+
+- 重新执行 `mcp_discover(server="name")`,确认实际要求的参数名
+- 按 `mcp_call(server="name", tool="tool_name", args={...})` 的形式调用
+- 如果你正在开发 GSD 本身,在 schema 变更后重新执行 `npm run build`
+
+### 本地 stdio server 手动可用,但在 GSD 中不可用
+
+**症状:** 手动执行 server 命令没有问题,但 GSD 连接不上。
+
+**常见原因:**
+
+- Server 依赖某些 GSD 不会继承的 shell 状态
+- 相对路径只有在另一个 working directory 中才成立
+- 需要的环境变量存在于你的 shell 中,但没有写进 MCP 配置
+
+**解决:**
+
+- 对 `command` 和脚本参数都使用绝对路径
+- 把所需环境变量写进 MCP 配置的 `env` 块
+- 有必要时,在 server 定义里显式设置 `cwd`
+
+### Session lock 被另一个终端中的 `/gsd` 抢走
+
+**症状:** 在第二个终端运行 `/gsd`(step mode)时,正在运行的自动模式会话失去了锁。
+
+**解决:** 已在 v2.36.0 修复。现在裸 `/gsd` 不会再从运行中的自动模式会话手里抢 session lock。升级到最新版本。
+
+### Worktree 中的提交落到了 main,而不是 `milestone/` 分支
+
+**症状:** 自动模式在 worktree 中提交时,最终落在了 `main`,而不是 `milestone/`。
+
+**解决:** 已在 v2.37.1 修复。现在 dispatch 前会重新校正 CWD,并在失败时清理过期 merge 状态。升级到最新版本。
+
+### Extension loader 因 subpath export 错误而失败
+
+**症状:** 扩展加载时报 `Cannot find module`,并且错误信息引用了 npm subpath exports。
+
+**原因:** Extension loader 中的动态导入过去无法解析 npm subpath exports(例如 `@pkg/foo/bar`)。
+
+**解决:** 已在 v2.38+ 修复。现在 extension loader 会自动解析 npm subpath exports,并为动态导入创建 `node_modules` symlink。升级到最新版本。
+
+## 恢复流程
+
+### 重置自动模式状态
+
+```bash
+rm .gsd/auto.lock
+rm .gsd/completed-units.json
+```
+
+然后执行 `/gsd auto`,从当前磁盘状态重新开始。
+
+### 重置路由历史
+
+如果自适应模型路由给出了糟糕的结果,可以清空路由历史:
+
+```bash
+rm .gsd/routing-history.json
+```
+
+### 完整重建状态
+
+```
+/gsd doctor
+```
+
+Doctor 会从磁盘上的 plan 和 roadmap 文件重建 `STATE.md`,并修复检测到的不一致项。
+
+## 获取帮助
+
+- **GitHub Issues:** [github.com/gsd-build/GSD-2/issues](https://github.com/gsd-build/GSD-2/issues)
+- **Dashboard:** `Ctrl+Alt+G` 或 `/gsd status`,查看实时诊断信息
+- **Forensics:** `/gsd forensics`,用于对自动模式失败做结构化事后分析
+- **Session logs:** `.gsd/activity/` 中包含用于崩溃取证的 JSONL 会话转储
+
+## iTerm2 专属问题
+
+### Ctrl+Alt 快捷键触发了错误动作(例如 Ctrl+Alt+G 打开了外部编辑器,而不是 GSD dashboard)
+
+**症状:** 按下 Ctrl+Alt+G 后,会触发外部编辑器提示(Ctrl+G),而不是 GSD dashboard。其它 Ctrl+Alt 快捷键也表现得像它们对应的 Ctrl-only 快捷键。
+
+**原因:** iTerm2 默认的 Left Option Key 设置是 “Normal”,这会吞掉 Ctrl+Alt 组合中的 Alt 修饰键。终端实际只收到了 Ctrl,所以 Ctrl+Alt+G 最终变成 Ctrl+G。
+
+**解决:** 在 iTerm2 中进入 **Profiles → Keys → General**,把 **Left Option Key** 改成 **Esc+**。这样 Alt / Option 会发送 escape 前缀,终端应用就能正确识别 Ctrl+Alt 快捷键。
+
+## Windows 专属问题
+
+### Windows 上 LSP 返回 ENOENT(MSYS2 / Git Bash)
+
+**症状:** LSP 初始化因 `ENOENT` 失败,或者把 `/c/Users/...` 这类 POSIX 路径错误地解析为 `C:\Users\...`。
+
+**原因:** MSYS2 / Git Bash 中的 `which` 命令返回的是 POSIX 风格路径,而 Node.js 的 `spawn()` 无法正确解析。
+
+**解决:** 
已在 v2.29+ 修复,Windows 现在改用 `where.exe`。升级到最新版本。 + +### 构建 WXT / 浏览器扩展时出现 EBUSY + +**症状:** 构建浏览器扩展时出现 `EBUSY: resource busy or locked, rmdir .output/chrome-mv3`。 + +**原因:** Chromium 浏览器仍然从构建输出目录加载着该扩展,导致目录无法删除。 + +**解决:** 关闭浏览器中的该扩展,或者在 WXT 配置里使用不同的 `outDirTemplate`,避开被锁住的目录。 + +## 数据库问题 + +### “GSD database is not available” + +**症状:** `gsd_decision_save`(及其别名 `gsd_save_decision`)、`gsd_requirement_update`(及其别名 `gsd_update_requirement`)或 `gsd_summary_save`(及其别名 `gsd_save_summary`)报这个错误。 + +**原因:** SQLite 数据库未初始化。这个问题会出现在 v2.29 之前的手动 `/gsd` 会话(非自动模式)中。 + +**解决:** 已在 v2.29+ 修复。现在数据库会在第一次 tool call 时自动初始化。升级到最新版本。 + +## Verification 问题 + +### Verification gate 因 shell 语法错误失败 + +**症状:** 在 verification 阶段出现 `stderr: /bin/sh: 1: Syntax error: "(" unexpected`。 + +**原因:** 某个描述性字符串(例如 `All 10 checks pass (build, lint)`)被误当成 shell 命令执行。这通常发生在 task plans 的 `verify:` 字段里写了 prose,而不是实际命令。 + +**解决:** 已在 v2.29+ 修复,现在偏好命令会先通过 `isLikelyCommand()` 过滤。请确保偏好中的 `verification_commands` 只包含合法 shell 命令,而不是文字描述。 + +## LSP(Language Server Protocol) + +### “LSP isn't available in this workspace” + +GSD 会根据项目文件自动检测 language servers(例如 `package.json` → TypeScript、`Cargo.toml` → Rust、`go.mod` → Go)。如果没有检测到 server,agent 会跳过 LSP 功能。 + +**查看状态:** + +``` +lsp status +``` + +它会显示哪些 servers 已经激活;如果一个都没找到,也会说明原因,包括发现了哪些项目标记、但缺失了哪些 server 命令。 + +**常见修复方式:** + +| 项目类型 | 安装命令 | +|----------|----------| +| TypeScript / JavaScript | `npm install -g typescript-language-server typescript` | +| Python | `pip install pyright` 或 `pip install python-lsp-server` | +| Rust | `rustup component add rust-analyzer` | +| Go | `go install golang.org/x/tools/gopls@latest` | + +安装完成后,执行 `lsp reload` 即可重新检测,无需重启 GSD。 + +## Notifications + + +### macOS 上通知不显示 + +**症状:** 偏好中已设置 `notifications.enabled: true`,但自动模式期间没有任何桌面通知(没有 milestone 完成提示、预算预警或错误通知),同时日志里也没有报错。 + +**原因:** GSD 在 macOS 上会把 `osascript display notification` 作为回退方案。这个命令的通知归属你的终端应用(Ghostty、iTerm2、Alacritty、Kitty、Warp 等)。如果该终端应用在 System Settings → Notifications 中没有权限,macOS 会静默丢弃通知,而 `osascript` 仍然返回 0,不会报错。 + +很多终端应用只有在成功送出过至少一条通知后,才会出现在通知设置面板里,这就形成了“先能通知,系统才给你配置通知”的鸡生蛋蛋生鸡问题。 + +**推荐修复方式:** 安装 `terminal-notifier`,它会注册为独立的 Notification Center 应用: + +```bash +brew install terminal-notifier +``` + +GSD 在检测到 `terminal-notifier` 可用时会自动优先使用它。首次使用时,macOS 会弹出通知权限请求,这是预期行为。 + +**替代修复方式:** 进入 **System Settings → Notifications**,为你的终端应用启用通知。如果终端应用不在列表中,可以先在 Terminal.app 中手动发送一条测试通知,注册出 “Script Editor”: + +```bash +osascript -e 'display notification "test" with title "GSD"' +``` + +**验证:** 完成任一修复后,用下面命令测试: + +```bash +terminal-notifier -title "GSD" -message "working!" -sound Glass +``` diff --git a/docs/zh-CN/user-docs/visualizer.md b/docs/zh-CN/user-docs/visualizer.md new file mode 100644 index 000000000..6b652f37e --- /dev/null +++ b/docs/zh-CN/user-docs/visualizer.md @@ -0,0 +1,104 @@ +# 工作流可视化器 + +*引入于 v2.19.0* + +工作流可视化器是一个全屏 TUI 叠层视图,以交互式四标签页的形式展示项目进度、依赖关系、成本指标和执行时间线。 + +## 打开可视化器 + +``` +/gsd visualize +``` + +或者配置为在 milestone 完成后自动显示: + +```yaml +auto_visualize: true +``` + +## 标签页 + +可通过 `Tab`、`1`-`4` 或方向键切换标签页。 + +### 1. 进度 + +以树状视图展示 milestones、slices 和 tasks 的完成状态: + +``` +M001: User Management 3/6 tasks ⏳ + ✅ S01: Auth module 3/3 tasks + ✅ T01: Core types + ✅ T02: JWT middleware + ✅ T03: Login flow + ⏳ S02: User dashboard 1/2 tasks + ✅ T01: Layout component + ⬜ T02: Profile page + ⬜ S03: Admin panel 0/1 tasks +``` + +已完成项显示勾选,进行中项显示转圈,待处理项显示空框。每一层级也会显示 task 数量和完成百分比。 + +如果某个 milestone 经过 discussion 阶段,还会显示**讨论状态**,用于表明需求是否已经记录,以及讨论停留在哪个状态。 + +### 2. 
依赖 + +用 ASCII 依赖图展示 slices 之间的关系: + +``` +S01 ──→ S02 ──→ S04 + └───→ S03 ──↗ +``` + +它会把 roadmap 中的 `depends:` 字段可视化出来,便于快速判断哪些 slices 被阻塞、哪些可以继续推进。 + +### 3. 指标 + +通过柱状图展示成本和 Token 使用情况: + +- **按阶段**:research、planning、execution、completion、reassessment +- **按 slice**:每个 slice 的成本以及累计总额 +- **按模型**:哪些模型消耗了最多预算 + +数据来自 `.gsd/metrics.json`。 + +### 4. 时间线 + +按时间顺序展示执行历史,包括: + +- 单元类型和 ID +- 开始 / 结束时间戳 +- 持续时间 +- 使用的模型 +- Token 数量 + +条目按执行时间排序,因此可以看到自动模式的完整派发历史。 + +## 控制 + +| 按键 | 动作 | +|------|------| +| `Tab` | 下一个标签页 | +| `Shift+Tab` | 上一个标签页 | +| `1`-`4` | 直接跳转到标签页 | +| `↑` / `↓` | 在当前标签页内滚动 | +| `Escape` / `q` | 关闭可视化器 | + +## 自动刷新 + +可视化器每 2 秒从磁盘刷新一次数据,因此即使它和自动模式会话同时打开,也能保持最新状态。 + +## HTML 导出(v2.26) + +如果需要在终端外部分享报告,可以使用 `/gsd export --html`。它会在 `.gsd/reports/` 中生成一个自包含的 HTML 文件,包含与 TUI 可视化器相同的数据:进度树、依赖图(SVG DAG)、成本 / Token 柱状图、执行时间线、变更日志和知识库。所有 CSS 和 JS 都会内联,无外部依赖,也可以在任意浏览器中打印为 PDF。 + +自动生成的 `index.html` 会集中列出所有报告,并显示跨 milestones 的推进指标。 + +```yaml +auto_report: true # 在 milestone 完成后自动生成(默认开启) +``` + +## 配置 + +```yaml +auto_visualize: true # 在 milestone 完成后显示可视化器 +``` diff --git a/docs/zh-CN/user-docs/web-interface.md b/docs/zh-CN/user-docs/web-interface.md new file mode 100644 index 000000000..a7ddd4aeb --- /dev/null +++ b/docs/zh-CN/user-docs/web-interface.md @@ -0,0 +1,67 @@ +# Web 界面 + +> 新增于 v2.41.0 + +GSD 提供了基于浏览器的 Web 界面,用于项目管理、实时进度监控以及多项目支持。 + +## 快速开始 + +```bash +gsd --web +``` + +这会启动一个本地 Web 服务器,并在默认浏览器中打开 GSD 仪表板。 + +### CLI 参数(v2.42.0) + +```bash +gsd --web --host 0.0.0.0 --port 8080 --allowed-origins "https://example.com" +``` + +| 参数 | 默认值 | 说明 | +|------|--------|------| +| `--host` | `localhost` | Web 服务器监听地址 | +| `--port` | `3000` | Web 服务器端口 | +| `--allowed-origins` | (无) | 允许的 CORS 来源列表,逗号分隔 | + +## 功能 + +- **项目管理**:在可视化仪表板中查看 milestones、slices 和 tasks +- **实时进度**:通过 server-sent events 在自动模式执行期间推送状态更新 +- **多项目支持**:通过 `?project=` URL 参数,在单个浏览器标签页中管理多个项目 +- **切换项目根目录**:无需重启服务器即可在 Web UI 中切换项目目录(v2.44) +- **首次引导流程**:可在浏览器中完成 API key 设置和 provider 配置 +- **模型选择**:直接从 Web UI 切换模型和 provider + +## 架构 + +Web 界面基于 Next.js 构建,并通过桥接服务与 GSD 后端通信。每个项目都会拥有自己的 bridge 实例,以便在并发会话中保持隔离。 + +关键组件: + +- `ProjectBridgeService`:按项目分配的命令路由和 SSE 订阅服务 +- `getProjectBridgeServiceForCwd()`:根据项目路径返回独立实例的注册表 +- `resolveProjectCwd()`:从请求 URL 中读取 `?project=`,若不存在则回退到 `GSD_WEB_PROJECT_CWD` + +## 配置 + +默认情况下,Web 服务器监听在 `localhost:3000`。如需覆盖,可使用 `--host`、`--port` 和 `--allowed-origins`(见上面的 CLI 参数)。 + +### 环境变量 + +| 变量 | 说明 | +|------|------| +| `GSD_WEB_PROJECT_CWD` | 当未指定 `?project=` 时使用的默认项目路径 | + +## Node v24 兼容性 + +Node v24 对类型剥离(type stripping)做了破坏性改动,曾导致 Web 启动时报 `ERR_UNSUPPORTED_NODE_MODULES_TYPE_STRIPPING`。该问题已在 v2.42.0+ 中修复(#1864)。如果你仍然遇到这个错误,请升级 GSD。 + +## 认证令牌持久化 + +从 v2.42.0 起,Web UI 会把认证令牌持久化到 `sessionStorage`,因此页面刷新后不会丢失登录态(#1877)。在此之前,每次刷新都需要重新认证。 + +## 平台说明 + +- **Windows**:由于 Next.js webpack 在系统目录上会触发 EPERM 问题,Windows 下会跳过 Web 构建。CLI 仍然可完整使用。 +- **macOS / Linux**:完整支持。 diff --git a/docs/zh-CN/user-docs/working-in-teams.md b/docs/zh-CN/user-docs/working-in-teams.md new file mode 100644 index 000000000..f5486ff80 --- /dev/null +++ b/docs/zh-CN/user-docs/working-in-teams.md @@ -0,0 +1,103 @@ +# 团队协作 + +GSD 支持多人并行工作流,让多个开发者可以同时在同一个仓库中工作。 + +## 设置 + +### 1. 
启用 Team Mode + +为团队使用配置 GSD 的最简单方法,是在项目偏好中设置 `mode: team`。这会一次性开启唯一 milestone ID、推送分支和预合并检查: + +```yaml +# .gsd/PREFERENCES.md(项目级,提交到 git) +--- +version: 1 +mode: team +--- +``` + +这相当于手动设置 `unique_milestone_ids: true`、`git.push_branches: true`、`git.pre_merge_check: true` 以及其他适合团队协作的默认值。你仍然可以覆盖单个选项,例如如果团队偏好自动推送,也可以在 `mode: team` 基础上再加 `git.auto_push: true`。 + +你也可以不使用 mode,而是单独配置每一项设置(详见 [Git 策略](git-strategy.md))。 + +### 2. 配置 `.gitignore` + +共享规划产物(milestones、roadmaps、decisions),同时把运行时文件保留在本地: + +```bash +# ── GSD:运行时 / 临时文件(按开发者、按会话隔离)────── +.gsd/auto.lock +.gsd/completed-units.json +.gsd/STATE.md +.gsd/metrics.json +.gsd/activity/ +.gsd/runtime/ +.gsd/worktrees/ +.gsd/milestones/**/continue.md +.gsd/milestones/**/*-CONTINUE.md +``` + +**会共享的内容**(提交到 git): + +- `.gsd/PREFERENCES.md`:项目偏好 +- `.gsd/PROJECT.md`:持续维护的项目描述 +- `.gsd/REQUIREMENTS.md`:需求契约 +- `.gsd/DECISIONS.md`:架构决策 +- `.gsd/milestones/`:roadmaps、plans、summaries 和 research + +**仅保留本地的内容**(gitignore): + +- 锁文件、指标、状态缓存、运行时记录、worktrees、活动日志 + +### 3. 提交偏好设置 + +```bash +git add .gsd/PREFERENCES.md +git commit -m "chore: enable GSD team workflow" +``` + +## `commit_docs: false` + +如果团队里只有部分成员使用 GSD,或者公司策略要求仓库保持干净: + +```yaml +git: + commit_docs: false +``` + +这会把整个 `.gsd/` 加入 `.gitignore`,让所有产物都保留在本地。这样使用 GSD 的开发者仍然能获得结构化规划的好处,而不会影响不使用 GSD 的同事。 + +## 迁移现有项目 + +如果你当前项目里对 `.gsd/` 做了整目录忽略: + +1. 确保当前没有进行中的 milestones(工作区状态干净) +2. 按上面的选择性规则更新 `.gitignore` +3. 在 `.gsd/PREFERENCES.md` 中添加 `unique_milestone_ids: true` +4. 如有需要,重命名现有 milestones 以使用唯一 ID: + ``` + I have turned on unique milestone ids, please update all old milestone + ids to use this new format e.g. M001-abc123 where abc123 is a random + 6 char lowercase alpha numeric string. Update all references in all + .gsd file contents, file names and directory names. Validate your work + once done to ensure referential integrity. + ``` +5. 提交修改 + +## 并行开发 + +多个开发者可以同时对不同 milestones 运行自动模式。每个开发者都会: + +- 获得自己的 worktree(`.gsd/worktrees//`,已加入 gitignore) +- 在独立的 `milestone/` 分支上工作 +- 独立地 squash merge 回主分支 + +milestone 依赖可以通过 `M00X-CONTEXT.md` frontmatter 声明: + +```yaml +--- +depends_on: [M001-eh88as] +--- +``` + +GSD 会强制要求上游依赖 milestone 先完成,之后才会启动下游工作。 diff --git a/gitbook/README.md b/gitbook/README.md new file mode 100644 index 000000000..cb84bae87 --- /dev/null +++ b/gitbook/README.md @@ -0,0 +1,65 @@ +# What is GSD? + +GSD is an AI-powered development agent that turns project ideas into working software. Describe what you want to build, and GSD researches, plans, codes, tests, and commits — with clean git history and full cost tracking. + +## How It Works + +GSD breaks your project into manageable pieces and works through them systematically: + +``` +You describe your project + ↓ +GSD creates a milestone with slices (features) + ↓ +Each slice is decomposed into tasks + ↓ +Tasks are executed one at a time in fresh AI sessions + ↓ +Code is committed, verified, and the next task begins +``` + +You can stay hands-on with **step mode** (reviewing each step) or let GSD run autonomously with **auto mode** while you grab coffee. 
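+
+A minimal sketch of that split, assuming a second GSD session opened in the same project (each command is covered in detail later in this guide):
+
+```bash
+# terminal 1: launch GSD and hand the project to auto mode
+gsd
+/gsd auto
+
+# terminal 2: steer or stop the run without interrupting it
+gsd
+/gsd steer   # adjust plan documents mid-flight
+/gsd stop    # or stop gracefully
+```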
+ +## Key Features + +- **Autonomous execution** — `/gsd auto` runs research, planning, coding, testing, and committing without intervention +- **20+ LLM providers** — Anthropic, OpenAI, Google, OpenRouter, GitHub Copilot, Amazon Bedrock, local models, and more +- **Git isolation** — Each milestone works in its own worktree branch, merged cleanly when done +- **Cost tracking** — Real-time token usage, budget ceilings, and automatic model downgrading +- **Crash recovery** — Sessions resume automatically after interruptions +- **Skills system** — Domain-specific instruction sets for frameworks, languages, and tools +- **Parallel milestones** — Run multiple milestones simultaneously in isolated worktrees +- **Remote questions** — Get Discord, Slack, or Telegram notifications when GSD needs input +- **Web interface** — Browser-based dashboard with real-time progress +- **VS Code extension** — Chat participant, sidebar dashboard, and full command palette +- **Headless mode** — Run in CI pipelines, cron jobs, and scripted automation + +## Quick Start + +```bash +# Install +npm install -g gsd-pi + +# Launch +gsd + +# Start autonomous mode +/gsd auto +``` + +See [Installation](getting-started/installation.md) for detailed setup instructions. + +## Two Ways to Work + +| Mode | Command | Best For | +|------|---------|----------| +| **Step** | `/gsd` | Staying in the loop, reviewing each step | +| **Auto** | `/gsd auto` | Walking away, overnight builds, batch work | + +The recommended workflow: run auto mode in one terminal, steer from another. See [Step Mode](core-concepts/step-mode.md) and [Auto Mode](core-concepts/auto-mode.md). + +## Requirements + +- **Node.js** 22.0.0 or later (24 LTS recommended) +- **Git** installed and configured +- An API key for at least one LLM provider (or use browser sign-in for Anthropic/GitHub Copilot) diff --git a/gitbook/SUMMARY.md b/gitbook/SUMMARY.md new file mode 100644 index 000000000..962364bbe --- /dev/null +++ b/gitbook/SUMMARY.md @@ -0,0 +1,49 @@ +# Table of contents + +* [What is GSD?](README.md) + +## Getting Started + +* [Installation](getting-started/installation.md) +* [Your First Project](getting-started/first-project.md) +* [Choosing a Model](getting-started/choosing-a-model.md) + +## Core Concepts + +* [How GSD Organizes Work](core-concepts/project-structure.md) +* [Step Mode](core-concepts/step-mode.md) +* [Auto Mode](core-concepts/auto-mode.md) + +## Configuration + +* [Preferences](configuration/preferences.md) +* [Provider Setup](configuration/providers.md) +* [Custom Models](configuration/custom-models.md) +* [Git & Worktrees](configuration/git-settings.md) +* [Notifications](configuration/notifications.md) +* [MCP Servers](configuration/mcp-servers.md) + +## Features + +* [Cost Management](features/cost-management.md) +* [Token Optimization](features/token-optimization.md) +* [Dynamic Model Routing](features/dynamic-model-routing.md) +* [Skills](features/skills.md) +* [Captures & Triage](features/captures.md) +* [Workflow Visualizer](features/visualizer.md) +* [Workflow Templates](features/workflow-templates.md) +* [Web Interface](features/web-interface.md) +* [Remote Questions](features/remote-questions.md) +* [Working in Teams](features/teams.md) +* [Parallel Orchestration](features/parallel.md) +* [Headless & CI Mode](features/headless.md) +* [GitHub Sync](features/github-sync.md) + +## Reference + +* [Commands](reference/commands.md) +* [Keyboard Shortcuts](reference/keyboard-shortcuts.md) +* [CLI Flags](reference/cli-flags.md) +* 
[Environment Variables](reference/environment-variables.md) +* [Troubleshooting](reference/troubleshooting.md) +* [Migration from v1](reference/migration.md) diff --git a/gitbook/configuration/custom-models.md b/gitbook/configuration/custom-models.md new file mode 100644 index 000000000..8f02512ff --- /dev/null +++ b/gitbook/configuration/custom-models.md @@ -0,0 +1,131 @@ +# Custom Models + +Define custom models and providers in `~/.gsd/agent/models.json`. This lets you add models not in the default registry — self-hosted endpoints, fine-tuned models, proxies, or new provider releases. + +## File Location + +GSD looks for models.json at: +1. `~/.gsd/agent/models.json` (primary) +2. `~/.pi/agent/models.json` (fallback) + +The file reloads each time you open `/model` — no restart needed. + +## Basic Structure + +```json +{ + "providers": { + "my-provider": { + "baseUrl": "https://my-endpoint.example.com/v1", + "apiKey": "MY_PROVIDER_API_KEY", + "api": "openai-completions", + "models": [ + { + "id": "model-id-here", + "name": "Friendly Model Name", + "reasoning": false, + "input": ["text"], + "contextWindow": 128000, + "maxTokens": 16384, + "cost": { "input": 0.15, "output": 0.60, "cacheRead": 0.015, "cacheWrite": 0.19 } + } + ] + } + } +} +``` + +## API Key Resolution + +The `apiKey` field can be: + +- **An environment variable name**: `"OPENROUTER_API_KEY"` — GSD resolves it automatically +- **A literal value**: `"sk-abc123..."` — used directly +- **A dummy value**: `"not-needed"` — for local servers that don't require auth + +## Compatibility Flags + +Local and non-standard servers often need compatibility adjustments: + +```json +{ + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false, + "supportsUsageInStreaming": false, + "thinkingFormat": "qwen" + } +} +``` + +| Flag | Default | Purpose | +|------|---------|---------| +| `supportsDeveloperRole` | `true` | Set `false` if the server doesn't support the `developer` message role | +| `supportsReasoningEffort` | `true` | Set `false` if the server doesn't support reasoning effort parameters | +| `supportsUsageInStreaming` | `true` | Set `false` if streaming responses don't include token usage | +| `thinkingFormat` | — | Set `"qwen"` for Qwen thinking mode, `"qwen-chat-template"` for chat template variant | + +## Custom Headers + +For proxies that need extra headers: + +```json +{ + "providers": { + "litellm-proxy": { + "baseUrl": "https://litellm.example.com/v1", + "apiKey": "MY_API_KEY", + "api": "openai-completions", + "headers": { + "x-custom-header": "value" + }, + "models": [...] + } + } +} +``` + +## Model Overrides + +Override specific model settings without redefining the entire model: + +```json +{ + "providers": { + "openrouter": { + "modelOverrides": { + "anthropic/claude-sonnet-4": { + "compat": { + "openRouterRouting": { + "only": ["amazon-bedrock"] + } + } + } + } + } + } +} +``` + +## Cost Tracking + +For accurate cost tracking with custom models, add the `cost` field (per million tokens): + +```json +"cost": { + "input": 0.15, + "output": 0.60, + "cacheRead": 0.015, + "cacheWrite": 0.19 +} +``` + +Without this, cost shows $0.00 — which is the expected default for custom models. + +## Community Extensions + +For providers not built into GSD, community extensions add full provider support: + +| Extension | Provider | Install | +|-----------|----------|---------| +| `pi-dashscope` | Alibaba DashScope (Qwen3, GLM-5, etc.) 
| `gsd install npm:pi-dashscope` | diff --git a/gitbook/configuration/git-settings.md b/gitbook/configuration/git-settings.md new file mode 100644 index 000000000..cf4c0d524 --- /dev/null +++ b/gitbook/configuration/git-settings.md @@ -0,0 +1,148 @@ +# Git & Worktrees + +GSD uses git for milestone isolation and sequential commits. The strategy is fully automated — you don't need to manage branches manually. + +## Isolation Modes + +GSD supports three isolation modes, configured via `git.isolation` in preferences: + +| Mode | Working Directory | Branch | Best For | +|------|-------------------|--------|----------| +| `worktree` (default) | `.gsd/worktrees//` | `milestone/` | Most projects — full isolation | +| `branch` | Project root | `milestone/` | Submodule-heavy repos | +| `none` | Project root | Current branch | Hot-reload workflows | + +### Worktree Mode (Default) + +Each milestone gets its own git worktree and branch. All execution happens inside the worktree. On completion, everything is squash-merged to main as one clean commit. The worktree and branch are then cleaned up. + +Changes in a milestone can't interfere with your main working copy. + +### Branch Mode + +Work happens in the project root on a `milestone/` branch. No worktree directory is created. Useful when worktrees cause problems with submodules or hardcoded paths. + +### None Mode + +Work happens directly on your current branch. No worktree, no milestone branch. GSD still commits with conventional commit messages. Use this when file isolation breaks dev tooling (file watchers, hot-reload, etc.). + +## Branching Model + +``` +main ──────────────────────────────────────────── + │ ↑ + └── milestone/M001 (worktree) ─────────────┘ + commit: feat: core types + commit: feat: markdown parser + commit: feat: file writer + → squash-merged to main +``` + +## Workflow Modes + +Set `mode` for sensible defaults instead of configuring each setting individually: + +```yaml +mode: solo # personal projects +mode: team # shared repos +``` + +| Setting | `solo` | `team` | +|---------|--------|--------| +| `git.auto_push` | `true` | `false` | +| `git.push_branches` | `false` | `true` | +| `git.pre_merge_check` | `false` | `true` | +| `unique_milestone_ids` | `false` | `true` | + +Mode defaults are the lowest priority — any explicit preference overrides them. + +## Git Preferences + +```yaml +git: + auto_push: false # push after commits + push_branches: false # push milestone branch to remote + remote: origin # git remote name + snapshots: true # WIP snapshot commits during long tasks + pre_merge_check: auto # validation before merge + commit_type: feat # override conventional commit prefix + main_branch: main # primary branch name + merge_strategy: squash # "squash" or "merge" + isolation: worktree # "worktree", "branch", or "none" + commit_docs: true # commit .gsd/ artifacts to git + manage_gitignore: true # let GSD manage .gitignore + auto_pr: false # create PR on milestone completion + pr_target_branch: develop # PR target branch +``` + +## Automatic Pull Requests + +For teams using Gitflow or branch-based workflows: + +```yaml +git: + auto_push: true + auto_pr: true + pr_target_branch: develop +``` + +When a milestone completes, GSD pushes the branch and creates a PR targeting your specified branch. Requires `gh` CLI installed and authenticated. 
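+
+If the PR never appears, the `gh` prerequisite is the usual culprit. A quick sanity check, using standard GitHub CLI commands rather than anything GSD-specific:
+
+```bash
+gh --version     # confirms the CLI is installed
+gh auth status   # confirms it is authenticated for your git host
+```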
+ +## Post-Worktree Hook + +Run a script after worktree creation (copy `.env` files, symlink assets, etc.): + +```yaml +git: + worktree_post_create: .gsd/hooks/post-worktree-create +``` + +Example hook: + +```bash +#!/bin/bash +cp "$SOURCE_DIR/.env" "$WORKTREE_DIR/.env" +ln -sf "$SOURCE_DIR/assets" "$WORKTREE_DIR/assets" +``` + +## Keeping `.gsd/` Local + +For teams where only some members use GSD: + +```yaml +git: + commit_docs: false +``` + +This adds `.gsd/` to `.gitignore` entirely. You get structured planning without affecting teammates who don't use GSD. + +## Commit Format + +Commits use conventional commit format with GSD metadata: + +``` +feat: core type definitions + +GSD-Task: M001/S01/T01 +``` + +## Manual Worktree Management + +Use `/worktree` (or `/wt`) for manual worktree operations: + +``` +/worktree create +/worktree switch +/worktree merge +/worktree remove +``` + +## Self-Healing + +GSD automatically recovers from common git issues: + +- **Detached HEAD** — reattaches to the correct branch +- **Stale lock files** — removes `index.lock` from crashed processes +- **Orphaned worktrees** — detects and cleans up abandoned worktrees + +Run `/gsd doctor` to check git health manually. diff --git a/gitbook/configuration/mcp-servers.md b/gitbook/configuration/mcp-servers.md new file mode 100644 index 000000000..6079fa3a7 --- /dev/null +++ b/gitbook/configuration/mcp-servers.md @@ -0,0 +1,65 @@ +# MCP Servers + +GSD can connect to external MCP (Model Context Protocol) servers for local tools, internal APIs, self-hosted services, or integrations not built in as native extensions. + +## Configuration Files + +GSD reads MCP config from these project-local paths: + +- `.mcp.json` — repo-shared config (safe to commit) +- `.gsd/mcp.json` — local-only config (not shared) + +If both exist, server names are merged and the first definition found wins. + +## Supported Transports + +| Transport | Config Shape | Use When | +|-----------|-------------|----------| +| `stdio` | `command` + optional `args`, `env`, `cwd` | Launching a local MCP server | +| `http` | `url` | Connecting to an already-running server | + +## Examples + +### stdio Server + +```json +{ + "mcpServers": { + "my-server": { + "type": "stdio", + "command": "/absolute/path/to/python3", + "args": ["/absolute/path/to/server.py"], + "env": { + "API_URL": "http://localhost:8000" + } + } + } +} +``` + +### HTTP Server + +```json +{ + "mcpServers": { + "my-http-server": { + "url": "http://localhost:8080/mcp" + } + } +} +``` + +## Verifying a Server + +After adding config, verify from a GSD session: + +1. `mcp_servers` — confirms GSD sees the config +2. `mcp_discover(server="my-server")` — confirms the server starts and responds +3. `mcp_call(server="my-server", tool="", args={...})` — confirms a real tool call works + +## Tips + +- Use **absolute paths** for executables and scripts +- Set required **environment variables** directly in the MCP config's `env` block +- Use `.mcp.json` for team-shared servers; `.gsd/mcp.json` for machine-local ones +- If a server depends on local paths or personal secrets, keep it in `.gsd/mcp.json` diff --git a/gitbook/configuration/notifications.md b/gitbook/configuration/notifications.md new file mode 100644 index 000000000..54acd0d67 --- /dev/null +++ b/gitbook/configuration/notifications.md @@ -0,0 +1,38 @@ +# Notifications + +GSD sends desktop notifications during auto mode to keep you informed without watching the terminal. 
+ +## Configuration + +```yaml +notifications: + enabled: true + on_complete: true # notify on unit completion + on_error: true # notify on errors + on_budget: true # notify on budget thresholds + on_milestone: true # notify when milestone finishes + on_attention: true # notify when manual attention needed +``` + +## macOS Setup + +GSD uses `terminal-notifier` when available, falling back to `osascript`. + +**Recommended:** Install `terminal-notifier` for reliable delivery: + +```bash +brew install terminal-notifier +``` + +**Why?** The `osascript` fallback attributes notifications to your terminal app (Ghostty, iTerm2, etc.), which may not have notification permissions. `terminal-notifier` registers as its own app and prompts for permission on first use. + +### Notifications Not Appearing? + +1. Check **System Settings → Notifications** for your terminal app +2. Install `terminal-notifier` (recommended) +3. Test with: + ```bash + terminal-notifier -title "GSD" -message "working!" -sound Glass + ``` + +If your terminal app doesn't appear in Notification settings, it may need to send at least one notification first to register. See [Troubleshooting](../reference/troubleshooting.md) for more details. diff --git a/gitbook/configuration/preferences.md b/gitbook/configuration/preferences.md new file mode 100644 index 000000000..3a997150a --- /dev/null +++ b/gitbook/configuration/preferences.md @@ -0,0 +1,238 @@ +# Preferences + +GSD preferences live in YAML frontmatter markdown files. You can configure them globally or per-project. + +## Managing Preferences + +``` +/gsd prefs # open the global preferences wizard +/gsd prefs project # open the project preferences wizard +/gsd prefs status # show current values and where they come from +``` + +## Preference Files + +| Scope | Path | Applies To | +|-------|------|-----------| +| Global | `~/.gsd/PREFERENCES.md` | All projects | +| Project | `.gsd/PREFERENCES.md` | Current project only | + +**How they merge:** +- **Scalar fields** (`budget_ceiling`, `token_profile`): project wins if defined +- **Array fields** (`always_use_skills`, etc.): concatenated (global first, then project) +- **Object fields** (`models`, `git`, `auto_supervisor`): shallow-merged, project overrides per-key + +## Quick Example + +```yaml +--- +version: 1 + +# Model selection +models: + research: claude-sonnet-4-6 + planning: claude-opus-4-6 + execution: claude-sonnet-4-6 + completion: claude-sonnet-4-6 + +# Token optimization +token_profile: balanced + +# Budget +budget_ceiling: 25.00 +budget_enforcement: pause + +# Supervision +auto_supervisor: + soft_timeout_minutes: 15 + hard_timeout_minutes: 25 + +# Git +git: + auto_push: true + merge_strategy: squash + isolation: worktree + +# Verification +verification_commands: + - npm run lint + - npm run test + +# Notifications +notifications: + on_milestone: true + on_attention: true +--- +``` + +## All Settings + +### `models` + +Per-phase model selection. See [Choosing a Model](../getting-started/choosing-a-model.md). + +```yaml +models: + research: claude-sonnet-4-6 + planning: + model: claude-opus-4-6 + fallbacks: + - openrouter/z-ai/glm-5 + execution: claude-sonnet-4-6 + execution_simple: claude-haiku-4-5 + completion: claude-sonnet-4-6 + subagent: claude-sonnet-4-6 +``` + +### `token_profile` + +Coordinates model selection, phase skipping, and context compression. Values: `budget`, `balanced` (default), `quality`. See [Token Optimization](../features/token-optimization.md). 
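+
+For example, a one-line sketch that trades the optional research phases for lower spend:
+
+```yaml
+token_profile: budget
+```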
+ +### `budget_ceiling` + +Maximum USD to spend during auto mode: + +```yaml +budget_ceiling: 50.00 +``` + +### `budget_enforcement` + +What happens when the ceiling is reached: + +| Value | Behavior | +|-------|----------| +| `warn` | Log a warning, continue | +| `pause` | Pause auto mode (default) | +| `halt` | Stop auto mode entirely | + +### `auto_supervisor` + +Timeout thresholds for auto mode: + +```yaml +auto_supervisor: + soft_timeout_minutes: 20 # warn AI to wrap up + idle_timeout_minutes: 10 # detect stalls + hard_timeout_minutes: 30 # pause auto mode +``` + +### `verification_commands` + +Shell commands that run after every task execution: + +```yaml +verification_commands: + - npm run lint + - npm run test +verification_auto_fix: true # auto-retry on failure (default) +verification_max_retries: 2 # max attempts (default: 2) +``` + +### `phases` + +Fine-grained control over which phases run: + +```yaml +phases: + skip_research: false + skip_reassess: false + skip_slice_research: true + reassess_after_slice: true + require_slice_discussion: false +``` + +### `skill_discovery` + +| Value | Behavior | +|-------|----------| +| `auto` | Skills found and applied automatically | +| `suggest` | Skills identified but not auto-applied (default) | +| `off` | Skill discovery disabled | + +### `dynamic_routing` + +Automatic model selection by task complexity. See [Dynamic Model Routing](../features/dynamic-model-routing.md). + +```yaml +dynamic_routing: + enabled: true + escalate_on_failure: true + budget_pressure: true +``` + +### `git` + +Git behavior. See [Git & Worktrees](git-settings.md). + +```yaml +git: + auto_push: false + merge_strategy: squash + isolation: worktree + commit_docs: true + auto_pr: false +``` + +### `notifications` + +See [Notifications](notifications.md). + +```yaml +notifications: + enabled: true + on_complete: true + on_error: true + on_milestone: true + on_attention: true +``` + +### `remote_questions` + +Route questions to Slack, Discord, or Telegram. See [Remote Questions](../features/remote-questions.md). + +```yaml +remote_questions: + channel: discord + channel_id: "1234567890123456789" + timeout_minutes: 5 +``` + +### `parallel` + +Run multiple milestones simultaneously. See [Parallel Orchestration](../features/parallel.md). + +```yaml +parallel: + enabled: false + max_workers: 2 + budget_ceiling: 50.00 +``` + +### `custom_instructions` + +Durable instructions appended to every session: + +```yaml +custom_instructions: + - "Always use TypeScript strict mode" + - "Prefer functional patterns over classes" +``` + +For project-specific patterns, use `.gsd/KNOWLEDGE.md` instead — it's injected into every agent prompt automatically. + +### `context_pause_threshold` + +Context window usage percentage at which auto mode pauses: + +```yaml +context_pause_threshold: 80 # pause at 80% +``` + +### `show_token_cost` + +Show per-prompt and cumulative session token cost in the footer: + +```yaml +show_token_cost: true +``` diff --git a/gitbook/configuration/providers.md b/gitbook/configuration/providers.md new file mode 100644 index 000000000..4cb709142 --- /dev/null +++ b/gitbook/configuration/providers.md @@ -0,0 +1,277 @@ +# Provider Setup + +Step-by-step setup instructions for every LLM provider GSD supports. If you ran the onboarding wizard (`gsd config`) and picked a provider, you may already be configured — check with `/model` inside a session. 
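+
+One note that applies to every provider below: exporting a key only takes effect if it happens in the same shell that launches GSD. A minimal sketch (Anthropic shown; any variable from the table works the same way):
+
+```bash
+export ANTHROPIC_API_KEY="sk-ant-..."
+gsd   # launch from this same shell
+```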
+ +## Quick Reference + +| Provider | Auth Method | Environment Variable | +|----------|-------------|---------------------| +| Anthropic | OAuth or API key | `ANTHROPIC_API_KEY` | +| OpenAI | API key | `OPENAI_API_KEY` | +| Google Gemini | API key | `GEMINI_API_KEY` | +| OpenRouter | API key | `OPENROUTER_API_KEY` | +| Groq | API key | `GROQ_API_KEY` | +| xAI (Grok) | API key | `XAI_API_KEY` | +| Mistral | API key | `MISTRAL_API_KEY` | +| GitHub Copilot | OAuth | `GH_TOKEN` | +| Amazon Bedrock | IAM credentials | `AWS_PROFILE` or `AWS_ACCESS_KEY_ID` | +| Vertex AI | ADC | `GOOGLE_APPLICATION_CREDENTIALS` | +| Azure OpenAI | API key | `AZURE_OPENAI_API_KEY` | +| Ollama | None (local) | — | +| LM Studio | None (local) | — | +| vLLM / SGLang | None (local) | — | + +## Built-in Providers + +### Anthropic (Claude) + +**Recommended.** Anthropic models have the deepest integration: built-in web search, extended thinking, and prompt caching. + +**Option A — Browser sign-in (recommended):** + +```bash +gsd config +# Choose "Sign in with your browser" → "Anthropic (Claude)" +``` + +Or inside a session: `/login` + +**Option B — API key:** + +```bash +export ANTHROPIC_API_KEY="sk-ant-..." +``` + +### OpenAI + +```bash +export OPENAI_API_KEY="sk-..." +``` + +Or run `gsd config` and choose "Paste an API key" then "OpenAI". + +### Google Gemini + +```bash +export GEMINI_API_KEY="..." +``` + +### OpenRouter + +OpenRouter aggregates 200+ models from multiple providers behind a single API key. + +1. Get a key at [openrouter.ai/keys](https://openrouter.ai/keys) +2. Set it: + ```bash + export OPENROUTER_API_KEY="sk-or-..." + ``` +3. In GSD, type `/model` to select an OpenRouter model (prefixed with `openrouter/`) + +To add models not in the built-in list, add them to `~/.gsd/agent/models.json`. See [Custom Models](custom-models.md). + +### Groq + +```bash +export GROQ_API_KEY="gsk_..." +``` + +### xAI (Grok) + +```bash +export XAI_API_KEY="xai-..." +``` + +### Mistral + +```bash +export MISTRAL_API_KEY="..." +``` + +### GitHub Copilot + +Uses OAuth — sign in through the browser: + +```bash +gsd config +# Choose "Sign in with your browser" → "GitHub Copilot" +``` + +Requires an active GitHub Copilot subscription. + +### Amazon Bedrock + +Bedrock uses AWS IAM credentials: + +```bash +# Named profile +export AWS_PROFILE="my-profile" + +# Or IAM keys +export AWS_ACCESS_KEY_ID="AKIA..." +export AWS_SECRET_ACCESS_KEY="..." +export AWS_REGION="us-east-1" + +# Or bearer token +export AWS_BEARER_TOKEN_BEDROCK="..." +``` + +ECS task roles and IRSA (Kubernetes) are also detected automatically. + +### Anthropic on Vertex AI + +```bash +gcloud auth application-default login +export ANTHROPIC_VERTEX_PROJECT_ID="my-project-id" +``` + +### Azure OpenAI + +```bash +export AZURE_OPENAI_API_KEY="..." +``` + +## Local Providers + +Local providers run on your machine. They require a `models.json` configuration file at `~/.gsd/agent/models.json` because GSD needs to know the endpoint URL and available models. + +The file reloads each time you open `/model` — no restart needed. + +### Ollama + +1. Install and start Ollama: + ```bash + brew install ollama + ollama serve + ``` + +2. Pull a model: + ```bash + ollama pull llama3.1:8b + ``` + +3. 
Create `~/.gsd/agent/models.json`:
+   ```json
+   {
+     "providers": {
+       "ollama": {
+         "baseUrl": "http://localhost:11434/v1",
+         "api": "openai-completions",
+         "apiKey": "ollama",
+         "compat": {
+           "supportsDeveloperRole": false,
+           "supportsReasoningEffort": false
+         },
+         "models": [
+           { "id": "llama3.1:8b" }
+         ]
+       }
+     }
+   }
+   ```
+
+4. In GSD, type `/model` and select your Ollama model.
+
+### LM Studio
+
+1. Install [LM Studio](https://lmstudio.ai)
+2. Go to "Local Server" tab, load a model, click "Start Server" (default port 1234)
+3. Create `~/.gsd/agent/models.json`:
+   ```json
+   {
+     "providers": {
+       "lm-studio": {
+         "baseUrl": "http://localhost:1234/v1",
+         "api": "openai-completions",
+         "apiKey": "lm-studio",
+         "compat": {
+           "supportsDeveloperRole": false,
+           "supportsReasoningEffort": false
+         },
+         "models": [
+           { "id": "your-model-name" }
+         ]
+       }
+     }
+   }
+   ```
+
+### vLLM
+
+```json
+{
+  "providers": {
+    "vllm": {
+      "baseUrl": "http://localhost:8000/v1",
+      "api": "openai-completions",
+      "apiKey": "vllm",
+      "compat": {
+        "supportsDeveloperRole": false,
+        "supportsReasoningEffort": false,
+        "supportsUsageInStreaming": false
+      },
+      "models": [
+        { "id": "meta-llama/Llama-3.1-8B-Instruct" }
+      ]
+    }
+  }
+}
+```
+
+### SGLang
+
+```json
+{
+  "providers": {
+    "sglang": {
+      "baseUrl": "http://localhost:30000/v1",
+      "api": "openai-completions",
+      "apiKey": "sglang",
+      "compat": {
+        "supportsDeveloperRole": false,
+        "supportsReasoningEffort": false
+      },
+      "models": [
+        { "id": "meta-llama/Llama-3.1-8B-Instruct" }
+      ]
+    }
+  }
+}
+```
+
+## Custom OpenAI-Compatible Endpoints
+
+Any server that implements the OpenAI Chat Completions API can work with GSD — proxies (LiteLLM, Portkey, Helicone), self-hosted inference, new providers.
+
+**Quickest path:**
+
+```bash
+gsd config
+# Choose "Paste an API key" → "Custom (OpenAI-compatible)"
+# Enter: base URL, API key, model ID
+```
+
+This writes `~/.gsd/agent/models.json` for you. See [Custom Models](custom-models.md) for manual setup.
+
+## Verifying Your Setup
+
+1. Launch GSD: `gsd`
+2. Check available models: `/model`
+3. Select your model from the picker
+4. Send a test message to confirm it responds
+
+If the model doesn't appear, check:
+- The environment variable is set in the current shell
+- `models.json` is valid JSON
+- The server is running (for local providers)
+
+## Common Issues
+
+| Problem | Cause | Fix |
+|---------|-------|-----|
+| "Authentication failed" with valid key | Key not visible to GSD | Export in the same terminal, or save via `gsd config` |
+| OpenRouter models not in `/model` | No API key set | Set `OPENROUTER_API_KEY` and restart |
+| Ollama returns empty responses | Server not running or model not pulled | Run `ollama serve` and `ollama pull <model>` |
+| LM Studio model ID mismatch | ID doesn't match server | Check LM Studio's server tab for the exact identifier |
+| `developer` role error | Local server doesn't support it | Set `compat.supportsDeveloperRole: false` |
+| `stream_options` error | Server doesn't support streaming usage | Set `compat.supportsUsageInStreaming: false` |
+| Cost shows $0.00 | Default for custom models | Add `cost` field to model definition |
diff --git a/gitbook/core-concepts/auto-mode.md b/gitbook/core-concepts/auto-mode.md
new file mode 100644
index 000000000..b611f85ff
--- /dev/null
+++ b/gitbook/core-concepts/auto-mode.md
@@ -0,0 +1,183 @@
+# Auto Mode
+
+Auto mode is GSD's autonomous execution engine. Run `/gsd auto`, walk away, come back to built software with clean git history.
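+
+Before committing to a long unattended run, you can preview what auto mode would dispatch first. The headless query interface returns a read-only state snapshot without starting an AI session (a sketch; the fields are documented in [Headless & CI Mode](../features/headless.md), and `jq` is assumed to be installed):
+
+```bash
+# Instant look at the next unit auto mode would dispatch
+gsd headless query | jq '.next'
+# {"action":"dispatch","unitType":"execute-task","unitId":"M001/S01/T03"}
+```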
+ +## Starting Auto Mode + +``` +/gsd auto +``` + +GSD reads `.gsd/STATE.md`, determines the next unit of work, creates a fresh AI session with all relevant context, and lets the AI execute. When it finishes, GSD reads disk state again and dispatches the next unit. This continues until the milestone is complete. + +## The Execution Loop + +Each slice flows through phases automatically: + +``` +Plan → Execute (per task) → Complete → Reassess Roadmap → Next Slice + ↓ (all done) + Validate Milestone +``` + +- **Plan** — scouts the codebase, researches docs, decomposes the slice into tasks +- **Execute** — runs each task in a fresh context window +- **Complete** — writes summary, UAT script, marks roadmap, commits +- **Reassess** — checks if the roadmap still makes sense after what was learned +- **Validate** — after all slices, verifies success criteria were actually met + +## Controlling Auto Mode + +### Pause + +Press **Escape**. The conversation is preserved. You can interact with the agent, inspect state, or resume. + +### Resume + +``` +/gsd auto +``` + +Auto mode reads disk state and picks up where it left off. + +### Stop + +``` +/gsd stop +``` + +Stops auto mode gracefully. Can be run from a different terminal. + +### Steer + +``` +/gsd steer +``` + +Modify plan documents during execution without stopping. Changes are picked up at the next phase boundary. + +### Capture Thoughts + +``` +/gsd capture "add rate limiting to API endpoints" +``` + +Fire-and-forget thought capture. Captures are triaged automatically between tasks without pausing execution. See [Captures & Triage](../features/captures.md). + +## Fresh Session Per Unit + +Every task gets a clean AI context window. No accumulated garbage, no quality degradation from context bloat. The dispatch prompt includes everything needed — task plans, prior summaries, decisions, dependency context — so the AI starts oriented. + +## Git Isolation + +GSD isolates milestone work using one of three modes: + +| Mode | How It Works | Best For | +|------|-------------|----------| +| `worktree` (default) | Each milestone gets its own directory and branch | Most projects | +| `branch` | Work happens in the project root on a milestone branch | Submodule-heavy repos | +| `none` | Work happens directly on your current branch | Hot-reload workflows | + +In worktree mode, all commits are squash-merged to main as one clean commit when the milestone completes. See [Git & Worktrees](../configuration/git-settings.md). + +## Crash Recovery + +If a session dies, the next `/gsd auto` reads the surviving session file, synthesizes a recovery briefing from every tool call that made it to disk, and resumes with full context. + +In headless mode (`gsd headless auto`), crashes trigger automatic restart with exponential backoff (5s → 10s → 30s, up to 3 attempts). Combined with crash recovery, this enables true overnight "fire and forget" execution. + +## Provider Error Recovery + +GSD handles provider errors automatically: + +| Error Type | Examples | What Happens | +|-----------|----------|-------------| +| Rate limit | 429, "too many requests" | Auto-resumes after cooldown (60s or retry-after header) | +| Server error | 500, 502, 503, "overloaded" | Auto-resumes after 30s | +| Permanent | "unauthorized", "invalid key" | Pauses — requires manual resume | + +No manual intervention needed for transient errors. 
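+
+Taken together, crash recovery, auto-restart, and error recovery make unattended runs practical. A minimal overnight invocation might look like this (illustrative; the flags are documented in [Headless & CI Mode](../features/headless.md), and the timeout and log path are assumptions to size for your own milestone):
+
+```bash
+# Fire-and-forget overnight run: 8-hour overall timeout, up to 3 restarts on crash
+nohup gsd headless --timeout 28800000 --max-restarts 3 auto >> gsd-overnight.log 2>&1 &
+```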
+ +## Timeout Supervision + +Three timeout tiers prevent runaway sessions: + +| Timeout | Default | What Happens | +|---------|---------|-------------| +| Soft | 20 min | Warns the AI to wrap up | +| Idle | 10 min | Detects stalls, intervenes | +| Hard | 30 min | Pauses auto mode | + +Configure in preferences: + +```yaml +auto_supervisor: + soft_timeout_minutes: 20 + idle_timeout_minutes: 10 + hard_timeout_minutes: 30 +``` + +## Verification Gates + +Configure shell commands that run automatically after every task: + +```yaml +verification_commands: + - npm run lint + - npm run test +verification_auto_fix: true # auto-retry on failure +verification_max_retries: 2 # max retry attempts +``` + +If verification fails, the AI sees the output and attempts to fix the issues before advancing. This ensures quality gates are enforced mechanically. + +## Slice Discussion Gate + +For projects requiring human review before each slice: + +```yaml +require_slice_discussion: true +``` + +Auto mode pauses before each slice, showing the plan for your approval before building. + +## Stuck Detection + +GSD uses sliding-window analysis to detect stuck loops — not just "same unit dispatched twice" but also cycles like A→B→A→B. On detection, GSD retries once with a diagnostic prompt. If it fails again, auto mode stops with details so you can intervene. + +## Cost Tracking + +Every unit's token usage and cost is captured, broken down by phase, slice, and model. The dashboard shows running totals and projections. Budget ceilings can pause auto mode before overspending. See [Cost Management](../features/cost-management.md). + +## Dashboard + +`Ctrl+Alt+G` or `/gsd status` shows real-time progress: + +- Current milestone, slice, and task +- Auto mode elapsed time and phase +- Per-unit cost and token breakdown +- Cost projections +- Completed and in-progress units +- Pending capture count +- Parallel worker status (when running parallel milestones) + +## HTML Reports + +After a milestone completes, GSD generates a self-contained HTML report in `.gsd/reports/` with project summary, progress tree, dependency graph, cost metrics, timeline, and changelog. Generate manually with: + +``` +/gsd export --html +/gsd export --html --all # all milestones +``` + +## Diagnostic Tools + +If auto mode has issues, GSD provides two diagnostic tools: + +- **`/gsd doctor`** — validates `.gsd/` integrity, checks referential consistency, fixes structural issues +- **`/gsd forensics`** — full post-mortem debugger with anomaly detection, unit traces, metrics analysis, and AI-guided investigation + +``` +/gsd doctor +/gsd forensics [optional problem description] +``` diff --git a/gitbook/core-concepts/project-structure.md b/gitbook/core-concepts/project-structure.md new file mode 100644 index 000000000..6aa6e9078 --- /dev/null +++ b/gitbook/core-concepts/project-structure.md @@ -0,0 +1,104 @@ +# How GSD Organizes Work + +GSD uses a three-level hierarchy to break projects into manageable pieces that an AI can execute reliably. + +## The Hierarchy + +``` +Milestone → a shippable version (4-10 slices) + Slice → one demoable vertical feature (1-7 tasks) + Task → one context-window-sized unit of work +``` + +### Milestones + +A milestone is a shippable version of your project — an MVP, a major release, or a feature set that delivers standalone value. Milestones typically contain 4-10 slices. 
+ +Examples: +- "MVP with user auth, dashboard, and settings" +- "v2.0 with real-time collaboration and API v2" +- "Security hardening milestone" + +### Slices + +A slice is one demoable, vertical capability within a milestone. It cuts across layers (database, backend, frontend) to deliver something you could show to a user. Slices contain 1-7 tasks. + +Examples: +- "User authentication with JWT" +- "Dashboard layout with charts" +- "API rate limiting" + +### Tasks + +A task is the smallest unit of work — something that fits in one AI context window. If a task can't be completed in a single AI session, it's broken into smaller tasks. + +Examples: +- "Create the User model and migration" +- "Implement JWT middleware" +- "Build the login form component" + +## The `.gsd/` Directory + +All project state lives on disk in a `.gsd/` directory at your project root: + +``` +.gsd/ + PROJECT.md — living description of what the project is + REQUIREMENTS.md — requirement contract (active/validated/deferred) + DECISIONS.md — append-only architectural decisions log + KNOWLEDGE.md — cross-session rules, patterns, and lessons + RUNTIME.md — runtime context: API endpoints, env vars, services + STATE.md — quick-glance status of current work + PREFERENCES.md — project-level preferences (optional) + milestones/ + M001/ + M001-ROADMAP.md — slice plan with risk levels and dependencies + M001-CONTEXT.md — scope and goals from discussion phase + slices/ + S01/ + S01-PLAN.md — task decomposition for this slice + S01-SUMMARY.md — what was built and what changed + S01-UAT.md — human test script + tasks/ + T01-PLAN.md — detailed plan for this task + T01-SUMMARY.md — what the task accomplished +``` + +### Key Files + +| File | Purpose | +|------|---------| +| `PROJECT.md` | High-level project description, updated as the project evolves | +| `REQUIREMENTS.md` | Formal requirement contract — tracks what's active, validated, and deferred | +| `DECISIONS.md` | Append-only log of architectural decisions with rationale | +| `KNOWLEDGE.md` | Rules, patterns, and lessons learned across sessions — GSD reads this at the start of every task | +| `RUNTIME.md` | Runtime context like API URLs, ports, and environment variables | +| `STATE.md` | Current status at a glance — auto-generated, don't edit manually | + +## How Work Flows + +Each slice flows through phases: + +``` +Plan → Execute (per task) → Complete → Reassess Roadmap → Next Slice +``` + +1. **Plan** — GSD scouts the codebase, researches relevant docs, and decomposes the slice into tasks with clear requirements +2. **Execute** — Each task runs in a fresh AI session with focused context +3. **Complete** — GSD writes summaries, generates a UAT script, and commits +4. **Reassess** — The roadmap is checked against reality — slices may be reordered, added, or removed +5. **Next Slice** — The loop continues until all slices are done + +After all slices complete, a **milestone validation** gate checks that success criteria were actually met before sealing the milestone. + +## Adding Knowledge + +GSD maintains a knowledge base that persists across sessions. Add rules, patterns, or lessons: + +``` +/gsd knowledge rule "Always use parameterized queries for database access" +/gsd knowledge pattern "Service classes go in src/services/" +/gsd knowledge lesson "The OAuth flow requires the redirect URL to match exactly" +``` + +This knowledge is injected into every task prompt automatically. 
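+
+Because everything in `.gsd/` is plain Markdown, you can audit the context GSD carries across sessions with ordinary shell tools. A minimal sketch, using the files described above:
+
+```bash
+cat .gsd/STATE.md             # current status at a glance (auto-generated)
+tail -n 20 .gsd/DECISIONS.md  # the most recent architectural decisions
+cat .gsd/KNOWLEDGE.md         # the rules injected into every task prompt
+```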
diff --git a/gitbook/core-concepts/step-mode.md b/gitbook/core-concepts/step-mode.md new file mode 100644 index 000000000..750c56728 --- /dev/null +++ b/gitbook/core-concepts/step-mode.md @@ -0,0 +1,54 @@ +# Step Mode + +Step mode is GSD's interactive, one-step-at-a-time workflow. You stay in the loop, reviewing output between each step. + +## Starting Step Mode + +``` +/gsd +``` + +GSD reads the state of your `.gsd/` directory and presents a wizard showing what's completed and what's next. It then executes one unit of work and pauses. + +## How It Works + +Step mode adapts to your project's current state: + +| State | What Happens | +|-------|-------------| +| No `.gsd/` directory | Starts a discussion flow to capture your project vision | +| Milestone exists, no roadmap | Opens a discussion or research phase for the milestone | +| Roadmap exists, slices pending | Plans the next slice or executes the next task | +| Mid-task | Resumes where you left off | + +After each unit completes, you see results and decide what to do next. This is ideal for: + +- New projects where you want to shape the architecture +- Critical work where you want to review each step +- Learning how GSD works before trusting auto mode + +## Steering During Step Mode + +Between steps, you can: + +- **Discuss** — `/gsd discuss` to talk through architecture decisions +- **Skip** — `/gsd skip` to prevent a unit from being dispatched +- **Undo** — `/gsd undo` to revert the last completed unit +- **Switch to auto** — `/gsd auto` to let GSD continue autonomously + +## When to Use Step Mode + +- **First milestone** — Review GSD's work before trusting it to run solo +- **Architectural decisions** — When you want to guide the approach +- **Unfamiliar codebases** — When you want to ensure GSD understands the project +- **High-stakes changes** — When mistakes would be costly + +## Transitioning to Auto Mode + +Once you're comfortable with GSD's approach, switch to auto mode: + +``` +/gsd auto +``` + +You can always press **Escape** to pause auto mode and return to step-by-step control. diff --git a/gitbook/features/captures.md b/gitbook/features/captures.md new file mode 100644 index 000000000..54a2a27e1 --- /dev/null +++ b/gitbook/features/captures.md @@ -0,0 +1,54 @@ +# Captures & Triage + +Captures let you fire-and-forget thoughts during auto-mode execution. Instead of pausing auto mode to steer, capture ideas, bugs, or scope changes and let GSD triage them at natural seams between tasks. + +## Quick Start + +While auto mode is running (or any time): + +``` +/gsd capture "add rate limiting to the API endpoints" +/gsd capture "the auth flow should support OAuth, not just JWT" +``` + +Captures are appended to `.gsd/CAPTURES.md` and triaged automatically between tasks. + +## How It Works + +``` +Capture → Triage → Confirm → Resolve → Resume +``` + +1. **Capture** — your thought is saved with a timestamp +2. **Triage** — between tasks, GSD classifies each capture +3. **Confirm** — you see the proposed resolution and approve or adjust +4. **Resolve** — the resolution is applied +5. 
**Resume** — auto mode continues + +## Classification Types + +Each capture is classified into one of five types: + +| Type | Meaning | What Happens | +|------|---------|-------------| +| `quick-task` | Small, self-contained fix | Executed immediately | +| `inject` | New task needed in current slice | Task added to active slice | +| `defer` | Important but not urgent | Deferred to roadmap reassessment | +| `replan` | Changes the current approach | Triggers slice replan | +| `note` | Informational, no action needed | Acknowledged, no changes | + +Plan-modifying resolutions (inject, replan) require your confirmation. + +## Manual Triage + +Trigger triage manually at any time: + +``` +/gsd triage +``` + +Useful when you've accumulated several captures and want to process them before the next natural seam. + +## Dashboard Integration + +The progress widget shows a pending capture count badge when captures are waiting for triage. diff --git a/gitbook/features/cost-management.md b/gitbook/features/cost-management.md new file mode 100644 index 000000000..62204c586 --- /dev/null +++ b/gitbook/features/cost-management.md @@ -0,0 +1,74 @@ +# Cost Management + +GSD tracks token usage and cost for every unit of work during auto mode. This data powers the dashboard, budget enforcement, and cost projections. + +## Viewing Costs + +**Dashboard:** Press `Ctrl+Alt+G` or type `/gsd status` for real-time cost breakdown. + +**Visualizer:** `/gsd visualize` → Metrics tab for detailed charts. + +**Aggregations:** +- By phase (research, planning, execution, completion, reassessment) +- By slice +- By model +- Project totals + +## Budget Ceiling + +Set a maximum spend: + +```yaml +budget_ceiling: 50.00 +``` + +### Enforcement Modes + +```yaml +budget_enforcement: pause # default when ceiling is set +``` + +| Mode | What Happens | +|------|-------------| +| `warn` | Log a warning, keep going | +| `pause` | Pause auto mode, wait for you | +| `halt` | Stop auto mode entirely | + +## Cost Projections + +Once at least two slices have completed, GSD projects the remaining cost: + +``` +Projected remaining: $12.40 ($6.20/slice avg × 2 remaining) +``` + +## Budget Pressure + +When approaching the budget ceiling, GSD automatically uses cheaper models: + +| Budget Used | Effect | +|------------|--------| +| < 50% | No adjustment | +| 50-75% | Standard tasks downgrade to lighter models | +| 75-90% | More aggressive downgrading | +| > 90% | Nearly everything downgrades; only complex tasks stay at standard | + +This spreads your budget across remaining work instead of exhausting it early. 
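+
+You can also watch spend from outside the TUI: the headless query snapshot exposes the running total (a sketch, assuming `jq` is installed; the field name is documented in [Headless & CI Mode](headless.md)):
+
+```bash
+# Print the project's running cost once a minute while auto mode runs
+while sleep 60; do
+  printf '%s spent so far: $%s\n' "$(date +%H:%M)" "$(gsd headless query | jq -r '.cost.total')"
+done
+```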
+ +## Token Profiles & Cost + +| Profile | Typical Savings | How | +|---------|----------------|-----| +| `budget` | 40-60% | Cheaper models, phase skipping, minimal context | +| `balanced` | 10-20% | Default models, standard context | +| `quality` | 0% (baseline) | All phases, full context | + +## Tips + +- Start with `balanced` profile and a generous `budget_ceiling` to establish baseline costs +- Check `/gsd status` after a few slices to see per-slice cost averages +- Switch to `budget` for well-understood, repetitive work +- Use `quality` only when architectural decisions are being made +- Use per-phase model selection to save: Opus for planning, Sonnet for execution +- Enable `dynamic_routing` for automatic model downgrading on simple tasks +- Use `/gsd visualize` → Metrics tab to see where your budget is going diff --git a/gitbook/features/dynamic-model-routing.md b/gitbook/features/dynamic-model-routing.md new file mode 100644 index 000000000..a9903f715 --- /dev/null +++ b/gitbook/features/dynamic-model-routing.md @@ -0,0 +1,88 @@ +# Dynamic Model Routing + +Dynamic model routing automatically selects cheaper models for simple work and reserves expensive models for complex tasks. This reduces cost by 20-50% without sacrificing quality where it matters. + +## Enabling + +```yaml +dynamic_routing: + enabled: true +``` + +## How It Works + +Each unit passes through two stages: + +1. **Complexity classification** — classifies work as light, standard, or heavy +2. **Capability scoring** — within the tier, ranks models by how well they match the task + +**Key rule:** Your configured model is always the ceiling — routing never upgrades beyond what you've set. + +| Tier | Typical Work | Model Level | +|------|-------------|-------------| +| Light | Slice completion, UAT, hooks | Haiku-class | +| Standard | Research, planning, execution | Sonnet-class | +| Heavy | Replanning, roadmap reassessment | Opus-class | + +## Configuration + +```yaml +dynamic_routing: + enabled: true + tier_models: # optional: explicit model per tier + light: claude-haiku-4-5 + standard: claude-sonnet-4-6 + heavy: claude-opus-4-6 + escalate_on_failure: true # bump tier on failure (default) + budget_pressure: true # auto-downgrade near budget ceiling (default) + cross_provider: true # consider models from other providers (default) + capability_routing: true # score models by task fit (default) +``` + +### Escalate on Failure + +When a task fails at a given tier, the router escalates to the next tier on retry: Light → Standard → Heavy. This prevents cheap models from burning retries on work that needs more reasoning. + +### Budget Pressure + +When approaching the budget ceiling, the router progressively downgrades: + +| Budget Used | Effect | +|------------|--------| +| < 50% | No adjustment | +| 50-75% | Standard → Light | +| 75-90% | More aggressive | +| > 90% | Nearly everything → Light | + +### Cross-Provider + +When enabled, the router may select models from providers other than your primary, using the built-in cost table to find the cheapest model at each tier. + +### Capability Routing + +Models are scored across 7 dimensions: coding, debugging, research, reasoning, speed, long context handling, and instruction following. Different task types weight these dimensions differently — a research task prioritizes research and reasoning, while an execution task prioritizes coding and instruction following. + +Set `capability_routing: false` to revert to simple cheapest-in-tier selection. 
+ +## Interaction with Token Profiles + +Dynamic routing and token profiles work together: + +- **Token profiles** control phase skipping and context compression +- **Dynamic routing** controls per-unit model selection + +The `budget` profile + dynamic routing provides maximum cost savings. + +## Adaptive Learning + +GSD tracks routing outcomes in `.gsd/routing-history.json`. If a tier's failure rate exceeds 20% for a given task type, future classifications are bumped up. + +Use `/gsd rate` to submit feedback: + +``` +/gsd rate over # too powerful — use cheaper next time +/gsd rate ok # just right +/gsd rate under # too weak — use stronger next time +``` + +Feedback is weighted 2x compared to automatic outcomes. diff --git a/gitbook/features/github-sync.md b/gitbook/features/github-sync.md new file mode 100644 index 000000000..aa89c8602 --- /dev/null +++ b/gitbook/features/github-sync.md @@ -0,0 +1,44 @@ +# GitHub Sync + +GSD can auto-sync milestones, slices, and tasks to GitHub Issues, PRs, and Milestones. + +## Setup + +1. Install and authenticate the `gh` CLI: + ```bash + gh auth login + ``` + +2. Enable in preferences: + ```yaml + github: + enabled: true + repo: "owner/repo" # auto-detected from git remote if omitted + labels: [gsd, auto-generated] # labels for created items + ``` + +## Commands + +| Command | Description | +|---------|-------------| +| `/github-sync bootstrap` | Initial setup — creates GitHub Milestones, Issues, and draft PRs from current `.gsd/` state | +| `/github-sync status` | Show sync mapping counts (milestones, slices, tasks) | + +## How It Works + +- Milestones → GitHub Milestones +- Slices → GitHub Issues (linked to milestone) +- Tasks → GitHub Issue checklists +- Completed slices → Draft PRs + +Sync mapping is persisted in `.gsd/.github-sync.json`. The sync is rate-limit aware — it skips when the GitHub API rate limit is low. + +## Configuration + +```yaml +github: + enabled: true + repo: "owner/repo" + labels: [gsd, auto-generated] + project: "Project ID" # optional: GitHub Project board +``` diff --git a/gitbook/features/headless.md b/gitbook/features/headless.md new file mode 100644 index 000000000..5cc1e9351 --- /dev/null +++ b/gitbook/features/headless.md @@ -0,0 +1,86 @@ +# Headless & CI Mode + +`gsd headless` runs GSD commands without a terminal UI — designed for CI pipelines, cron jobs, and scripted automation. 
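+
+As a taste, a nightly cron entry could drive a fully unattended session (illustrative; the project path, schedule, and log location are placeholders):
+
+```bash
+# crontab -e entry: run auto mode every night at 02:00 with a 2-hour cap
+0 2 * * * cd /path/to/project && gsd headless --timeout 7200000 auto >> .gsd/nightly.log 2>&1
+```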
+
+## Basic Usage
+
+```bash
+# Run auto mode
+gsd headless
+
+# Run a single unit
+gsd headless next
+
+# With timeout for CI
+gsd headless --timeout 600000 auto
+
+# Force a specific phase
+gsd headless dispatch plan
+
+# Stream all events as JSONL
+gsd headless --json auto
+```
+
+## Creating Milestones Headlessly
+
+```bash
+# From a context file
+gsd headless new-milestone --context brief.md --auto
+
+# From inline text
+gsd headless new-milestone --context-text "Build a REST API with auth"
+
+# Pipe from stdin
+echo "Build a CLI tool" | gsd headless new-milestone --context -
+```
+
+## CLI Flags
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--timeout N` | 300000 (5 min) | Overall timeout in milliseconds |
+| `--max-restarts N` | 3 | Auto-restart on crash (0 to disable) |
+| `--json` | — | Stream events as JSONL to stdout |
+| `--model ID` | — | Override model for this session |
+| `--context <file>` | — | Context file for `new-milestone` (use `-` for stdin) |
+| `--context-text <text>` | — | Inline context for `new-milestone` |
+| `--auto` | — | Chain into auto mode after milestone creation |
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| `0` | Complete |
+| `1` | Error or timeout |
+| `2` | Blocked |
+
+## Instant State Query
+
+`gsd headless query` returns a JSON snapshot of project state — no AI session, instant response (~50ms):
+
+```bash
+gsd headless query | jq '.state.phase'
+# "executing"
+
+gsd headless query | jq '.next'
+# {"action":"dispatch","unitType":"execute-task","unitId":"M001/S01/T03"}
+
+gsd headless query | jq '.cost.total'
+# 4.25
+```
+
+Any `/gsd` subcommand works as a positional argument: `gsd headless status`, `gsd headless doctor`, etc.
+
+## MCP Server Mode
+
+`gsd --mode mcp` runs GSD as a Model Context Protocol server over stdin/stdout, exposing all GSD tools to external AI clients:
+
+```bash
+gsd --mode mcp
+```
+
+Compatible with Claude Desktop, VS Code Copilot, and any MCP host.
+
+## Auto-Restart
+
+In headless mode, crashes trigger automatic restart with exponential backoff (5s → 10s → 30s cap, default 3 attempts). SIGINT/SIGTERM bypasses restart. Combined with crash recovery, this enables true overnight unattended execution.
diff --git a/gitbook/features/parallel.md b/gitbook/features/parallel.md
new file mode 100644
index 000000000..a94615308
--- /dev/null
+++ b/gitbook/features/parallel.md
@@ -0,0 +1,97 @@
+# Parallel Orchestration
+
+Run multiple milestones simultaneously in isolated git worktrees. Each milestone gets its own worker process, branch, and context window.
+
+{% hint style="info" %}
+Parallel mode is off by default. Enable it in preferences to use `/gsd parallel` commands.
+{% endhint %}
+
+## Quick Start
+
+1. Enable parallel mode:
+   ```yaml
+   parallel:
+     enabled: true
+     max_workers: 2
+   ```
+
+2. Start parallel execution:
+   ```
+   /gsd parallel start
+   ```
+   GSD scans milestones, checks dependencies and file overlap, shows an eligibility report, and spawns workers.
+
+3. Monitor:
+   ```
+   /gsd parallel status
+   ```
+
+4. Stop:
+   ```
+   /gsd parallel stop
+   ```
+
+## How It Works
+
+Each worker is a separate GSD process with complete isolation:
+
+| Resource | Isolation |
+|----------|----------|
+| Filesystem | Own git worktree |
+| Git branch | `milestone/<MID>` |
+| Context window | Separate process |
+| Metrics | Own `metrics.json` |
+| Crash recovery | Own `auto.lock` |
+
+Workers communicate with the coordinator through file-based IPC — heartbeat files and signal files in `.gsd/parallel/`.
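+
+You normally never touch these files directly, but when debugging it can help to confirm they're being written (the exact file names are internal and may change between releases):
+
+```bash
+# Watch the coordinator/worker IPC directory while parallel mode runs
+ls -la .gsd/parallel/
+```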
+
+## Eligibility
+
+Before starting, GSD checks which milestones can run concurrently:
+
+1. **Not complete** — finished milestones are skipped
+2. **Dependencies satisfied** — all `dependsOn` entries must be complete
+3. **File overlap** — milestones touching the same files get a warning (but are still eligible since they run in separate worktrees)
+
+## Configuration
+
+```yaml
+parallel:
+  enabled: false # master toggle (default: false)
+  max_workers: 2 # concurrent workers (1-4)
+  budget_ceiling: 50.00 # aggregate cost limit
+  merge_strategy: "per-milestone" # when to merge back
+  auto_merge: "confirm" # "auto", "confirm", or "manual"
+```
+
+## Commands
+
+| Command | Description |
+|---------|-------------|
+| `/gsd parallel start` | Analyze and start workers |
+| `/gsd parallel status` | Show all workers with progress and cost |
+| `/gsd parallel stop [MID]` | Stop all or a specific worker |
+| `/gsd parallel pause [MID]` | Pause all or a specific worker |
+| `/gsd parallel resume [MID]` | Resume paused workers |
+| `/gsd parallel merge [MID]` | Merge completed milestones to main |
+
+## Merge Reconciliation
+
+When milestones complete, their changes merge back to main:
+
+- `.gsd/` state files are auto-resolved
+- Code conflicts halt the merge — resolve manually and retry with `/gsd parallel merge <MID>`
+
+## Budget Management
+
+When `budget_ceiling` is set, aggregate cost across all workers is tracked. When the ceiling is reached, workers are signaled to stop.
+
+## Troubleshooting
+
+| Problem | Fix |
+|---------|-----|
+| "Parallel mode is not enabled" | Set `parallel.enabled: true` |
+| "No eligible milestones" | All milestones are complete or blocked; check `/gsd queue` |
+| Worker crashed | Run `/gsd doctor --fix`, then `/gsd parallel start` |
+| Merge conflicts | Resolve in `.gsd/worktrees/<MID>/`, then `/gsd parallel merge <MID>` |
+| Workers seem stuck | Check if budget ceiling was reached via `/gsd parallel status` |
diff --git a/gitbook/features/remote-questions.md b/gitbook/features/remote-questions.md
new file mode 100644
index 000000000..2c16ef8db
--- /dev/null
+++ b/gitbook/features/remote-questions.md
@@ -0,0 +1,90 @@
+# Remote Questions
+
+Remote questions let GSD ask for your input via Slack, Discord, or Telegram when running in headless auto mode. When GSD needs a decision, it posts the question to your configured channel and polls for a response.
+
+## Setup
+
+### Discord
+
+```
+/gsd remote discord
+```
+
+The wizard prompts for your bot token, validates it, lets you pick a server and channel, sends a test message, and saves the config.
+ +**Bot requirements:** +- A bot application with a token from the [Discord Developer Portal](https://discord.com/developers/applications) +- Bot invited to the server with: Send Messages, Read Message History, Add Reactions, View Channel +- `DISCORD_BOT_TOKEN` environment variable set + +### Slack + +``` +/gsd remote slack +``` + +**Bot requirements:** +- A Slack app with a bot token (`xoxb-...`) from [Slack API](https://api.slack.com/apps) +- Bot invited to the target channel +- Scopes: `chat:write`, `reactions:read`, `reactions:write`, `channels:read`, `groups:read`, `channels:history`, `groups:history` + +### Telegram + +``` +/gsd remote telegram +``` + +**Bot requirements:** +- A bot token from [@BotFather](https://t.me/BotFather) +- Bot added to the target group chat +- `TELEGRAM_BOT_TOKEN` environment variable set + +## Configuration + +```yaml +remote_questions: + channel: discord # or slack or telegram + channel_id: "1234567890123456789" + timeout_minutes: 5 # 1-30, default 5 + poll_interval_seconds: 5 # 2-30, default 5 +``` + +## How It Works + +1. GSD encounters a decision point during auto mode +2. The question is posted to your channel as a rich message +3. GSD polls for a response at the configured interval +4. You respond by: + - **Reacting** with a number emoji (1️⃣, 2️⃣, etc.) for single-question prompts + - **Replying** with a number, comma-separated numbers, or free text +5. GSD picks up the response and continues +6. A ✅ reaction confirms receipt + +### Response Formats + +**Single question:** React with a number emoji, reply with a number, or reply with free text. + +**Multiple questions:** Reply with semicolons (`1;2;custom text`) or newlines (one answer per line). + +### Timeouts + +If no response arrives within `timeout_minutes`, GSD continues with a timeout result — typically making a conservative default choice. + +## Commands + +| Command | Description | +|---------|-------------| +| `/gsd remote` | Show menu and current status | +| `/gsd remote slack` | Set up Slack | +| `/gsd remote discord` | Set up Discord | +| `/gsd remote telegram` | Set up Telegram | +| `/gsd remote status` | Show current config | +| `/gsd remote disconnect` | Remove configuration | + +## Troubleshooting + +| Problem | Fix | +|---------|-----| +| "Remote auth failed" | Verify bot token is correct and not expired | +| "Could not send to channel" | Check bot has Send Messages permission; invite bot to channel | +| No response detected | Make sure you're replying to the prompt message, not posting a new one | diff --git a/gitbook/features/skills.md b/gitbook/features/skills.md new file mode 100644 index 000000000..4a9fd46b7 --- /dev/null +++ b/gitbook/features/skills.md @@ -0,0 +1,120 @@ +# Skills + +Skills are specialized instruction sets that GSD loads when the task matches. They provide domain-specific guidance — coding patterns, framework idioms, testing strategies, and tool usage. + +Skills follow the open [Agent Skills standard](https://agentskills.io/) and work across multiple AI agents, not just GSD. + +## Skill Directories + +| Location | Scope | Description | +|----------|-------|------------| +| `~/.agents/skills/` | Global | Shared across all projects | +| `.agents/skills/` (project root) | Project | Project-specific, committable to git | + +Global skills take precedence when names collide. 
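+
+To see what's installed at each scope, and to spot name collisions, listing both directories is enough (a minimal sketch, run from the project root):
+
+```bash
+ls ~/.agents/skills/ 2>/dev/null   # global (wins on name collisions)
+ls .agents/skills/ 2>/dev/null     # project-local
+```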
+ +## Installing Skills + +Skills are installed via the [skills.sh CLI](https://skills.sh): + +```bash +# Interactive — choose skills and target agents +npx skills add dpearson2699/swift-ios-skills + +# Install specific skills +npx skills add dpearson2699/swift-ios-skills --skill swift-concurrency --skill swiftui-patterns -y + +# Install all from a repo +npx skills add dpearson2699/swift-ios-skills --all + +# Check for updates +npx skills check + +# Update installed skills +npx skills update +``` + +## Onboarding Catalog + +During `gsd init`, GSD detects your project's tech stack and recommends relevant skill packs: + +- **Swift** — SwiftUI, Swift Core, concurrency, Charts, Testing +- **iOS** — App Intents, Widgets, StoreKit, MapKit, Core ML, Vision, accessibility +- **Web** — React, React Native, frontend design, accessibility +- **Languages** — Rust, Python, Go patterns and best practices +- **General** — Document handling (PDF, DOCX, XLSX) + +## Skill Discovery + +The `skill_discovery` preference controls how GSD finds skills during auto mode: + +| Mode | Behavior | +|------|----------| +| `auto` | Skills found and applied automatically | +| `suggest` | Skills identified but require confirmation (default) | +| `off` | No skill discovery | + +## Skill Preferences + +Control which skills are used: + +```yaml +always_use_skills: + - debug-like-expert +prefer_skills: + - frontend-design +avoid_skills: + - security-docker +skill_rules: + - when: task involves authentication + use: [clerk] + - when: frontend styling work + prefer: [frontend-design] +``` + +## Creating Custom Skills + +Create your own skill by adding a directory with a `SKILL.md` file: + +``` +~/.agents/skills/my-skill/ + SKILL.md — instructions for the AI + references/ — optional reference files +``` + +The `SKILL.md` contains instructions the AI follows when the skill is active. + +### Project-Local Skills + +Place skills in your project root for project-specific guidance: + +``` +.agents/skills/my-project-skill/ + SKILL.md +``` + +Project-local skills can be committed to git so team members share the same skill set. + +## Skill Health Dashboard + +Track skill performance: + +``` +/gsd skill-health # overview table +/gsd skill-health rust-core # detailed view for one skill +/gsd skill-health --stale 30 # skills unused for 30+ days +/gsd skill-health --declining # skills with falling success rates +``` + +The dashboard flags: +- Success rate below 70% over the last 10 uses +- Token usage rising 20%+ compared to previous window +- Skills unused beyond the configured threshold + +### Staleness Detection + +```yaml +skill_staleness_days: 60 # flag skills unused for 60+ days (0 to disable) +``` + +Stale skills are excluded from automatic matching but remain available for explicit use. diff --git a/gitbook/features/teams.md b/gitbook/features/teams.md new file mode 100644 index 000000000..44dac0c57 --- /dev/null +++ b/gitbook/features/teams.md @@ -0,0 +1,91 @@ +# Working in Teams + +GSD supports multi-user workflows where several developers work on the same repository concurrently. + +## Quick Setup + +The simplest way: set team mode in your project preferences. + +```yaml +# .gsd/PREFERENCES.md (committed to git) +--- +version: 1 +mode: team +--- +``` + +This enables unique milestone IDs, push branches, pre-merge checks, and other team-appropriate defaults in one setting. 
+
+## What Team Mode Does
+
+| Setting | Effect |
+|---------|--------|
+| `unique_milestone_ids` | IDs like `M001-eh88as` instead of `M001` — no collisions |
+| `git.push_branches` | Milestone branches are pushed to remote |
+| `git.pre_merge_check` | Validation runs before merging |
+
+You can override individual settings on top of `mode: team`.
+
+## Configure `.gitignore`
+
+Share planning artifacts while keeping runtime files local:
+
+```bash
+# Runtime files (per-developer, gitignore these)
+.gsd/auto.lock
+.gsd/completed-units.json
+.gsd/STATE.md
+.gsd/metrics.json
+.gsd/activity/
+.gsd/runtime/
+.gsd/worktrees/
+.gsd/milestones/**/continue.md
+.gsd/milestones/**/*-CONTINUE.md
+```
+
+**What gets shared** (committed to git):
+- `.gsd/PREFERENCES.md` — project preferences
+- `.gsd/PROJECT.md` — living project description
+- `.gsd/REQUIREMENTS.md` — requirement contract
+- `.gsd/DECISIONS.md` — architectural decisions
+- `.gsd/milestones/` — roadmaps, plans, summaries, research
+
+**What stays local** (gitignored):
+- Lock files, metrics, state, activity logs, worktrees
+
+## Commit the Config
+
+```bash
+git add .gsd/PREFERENCES.md
+git commit -m "chore: enable GSD team workflow"
+```
+
+## Keeping `.gsd/` Local
+
+For teams where only some members use GSD:
+
+```yaml
+git:
+  commit_docs: false
+```
+
+This gitignores `.gsd/` entirely. You get structured planning without affecting teammates.
+
+## Parallel Development
+
+Multiple developers can run auto mode simultaneously on different milestones. Each developer:
+
+- Gets their own worktree (`.gsd/worktrees/<MID>/`)
+- Works on a unique `milestone/<MID>` branch
+- Squash-merges to main independently
+
+Milestone dependencies can be declared:
+
+```yaml
+# In M00X-CONTEXT.md frontmatter
+---
+depends_on: [M001-eh88as]
+---
+```
+
+GSD enforces that dependent milestones complete before starting downstream work.
diff --git a/gitbook/features/token-optimization.md b/gitbook/features/token-optimization.md
new file mode 100644
index 000000000..c89493618
--- /dev/null
+++ b/gitbook/features/token-optimization.md
@@ -0,0 +1,108 @@
+# Token Optimization
+
+GSD's token optimization system can reduce token usage by 40-60% without sacrificing output quality. It has three pillars: **token profiles**, **context compression**, and **complexity-based task routing**.
+
+## Token Profiles
+
+A token profile coordinates model selection, phase skipping, and context compression with a single setting:
+
+```yaml
+token_profile: balanced
+```
+
+### `budget` — Maximum Savings (40-60%)
+
+| Setting | Value |
+|---------|-------|
+| Planning model | Sonnet |
+| Execution model | Sonnet |
+| Simple task model | Haiku |
+| Milestone research | Skipped |
+| Slice research | Skipped |
+| Roadmap reassessment | Skipped |
+| Context level | Minimal |
+
+Best for: prototyping, small projects, well-understood codebases.
+
+### `balanced` — Smart Defaults (default)
+
+| Setting | Value |
+|---------|-------|
+| All models | User's default |
+| Milestone research | Runs |
+| Slice research | Skipped |
+| Roadmap reassessment | Runs |
+| Context level | Standard |
+
+Best for: most projects, day-to-day development.
+
+### `quality` — Full Context
+
+| Setting | Value |
+|---------|-------|
+| All models | User's configured defaults |
+| All phases | Run |
+| Context level | Full |
+
+Best for: complex architectures, greenfield projects, critical work.
+
+## Context Compression
+
+Each profile controls how much context is pre-loaded into AI prompts:
+
+| Profile | What's Included |
+|---------|----------------|
+| `budget` | Task plan and essential prior summaries only |
+| `balanced` | Task plan, summaries, slice plan, roadmap excerpt |
+| `quality` | Everything — all plans, summaries, decisions, requirements |
+
+## Complexity-Based Task Routing
+
+GSD classifies each task by complexity and routes it to an appropriate model:
+
+| Complexity | Indicators | Model Level |
+|-----------|------------|-------------|
+| Simple | ≤3 steps, ≤3 files, short description | Haiku-class |
+| Standard | 4-7 steps, 4-7 files | Sonnet-class |
+| Complex | ≥8 steps, ≥8 files, complexity keywords | Opus-class |
+
+**Complexity keywords** that prevent simple classification: `refactor`, `migrate`, `integrate`, `architect`, `security`, `performance`, `concurrent`, `distributed`, and others.
+
+{% hint style="info" %}
+Dynamic routing requires `models` configured in your preferences and `dynamic_routing.enabled: true`. See [Dynamic Model Routing](dynamic-model-routing.md).
+{% endhint %}
+
+## Overriding Profile Defaults
+
+The `token_profile` sets defaults, but explicit preferences always win:
+
+```yaml
+token_profile: budget
+phases:
+  skip_research: false # override: keep research
+models:
+  planning: claude-opus-4-6 # override: use Opus for planning
+```
+
+## Adaptive Learning
+
+GSD tracks success and failure of tier assignments over time. If a model tier's failure rate exceeds 20% for a given task type, future tasks of that type are bumped to a higher tier.
+
+Submit manual feedback with:
+
+```
+/gsd rate over # model was overpowered — use cheaper next time
+/gsd rate ok # model was appropriate
+/gsd rate under # model was too weak — use stronger next time
+```
+
+## Observation Masking
+
+During auto mode, old tool results are replaced with lightweight placeholders before each AI call. This reduces token usage between compactions with zero overhead.
+
+```yaml
+context_management:
+  observation_masking: true # default: true
+  observation_mask_turns: 8 # keep results from last 8 turns
+  tool_result_max_chars: 800 # truncate large tool outputs
+```
diff --git a/gitbook/features/visualizer.md b/gitbook/features/visualizer.md
new file mode 100644
index 000000000..4155ec144
--- /dev/null
+++ b/gitbook/features/visualizer.md
@@ -0,0 +1,82 @@
+# Workflow Visualizer
+
+The workflow visualizer is a full-screen terminal overlay showing project progress, dependencies, cost metrics, and execution timeline.
+
+## Opening
+
+```
+/gsd visualize
+```
+
+Or configure automatic display after milestone completion:
+
+```yaml
+auto_visualize: true
+```
+
+## Tabs
+
+Switch tabs with `Tab`, `1`-`4`, or arrow keys.
+
+### 1. Progress
+
+A tree view of milestones, slices, and tasks with completion status:
+
+```
+M001: User Management 4/5 tasks
+  ✅ S01: Auth module 3/3 tasks
+     ✅ T01: Core types
+     ✅ T02: JWT middleware
+     ✅ T03: Login flow
+  ⏳ S02: User dashboard 1/2 tasks
+     ✅ T01: Layout component
+     ⬜ T02: Profile page
+```
+
+### 2. Dependencies
+
+An ASCII dependency graph showing slice relationships:
+
+```
+S01 ──→ S02 ──→ S04
+ └───→ S03 ──↗
+```
+
+### 3. Metrics
+
+Bar charts showing cost and token usage:
+
+- By phase (research, planning, execution, completion)
+- By slice (with running totals)
+- By model (which models consumed the most budget)
+
+### 4. Timeline
+
+Chronological execution history: unit type, timestamps, duration, model, and token counts.
+ +## Controls + +| Key | Action | +|-----|--------| +| `Tab` | Next tab | +| `Shift+Tab` | Previous tab | +| `1`-`4` | Jump to tab | +| `↑`/`↓` | Scroll | +| `Escape` / `q` | Close | + +The visualizer auto-refreshes every 2 seconds, staying current alongside running auto mode. + +## HTML Reports + +For shareable reports outside the terminal: + +``` +/gsd export --html # current milestone +/gsd export --html --all # all milestones +``` + +Generates self-contained HTML files in `.gsd/reports/` with progress tree, dependency graph, cost charts, timeline, and changelog. All CSS and JS are inlined — no external dependencies. Printable to PDF from any browser. + +```yaml +auto_report: true # auto-generate after milestone completion (default) +``` diff --git a/gitbook/features/web-interface.md b/gitbook/features/web-interface.md new file mode 100644 index 000000000..6870ffc41 --- /dev/null +++ b/gitbook/features/web-interface.md @@ -0,0 +1,37 @@ +# Web Interface + +GSD includes a browser-based interface for project management and real-time progress monitoring. + +## Quick Start + +```bash +gsd --web +``` + +This starts a local web server and opens the dashboard in your default browser. + +## CLI Flags + +```bash +gsd --web --host 0.0.0.0 --port 8080 --allowed-origins "https://example.com" +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--host` | `localhost` | Bind address | +| `--port` | `3000` | Port | +| `--allowed-origins` | (none) | Comma-separated CORS origins | + +## Features + +- **Project management** — view milestones, slices, and tasks in a visual dashboard +- **Real-time progress** — live updates as auto mode executes +- **Multi-project support** — manage multiple projects from one browser tab via `?project=` URL parameter +- **Change project root** — switch directories from the web UI without restarting +- **Onboarding flow** — API key setup and provider configuration in the browser +- **Model selection** — switch models and providers from the web UI + +## Platform Notes + +- **macOS/Linux** — Full support +- **Windows** — Web build is skipped due to Next.js compatibility issues; CLI remains fully functional diff --git a/gitbook/features/workflow-templates.md b/gitbook/features/workflow-templates.md new file mode 100644 index 000000000..45246a33b --- /dev/null +++ b/gitbook/features/workflow-templates.md @@ -0,0 +1,45 @@ +# Workflow Templates + +Workflow templates are pre-built patterns for common development tasks. Instead of setting up a full milestone for a quick bugfix or spike, use a template to get started immediately. 
+ +## Using Templates + +``` +/gsd start # pick from available templates +/gsd start resume # resume an in-progress workflow +``` + +## Available Templates + +| Template | Purpose | +|----------|---------| +| `bugfix` | Fix a specific bug with diagnosis and verification | +| `spike` | Time-boxed investigation or prototype | +| `feature` | Standard feature development | +| `hotfix` | Urgent production fix | +| `refactor` | Code restructuring and cleanup | +| `security-audit` | Security review and remediation | +| `dep-upgrade` | Dependency update and migration | +| `full-project` | Complete project from scratch | + +## Listing and Inspecting + +``` +/gsd templates # list all available templates +/gsd templates info # show details for a template +``` + +## Custom Workflows + +Create your own workflow definitions: + +``` +/gsd workflow new # create a new workflow YAML +/gsd workflow run # start a workflow run +/gsd workflow list # list active runs +/gsd workflow validate # validate definition +/gsd workflow pause # pause running workflow +/gsd workflow resume # resume paused workflow +``` + +Custom workflows are defined in YAML and can specify phases, dependencies, and configuration for each step. diff --git a/gitbook/getting-started/choosing-a-model.md b/gitbook/getting-started/choosing-a-model.md new file mode 100644 index 000000000..64b2e3aad --- /dev/null +++ b/gitbook/getting-started/choosing-a-model.md @@ -0,0 +1,94 @@ +# Choosing a Model + +GSD auto-selects a default model after you log in to a provider. You can switch models at any time. + +## Switch Models + +Inside a GSD session, type: + +``` +/model +``` + +This opens an interactive picker showing all available models from your configured providers. + +## Per-Phase Models + +Different phases of work have different requirements. You can assign specific models to each phase in your preferences: + +```yaml +models: + research: claude-sonnet-4-6 # scouting and research + planning: claude-opus-4-6 # architectural decisions + execution: claude-sonnet-4-6 # writing code + execution_simple: claude-haiku-4-5 # simple tasks (docs, config) + completion: claude-sonnet-4-6 # summaries and wrap-up + subagent: claude-sonnet-4-6 # delegated sub-tasks +``` + +Omit a key to use whatever model is currently active for that phase. + +## Model Fallbacks + +If a model is unavailable (provider down, rate limited, credits exhausted), GSD can automatically fall back to another: + +```yaml +models: + planning: + model: claude-opus-4-6 + fallbacks: + - openrouter/z-ai/glm-5 + - openrouter/moonshotai/kimi-k2.5 +``` + +Fallbacks are tried in order until one works. + +## Token Profiles + +Token profiles coordinate model selection, phase skipping, and context compression with a single setting: + +| Profile | Cost Savings | Best For | +|---------|-------------|----------| +| `budget` | 40-60% | Prototyping, small projects, well-understood codebases | +| `balanced` | 10-20% | Most projects, day-to-day development (default) | +| `quality` | 0% (baseline) | Complex architectures, greenfield projects, critical work | + +```yaml +token_profile: balanced +``` + +See [Token Optimization](../features/token-optimization.md) for details. + +## Dynamic Model Routing + +When enabled, GSD automatically picks cheaper models for simple tasks and reserves expensive ones for complex work: + +```yaml +dynamic_routing: + enabled: true +``` + +A documentation fix gets Haiku. An architectural refactor gets Opus. 
Your configured model is always the ceiling — routing never upgrades beyond what you've set. + +See [Dynamic Model Routing](../features/dynamic-model-routing.md) for the full guide. + +## Supported Providers + +GSD supports 20+ providers out of the box. See [Provider Setup](../configuration/providers.md) for setup instructions: + +| Provider | Auth Method | +|----------|-------------| +| Anthropic (Claude) | OAuth or API key | +| OpenAI | API key | +| Google Gemini | API key | +| OpenRouter | API key | +| Groq | API key | +| xAI (Grok) | API key | +| Mistral | API key | +| GitHub Copilot | OAuth | +| Amazon Bedrock | IAM credentials | +| Vertex AI | ADC | +| Azure OpenAI | API key | +| Ollama | Local (no auth) | +| LM Studio | Local (no auth) | +| vLLM / SGLang | Local (no auth) | diff --git a/gitbook/getting-started/first-project.md b/gitbook/getting-started/first-project.md new file mode 100644 index 000000000..dd0551035 --- /dev/null +++ b/gitbook/getting-started/first-project.md @@ -0,0 +1,128 @@ +# Your First Project + +## Launch GSD + +Open a terminal in any project directory (or an empty one) and run: + +```bash +gsd +``` + +GSD shows a welcome screen with your version, active model, and available tool keys. + +## Start a Discussion + +Type `/gsd` to enter step mode. GSD reads the state of your project directory and determines the next logical action: + +- **No `.gsd/` directory** — starts a discussion flow to capture your project vision +- **Milestone exists, no roadmap** — discuss or research the milestone +- **Roadmap exists, slices pending** — plan the next slice or execute a task +- **Mid-task** — resume where you left off + +For a new project, GSD will ask you to describe what you want to build. Talk through your vision — GSD captures requirements, architectural decisions, and scope. + +## The Project Hierarchy + +After discussion, GSD organizes your work into: + +``` +Milestone → a shippable version (4-10 slices) + Slice → one demoable feature (1-7 tasks) + Task → one context-window-sized unit of work +``` + +The key rule: **a task must fit in one AI context window.** If it can't, it becomes two tasks. + +## Run Auto Mode + +Once you have a milestone and roadmap, let GSD take the wheel: + +``` +/gsd auto +``` + +GSD autonomously: +1. **Plans** each slice — scouts the codebase, researches docs, decomposes into tasks +2. **Executes** each task — writes code in a fresh AI session +3. **Completes** the slice — writes summaries, commits with meaningful messages +4. **Reassesses** the roadmap — checks if the plan still makes sense +5. **Repeats** until the milestone is done + +## The Two-Terminal Workflow + +The recommended approach: auto mode in one terminal, steering from another. + +**Terminal 1 — let it build:** + +```bash +gsd +/gsd auto +``` + +**Terminal 2 — steer while it works:** + +```bash +gsd +/gsd discuss # talk through architecture decisions +/gsd status # check progress +/gsd queue # queue the next milestone +/gsd capture "add rate limiting to the API" # fire-and-forget thought +``` + +Both terminals read and write the same `.gsd/` files. Decisions in terminal 2 are picked up at the next phase boundary automatically. + +## Check Progress + +Press `Ctrl+Alt+G` or type `/gsd status` to see the dashboard: + +- Current milestone, slice, and task +- Elapsed time and phase +- Per-unit cost and token breakdown +- Completed and in-progress work + +## Resume a Session + +```bash +gsd --continue # or gsd -c +``` + +Resumes the most recent session for the current directory. 
+ +To browse and pick from all saved sessions: + +```bash +gsd sessions +``` + +Shows each session's date, message count, and preview so you can choose which to resume. + +## What's on Disk + +All state lives in `.gsd/` inside your project: + +``` +.gsd/ + PROJECT.md — what the project is + REQUIREMENTS.md — requirement contract + DECISIONS.md — architectural decisions + KNOWLEDGE.md — cross-session rules and patterns + STATE.md — quick-glance status + milestones/ + M001/ + M001-ROADMAP.md — slice plan with dependencies + M001-CONTEXT.md — scope and goals + slices/ + S01/ + S01-PLAN.md — task decomposition + S01-SUMMARY.md — what happened + S01-UAT.md — test script + tasks/ + T01-PLAN.md + T01-SUMMARY.md +``` + +## Next Steps + +- [Auto Mode](../core-concepts/auto-mode.md) — deep dive into autonomous execution +- [Preferences](../configuration/preferences.md) — model selection, timeouts, budgets +- [Commands](../reference/commands.md) — all commands and shortcuts diff --git a/gitbook/getting-started/installation.md b/gitbook/getting-started/installation.md new file mode 100644 index 000000000..e1e72fe80 --- /dev/null +++ b/gitbook/getting-started/installation.md @@ -0,0 +1,84 @@ +# Installation + +## Install GSD + +```bash +npm install -g gsd-pi +``` + +Requires **Node.js 22.0.0 or later** (24 LTS recommended) and **Git**. + +{% hint style="info" %} +**`command not found: gsd`?** Your shell may not have npm's global bin directory in `$PATH`. Run `npm prefix -g` to find it, then add `$(npm prefix -g)/bin` to your PATH. See [Troubleshooting](../reference/troubleshooting.md) for details. +{% endhint %} + +GSD checks for updates once every 24 hours. When a new version is available, you'll see a prompt at startup with the option to update immediately or skip. You can also update from within a session with `/gsd update`. + +## Set Up Your LLM Provider + +Launch GSD for the first time: + +```bash +gsd +``` + +The setup wizard walks you through: + +1. **LLM Provider** — choose from 20+ providers (Anthropic, OpenAI, Google, OpenRouter, GitHub Copilot, Amazon Bedrock, Azure, and more). OAuth flows handle Claude Max and Copilot subscriptions automatically; otherwise paste an API key. +2. **Tool API Keys** (optional) — Brave Search, Context7, Jina, Slack, Discord. Press Enter to skip any. + +Re-run the wizard anytime with: + +```bash +gsd config +``` + +For detailed provider setup, see [Provider Setup](../configuration/providers.md). + +## Set Up API Keys for Tools + +If you use a non-Anthropic model, you may need a search API key for web search. Run `/gsd config` inside any GSD session to set keys globally — they're saved to `~/.gsd/agent/auth.json` and apply to all projects. + +| Tool | Purpose | Get a Key | +|------|---------|-----------| +| Tavily Search | Web search for non-Anthropic models | [tavily.com](https://tavily.com/app/api-keys) | +| Brave Search | Web search for non-Anthropic models | [brave.com](https://brave.com/search/api) | +| Context7 Docs | Library documentation lookup | [context7.com](https://context7.com/dashboard) | + +Anthropic models have built-in web search and don't need these keys. + +## VS Code Extension + +GSD is also available as a VS Code extension. Install from the marketplace (publisher: FluxLabs) or search for "GSD" in VS Code extensions. 
+
+The extension provides:
+
+- **`@gsd` chat participant** — talk to the agent in VS Code Chat
+- **Sidebar dashboard** — connection status, model info, token usage, quick actions
+- **Full command palette** — start/stop agent, switch models, export sessions
+
+The CLI (`gsd-pi`) must be installed first — the extension connects to it via RPC.
+
+## Web Interface
+
+GSD also has a browser-based interface:
+
+```bash
+gsd --web
+```
+
+This starts a local web server with a visual dashboard, real-time progress, and multi-project support. See [Web Interface](../features/web-interface.md) for details.
+
+## Alternative Binary Name
+
+If the `gsd` command conflicts with another tool (e.g., the oh-my-zsh `gitfast` plugin aliases `gsd` to `git svn dcommit`), use the alternative:
+
+```bash
+gsd-cli
+```
+
+Both `gsd` and `gsd-cli` point to the same binary. To remove the conflict permanently, add this to your `~/.zshrc`:
+
+```bash
+unalias gsd 2>/dev/null
+```
diff --git a/gitbook/reference/cli-flags.md b/gitbook/reference/cli-flags.md
new file mode 100644
index 000000000..a1de87f37
--- /dev/null
+++ b/gitbook/reference/cli-flags.md
@@ -0,0 +1,61 @@
+# CLI Flags
+
+## Starting GSD
+
+| Flag | Description |
+|------|-------------|
+| `gsd` | Start a new interactive session |
+| `gsd --continue` (`-c`) | Resume the most recent session |
+| `gsd --model <model>` | Override the default model for this session |
+| `gsd --web [path]` | Start browser-based web interface |
+| `gsd --worktree [name]` (`-w`) | Start in a git worktree |
+| `gsd --no-session` | Disable session persistence |
+| `gsd --extension <path>` | Load an additional extension (repeatable) |
+| `gsd --append-system-prompt <text>` | Append text to the system prompt |
+| `gsd --tools <list>` | Comma-separated tools to enable |
+| `gsd --version` (`-v`) | Print version and exit |
+| `gsd --help` (`-h`) | Print help and exit |
+| `gsd --debug` | Enable diagnostic logging |
+
+## Non-Interactive Modes
+
+| Flag | Description |
+|------|-------------|
+| `gsd --print "msg"` (`-p`) | Single-shot prompt mode (no TUI) |
+| `gsd --mode <mode>` | Output mode for non-interactive use |
+
+## Session Management
+
+| Command | Description |
+|---------|-------------|
+| `gsd sessions` | Interactive session picker — list and resume saved sessions |
+| `gsd --list-models [search]` | List available models and exit |
+
+## Configuration
+
+| Command | Description |
+|---------|-------------|
+| `gsd config` | Set up global API keys |
+| `gsd update` | Update to the latest version |
+
+## Headless Mode
+
+| Flag | Description |
+|------|-------------|
+| `gsd headless` | Run without TUI |
+| `gsd headless --timeout N` | Timeout in ms (default: 300000) |
+| `gsd headless --max-restarts N` | Auto-restart on crash (default: 3) |
+| `gsd headless --json` | Stream events as JSONL |
+| `gsd headless --model ID` | Override model |
+| `gsd headless --context <file>` | Context file for `new-milestone` |
+| `gsd headless --context-text <text>` | Inline context for `new-milestone` |
+| `gsd headless --auto` | Chain into auto mode after milestone creation |
+| `gsd headless query` | Instant JSON state snapshot (~50ms) |
+
+## Web Interface
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--host` | `localhost` | Bind address |
+| `--port` | `3000` | Port |
+| `--allowed-origins` | (none) | CORS origins |
diff --git a/gitbook/reference/commands.md b/gitbook/reference/commands.md
new file mode 100644
index 000000000..e042723da
--- /dev/null
+++ b/gitbook/reference/commands.md
@@ -0,0 +1,128 @@
+# Commands
+
+## Session Commands
+
+| Command | Description |
+|---------|-------------|
+| `/gsd` | Step mode — execute one unit at a time |
+| `/gsd auto` | Autonomous mode — research, plan, execute, commit, repeat |
+| `/gsd quick` | Quick task with GSD guarantees but no full planning |
+| `/gsd stop` | Stop auto mode gracefully |
+| `/gsd pause` | Pause auto mode (preserves state) |
+| `/gsd steer` | Modify plan documents during execution |
+| `/gsd discuss` | Discuss architecture and decisions |
+| `/gsd status` | Progress dashboard |
+| `/gsd widget` | Cycle dashboard widget: full / small / min / off |
+| `/gsd queue` | Queue and reorder future milestones |
+| `/gsd capture` | Fire-and-forget thought capture |
+| `/gsd triage` | Manually trigger capture triage |
+| `/gsd dispatch` | Dispatch a specific phase directly |
+| `/gsd history` | View execution history (supports `--cost`, `--phase`, `--model` filters) |
+| `/gsd forensics` | Full debugger for auto-mode failures |
+| `/gsd cleanup` | Clean up state files and stale worktrees |
+| `/gsd visualize` | Open workflow visualizer |
+| `/gsd export --html` | Generate HTML report for current milestone |
+| `/gsd export --html --all` | Generate reports for all milestones |
+| `/gsd update` | Update GSD to the latest version |
+| `/gsd knowledge` | Add persistent project knowledge |
+| `/gsd fast` | Toggle service tier for supported models |
+| `/gsd rate` | Rate last unit's model tier (over/ok/under) |
+| `/gsd changelog` | Show release notes |
+| `/gsd logs` | Browse activity and debug logs |
+| `/gsd remote` | Control remote auto-mode |
+| `/gsd help` | Show all available commands |
+
+## Configuration & Diagnostics
+
+| Command | Description |
+|---------|-------------|
+| `/gsd prefs` | Preferences wizard |
+| `/gsd mode` | Switch workflow mode (solo/team) |
+| `/gsd config` | Re-run provider setup wizard |
+| `/gsd keys` | API key manager |
+| `/gsd doctor` | Runtime health checks with auto-fix |
+| `/gsd inspect` | Show database diagnostics |
+| `/gsd init` | Project init wizard |
+| `/gsd setup` | Global setup status |
+| `/gsd skill-health` | Skill lifecycle dashboard |
+| `/gsd hooks` | Show configured hooks |
+| `/gsd migrate` | Migrate v1 `.planning` to `.gsd` format |
+
+## Milestone Management
+
+| Command | Description |
+|---------|-------------|
+| `/gsd new-milestone` | Create a new milestone |
+| `/gsd skip` | Prevent a unit from auto-mode dispatch |
+| `/gsd undo` | Revert last completed unit |
+| `/gsd undo-task` | Reset a specific task's completion state |
+| `/gsd reset-slice` | Reset a slice and all its tasks |
+| `/gsd park` | Park a milestone (skip without deleting) |
+| `/gsd unpark` | Reactivate a parked milestone |
+
+## Parallel Orchestration
+
+| Command | Description |
+|---------|-------------|
+| `/gsd parallel start` | Analyze and start parallel workers |
+| `/gsd parallel status` | Show worker state and progress |
+| `/gsd parallel stop [MID]` | Stop workers |
+| `/gsd parallel pause [MID]` | Pause workers |
+| `/gsd parallel resume [MID]` | Resume workers |
+| `/gsd parallel merge [MID]` | Merge completed milestones |
+
+## Workflow Templates
+
+| Command | Description |
+|---------|-------------|
+| `/gsd start` | Start a workflow template |
+| `/gsd start resume` | Resume an in-progress workflow |
+| `/gsd templates` | List available templates |
+| `/gsd templates info <name>` | Show template details |
+
+## Custom Workflows
+
+| Command | Description |
+|---------|-------------|
+| `/gsd workflow new`
| Create a workflow definition |
+| `/gsd workflow run <name>` | Start a workflow run |
+| `/gsd workflow list` | List workflow runs |
+| `/gsd workflow validate <file>` | Validate a workflow YAML |
+| `/gsd workflow pause` | Pause workflow auto-mode |
+| `/gsd workflow resume` | Resume paused workflow |
+
+## Extensions
+
+| Command | Description |
+|---------|-------------|
+| `/gsd extensions list` | List all extensions |
+| `/gsd extensions enable <name>` | Enable an extension |
+| `/gsd extensions disable <name>` | Disable an extension |
+| `/gsd extensions info <name>` | Show extension details |
+
+## GitHub Sync
+
+| Command | Description |
+|---------|-------------|
+| `/github-sync bootstrap` | Initial GitHub sync setup |
+| `/github-sync status` | Show sync mapping counts |
+
+## Session Management
+
+| Command | Description |
+|---------|-------------|
+| `/clear` | Start a new session |
+| `/exit` | Graceful shutdown |
+| `/model` | Switch the active model |
+| `/login` | Log in to an LLM provider |
+| `/thinking` | Toggle thinking level |
+| `/voice` | Toggle speech-to-text |
+| `/worktree` (`/wt`) | Git worktree management |
+
+## In-Session Update
+
+```
+/gsd update
+```
+
+Checks npm for a newer version and installs it without leaving the session.
diff --git a/gitbook/reference/environment-variables.md b/gitbook/reference/environment-variables.md
new file mode 100644
index 000000000..c23af72df
--- /dev/null
+++ b/gitbook/reference/environment-variables.md
@@ -0,0 +1,56 @@
+# Environment Variables
+
+## GSD Configuration
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `GSD_HOME` | `~/.gsd` | Global GSD directory. All paths derive from this unless individually overridden. |
+| `GSD_PROJECT_ID` | (auto-hash) | Override automatic project identity hash. Useful for CI/CD or sharing state across repo clones. |
+| `GSD_STATE_DIR` | `$GSD_HOME` | Per-project state root. Controls where `projects/<id>/` directories are created. |
+| `GSD_CODING_AGENT_DIR` | `$GSD_HOME/agent` | Agent directory for extensions, auth, and managed resources. |
+| `GSD_FETCH_ALLOWED_URLS` | (none) | Comma-separated hostnames exempt from internal URL blocking. |
+| `GSD_ALLOWED_COMMAND_PREFIXES` | (built-in) | Comma-separated command prefixes allowed for value resolution. |
+| `GSD_WEB_PROJECT_CWD` | — | Default project path for `gsd --web` when `?project=` is not specified.
| + +## LLM Provider Keys + +| Variable | Provider | +|----------|----------| +| `ANTHROPIC_API_KEY` | Anthropic (Claude) | +| `OPENAI_API_KEY` | OpenAI | +| `GEMINI_API_KEY` | Google Gemini | +| `OPENROUTER_API_KEY` | OpenRouter | +| `GROQ_API_KEY` | Groq | +| `XAI_API_KEY` | xAI (Grok) | +| `MISTRAL_API_KEY` | Mistral | +| `GH_TOKEN` | GitHub Copilot | +| `AWS_PROFILE` | Amazon Bedrock (named profile) | +| `AWS_ACCESS_KEY_ID` | Amazon Bedrock (IAM keys) | +| `AWS_SECRET_ACCESS_KEY` | Amazon Bedrock (IAM keys) | +| `AWS_REGION` | Amazon Bedrock (region) | +| `AWS_BEARER_TOKEN_BEDROCK` | Amazon Bedrock (bearer token) | +| `ANTHROPIC_VERTEX_PROJECT_ID` | Vertex AI | +| `GOOGLE_APPLICATION_CREDENTIALS` | Vertex AI (ADC) | +| `AZURE_OPENAI_API_KEY` | Azure OpenAI | + +## Tool API Keys + +| Variable | Purpose | +|----------|---------| +| `TAVILY_API_KEY` | Tavily web search | +| `BRAVE_API_KEY` | Brave web search | +| `CONTEXT7_API_KEY` | Context7 documentation lookup | +| `DISCORD_BOT_TOKEN` | Discord remote questions | +| `TELEGRAM_BOT_TOKEN` | Telegram remote questions | + +## URL Blocking + +The `fetch_page` tool blocks requests to private/internal networks by default (SSRF protection). To allow specific internal hosts: + +```bash +export GSD_FETCH_ALLOWED_URLS="internal-docs.company.com,192.168.1.50" +``` + +Or set `fetchAllowedUrls` in `~/.gsd/agent/settings.json`. + +Blocked by default: private IP ranges, cloud metadata endpoints, localhost, non-HTTP protocols, IPv6 private ranges. diff --git a/gitbook/reference/keyboard-shortcuts.md b/gitbook/reference/keyboard-shortcuts.md new file mode 100644 index 000000000..8b2013729 --- /dev/null +++ b/gitbook/reference/keyboard-shortcuts.md @@ -0,0 +1,33 @@ +# Keyboard Shortcuts + +| Shortcut | Action | +|----------|--------| +| `Ctrl+Alt+G` | Toggle dashboard overlay | +| `Ctrl+Alt+V` | Toggle voice transcription | +| `Ctrl+Alt+B` | Show background shell processes | +| `Ctrl+V` / `Alt+V` | Paste image from clipboard (screenshot → vision input) | +| `Escape` | Pause auto mode (preserves conversation) | + +## Terminal Compatibility + +In terminals without Kitty keyboard protocol support (macOS Terminal.app, JetBrains IDEs), slash-command fallbacks are shown instead of `Ctrl+Alt` shortcuts. + +{% hint style="tip" %} +If `Ctrl+V` is intercepted by your terminal (e.g. Warp), use `Alt+V` instead for clipboard image paste. +{% endhint %} + +## iTerm2 Note + +If `Ctrl+Alt` shortcuts trigger the wrong action (e.g., `Ctrl+Alt+G` opens external editor instead of the dashboard), go to **Profiles → Keys → General** and set **Left Option Key** to **Esc+**. This makes Alt/Option work correctly with Ctrl combinations. + +## cmux Integration + +If you use cmux (terminal multiplexer), GSD can integrate with it: + +| Command | Description | +|---------|-------------| +| `/gsd cmux status` | Show cmux detection and capabilities | +| `/gsd cmux on` / `off` | Enable/disable integration | +| `/gsd cmux notifications on/off` | Toggle desktop notifications | +| `/gsd cmux sidebar on/off` | Toggle sidebar metadata | +| `/gsd cmux splits on/off` | Toggle visual subagent splits | diff --git a/gitbook/reference/migration.md b/gitbook/reference/migration.md new file mode 100644 index 000000000..54d548dc8 --- /dev/null +++ b/gitbook/reference/migration.md @@ -0,0 +1,48 @@ +# Migration from v1 + +If you have projects with `.planning` directories from the original Get Shit Done (v1), you can migrate them to GSD-2's `.gsd` format. 
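+
+At a glance, the mapping looks roughly like this (illustrative paths; your actual milestone and slice IDs will differ):
+
+```
+.planning/ROADMAP.md   →  .gsd/milestones/M001/M001-ROADMAP.md
+.planning/phases/01/   →  .gsd/milestones/M001/slices/S01/
+```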
+
+## Running the Migration
+
+```bash
+# From within the project directory
+/gsd migrate
+
+# Or specify a path
+/gsd migrate ~/projects/my-old-project
+```
+
+## What Gets Migrated
+
+The migration tool:
+
+- Parses your old `PROJECT.md`, `ROADMAP.md`, `REQUIREMENTS.md`, phase directories, plans, summaries, and research
+- Maps phases → slices, plans → tasks, milestones → milestones
+- Preserves completion state (`[x]` phases stay done, summaries carry over)
+- Consolidates research files into the new structure
+- Shows a preview before writing anything
+- Optionally runs an AI-driven review for quality assurance
+
+## Supported Formats
+
+The migration handles various v1 format variations:
+
+- Milestone-sectioned roadmaps with explicit per-milestone blocks
+- Bold phase entries
+- Bullet-format requirements
+- Decimal phase numbering
+- Duplicate phase numbers across milestones
+
+## Requirements
+
+Migration works best with a `ROADMAP.md` file for milestone structure. Without one, milestones are inferred from the `phases/` directory.
+
+## Post-Migration
+
+After migrating, verify the output:
+
+```
+/gsd doctor
+```
+
+This checks `.gsd/` integrity and flags any structural issues.
diff --git a/gitbook/reference/troubleshooting.md b/gitbook/reference/troubleshooting.md
new file mode 100644
index 000000000..8102ede58
--- /dev/null
+++ b/gitbook/reference/troubleshooting.md
@@ -0,0 +1,151 @@
+# Troubleshooting
+
+## `/gsd doctor`
+
+The built-in diagnostic tool validates `.gsd/` integrity:
+
+```
+/gsd doctor
+```
+
+It checks file structure, roadmap ↔ slice ↔ task consistency, completion state, git health, stale locks, and orphaned records.
+
+## Common Issues
+
+### Auto mode loops on the same unit
+
+The same unit dispatches repeatedly.
+
+**Fix:** Run `/gsd doctor` to repair state, then `/gsd auto`. If it persists, check that the expected artifact file exists on disk.
+
+### Auto mode stops with "Loop detected"
+
+A unit failed to produce its expected artifact twice.
+
+**Fix:** Check the task plan for clarity. Refine it manually, then `/gsd auto`.
+
+### `command not found: gsd` after install
+
+npm's global bin directory isn't in `$PATH`.
+
+**Fix:**
+```bash
+npm prefix -g
+# Add the bin dir to PATH:
+echo 'export PATH="$(npm prefix -g)/bin:$PATH"' >> ~/.zshrc
+source ~/.zshrc
+```
+
+**Common causes:**
+- **Homebrew Node** — `/opt/homebrew/bin` missing from PATH
+- **Version manager (nvm, fnm, mise)** — global bin is version-specific
+- **oh-my-zsh** — `gitfast` plugin aliases `gsd` to `git svn dcommit`; check with `alias gsd`
+
+### Provider errors during auto mode
+
+| Error Type | Auto-Resume? | Delay |
+|-----------|-------------|-------|
+| Rate limit (429) | Yes | 60s or retry-after header |
+| Server error (500, 502, 503) | Yes | 30s |
+| Auth/billing ("unauthorized") | No | Manual resume required |
+
+For permanent errors, configure fallback models:
+
+```yaml
+models:
+  execution:
+    model: claude-sonnet-4-6
+    fallbacks:
+      - openrouter/minimax/minimax-m2.5
+```
+
+### Budget ceiling reached
+
+Auto mode pauses with "Budget ceiling reached."
+
+**Fix:** Increase `budget_ceiling` in preferences, or switch to the `budget` token profile, then `/gsd auto`.
+
+### Stale lock file
+
+Auto mode won't start and reports that another session is running.
+
+**Fix:** GSD auto-detects stale locks (dead PID = auto cleanup). If automatic recovery fails:
+
+```bash
+rm -f .gsd/auto.lock
+rm -rf "$(dirname .gsd)/.gsd.lock"
+```
+
+### Git merge conflicts
+
+Worktree merge fails on `.gsd/` files.
+
+**Fix:** `.gsd/` conflicts are auto-resolved. Code conflicts get an AI fix attempt; if that fails, resolve manually.
+
+### Notifications not appearing on macOS
+
+**Fix:** Install `terminal-notifier`:
+
+```bash
+brew install terminal-notifier
+```
+
+See [Notifications](../configuration/notifications.md) for details.
+
+## MCP Issues
+
+### No servers configured
+
+**Fix:** Add a server entry to `.mcp.json` or `.gsd/mcp.json`, verify the JSON is valid, then run `mcp_servers(refresh=true)`.
+
+### Server discovery times out
+
+**Fix:** Run the configured command outside GSD to confirm it starts. Check that backend services are reachable.
+
+### Server connection closed immediately
+
+**Fix:** Verify `command` and `args` paths are correct and absolute.
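+
+For reference, a well-formed entry looks something like this (the server name and command path are illustrative, and the `mcpServers` key follows the common MCP client convention, so check it against GSD's own examples):
+
+```json
+{
+  "mcpServers": {
+    "my-server": {
+      "command": "/usr/local/bin/my-mcp-server",
+      "args": ["--stdio"]
+    }
+  }
+}
+```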
Run the command manually to catch errors. + +## Recovery Procedures + +### Reset auto mode state + +```bash +rm .gsd/auto.lock +rm .gsd/completed-units.json +``` + +Then `/gsd auto` to restart from current state. + +### Reset routing history + +```bash +rm .gsd/routing-history.json +``` + +### Full state rebuild + +``` +/gsd doctor +``` + +Rebuilds `STATE.md` from plan and roadmap files and fixes inconsistencies. + +## Getting Help + +- **GitHub Issues:** [github.com/gsd-build/GSD-2/issues](https://github.com/gsd-build/GSD-2/issues) +- **Dashboard:** `Ctrl+Alt+G` or `/gsd status` +- **Forensics:** `/gsd forensics` for post-mortem analysis +- **Session logs:** `.gsd/activity/` contains JSONL session dumps + +## Platform-Specific Issues + +### iTerm2 + +`Ctrl+Alt` shortcuts trigger wrong actions → Set **Profiles → Keys → General → Left Option Key** to **Esc+**. + +### Windows + +- LSP ENOENT on MSYS2/Git Bash → Fixed in v2.29+, upgrade +- EBUSY errors during builds → Close browser extension, or change output directory +- Transient EBUSY/EPERM on `.gsd/` files → Retry; close file-locking tools if persistent diff --git a/mintlify-docs/docs.json b/mintlify-docs/docs.json index a100f196a..5b37bb057 100644 --- a/mintlify-docs/docs.json +++ b/mintlify-docs/docs.json @@ -77,6 +77,7 @@ { "group": "Features", "pages": [ + "guides/change-management", "guides/captures-triage", "guides/parallel-orchestration", "guides/remote-questions", diff --git a/mintlify-docs/guides/change-management.mdx b/mintlify-docs/guides/change-management.mdx new file mode 100644 index 000000000..168511fd7 --- /dev/null +++ b/mintlify-docs/guides/change-management.mdx @@ -0,0 +1,211 @@ +--- +title: "Change management" +description: "How to handle bugs, new features, and roadmap reshuffling after milestones are underway." +--- + +Reality diverges from plans. This guide covers every situation where you need to act on something discovered after work is already in flight — from a single bug fix to inserting a whole new milestone before the next planned one. + +## Mental model + +GSD's pipeline looks like this: + +``` +M001 complete → M002 complete → M003 → M004 → ... +``` + +The key invariant: **completed units are sealed.** A finished task, slice, or milestone can't be un-completed. What you can always do is add new work ahead of or after any remaining unit. + +Between milestones you have the most freedom. Inside a running milestone you have real steering options without breaking the state machine. + +--- + +## Small bug or quick fix + +**A self-contained fix that can be described in a sentence.** + +``` +/gsd quick "fix the date formatting bug in the invoice renderer" +``` + +`/gsd quick` executes immediately with full GSD guarantees (atomic commit, state tracking) but skips milestone ceremony. It doesn't touch the milestone pipeline. + +--- + +## Bug or idea discovered while auto-mode is running + +**You spot something mid-execution but don't want to interrupt the run.** + +``` +/gsd capture "the login redirect is broken on mobile viewports" +/gsd capture "add a loading spinner to the data table" +``` + +Captures are appended to `.gsd/CAPTURES.md` and triaged automatically at natural seams between tasks. See [captures and triage](/guides/captures-triage) for the full classification system. 
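+
+On disk the queue is plain Markdown, so you can always open `.gsd/CAPTURES.md` to see what's pending. Something like this (illustrative; the exact format is GSD's own):
+
+```markdown
+- [ ] the login redirect is broken on mobile viewports
+- [ ] add a loading spinner to the data table
+```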
+
+To force processing immediately:
+
+```
+/gsd triage
+```
+
+---
+
+## Current slice plan is wrong
+
+**You're mid-slice and the plan no longer makes sense — wrong approach, missing step, or a blocker.**
+
+```
+/gsd steer
+```
+
+This opens an interactive session to hard-edit plan documents. Changes are picked up at the next phase boundary without stopping auto-mode.
+
+For structural changes (adding or removing tasks), the agent triggers a slice replan internally when it discovers a blocker. Completed tasks are protected — only pending tasks can be mutated.
+
+---
+
+## Bugs and features need to land before the next milestone
+
+**M001 and M002 are done. You've found bugs and have new features that must ship before M003 can proceed.**
+
+1. Triage the work. Separate it into bugs vs. features, and must-land-before-M003 vs. can-wait.
+2. Create a milestone for the blocking work:
+   ```
+   /gsd new-milestone
+   ```
+   Describe the bugs and features. GSD creates a milestone — the title is what matters, not the number.
+3. Check the pipeline:
+   ```
+   /gsd queue
+   ```
+   Confirm the new milestone is queued before M003. Reorder if needed.
+4. Park M003 if it should wait:
+   ```
+   /gsd park M003
+   ```
+   Parking skips M003 without deleting it. Unpark when ready:
+   ```
+   /gsd unpark M003
+   ```
5. Resume execution:
+   ```
+   /gsd auto
+   ```
+   Auto-mode dispatches the next active milestone in queue order.
+
+---
+
+## Modifying a not-yet-started milestone
+
+**You want to change M003's scope — add slices, remove slices, change the approach — before it starts.**
+
+Since M003 hasn't started, its plan files can be edited directly. Use `/gsd discuss` to talk through the changes and let GSD rewrite the artifacts:
+
+```
+/gsd discuss
+```
+
+> "M003 needs to include the new auth flow we discovered. Can we add a slice for that and remove the old token refresh slice?"
+
+Or use `/gsd steer` to edit plan files directly.
+
+If M003 is partially done (some slices complete), auto-mode calls `reassess-roadmap` automatically after each slice. You can also discuss changes during a pause — GSD can add, modify, or remove pending slices without touching the completed ones.
+
+---
+
+## A milestone needs to step back one position
+
+**Your "Milestone 3" is effectively now "Milestone 4" because new work must insert before it.**
+
+GSD milestone numbers are labels, not positions. Execution order is controlled by the queue, not the ID. The procedure is the same as above: create the new milestone, confirm queue order with `/gsd queue`, park M003 if needed.
+
+The milestone IDs stay as-is — M003 just executes later. No renumbering needed.
+
+---
+
+## Many bugs — worth a dedicated bugfix milestone
+
+**After M002 you have 10+ bugs across multiple systems. Too scattered for individual quick tasks.**
+
+```
+/gsd new-milestone
+```
+
+Describe the full bug list. GSD creates a milestone with slices organized by system or severity. Run it in auto-mode like any other milestone. When done, all bugs land as clean commits with a formal milestone summary — readable as a bugfix release.
+
+---
+
+## Feature ideas that can wait
+
+**Real ideas, but nothing that blocks the current plan.**
+
+```
+/gsd capture "dark mode toggle on the dashboard"
+```
+
+Deferred captures surface during roadmap reassessment. GSD can fold them into a later milestone when the timing makes sense. Or queue a dedicated features milestone directly:
+
+```
+/gsd queue
+```
+
+---
+
+## Bug in a completed slice
+
+**A shipped slice has a bug but it's already sealed.**
+
+You cannot un-complete the slice.
Options:
+
+- `/gsd quick` for small fixes
+- A new slice in the next milestone that explicitly patches the bug — reference the original slice in the description
+- `/gsd steer` to add a fix task to the current active milestone if you're still inside it
+
+The completed slice record is preserved as-is. The fix lands as new work with its own commit and summary.
+
+---
+
+## An entire milestone went in the wrong direction
+
+**M002 is done but the approach was wrong, and M003 builds on it.**
+
+1. Diagnose before acting:
+   ```
+   /gsd discuss
+   ```
+   Work through what's wrong and what the correction looks like before touching anything.
+2. Create a correction milestone: a focused "M002b" or "M002-fix" that refactors or replaces what went wrong. Scope it precisely to the broken parts.
+3. Reassess M003: if it doesn't depend on the broken parts, trim its scope so it doesn't compound the problem. Fix fully in the correction milestone first.
+
+---
+
+## Quick reference
+
+| Situation | Command |
+|---|---|
+| Small self-contained fix | `/gsd quick` |
+| Thought during auto-mode | `/gsd capture` |
+| Force-process captures now | `/gsd triage` |
+| Current slice plan is wrong | `/gsd steer` |
+| New work must land before next milestone | `/gsd new-milestone` + `/gsd queue` |
+| Delay a future milestone | `/gsd park <MID>` / `/gsd unpark <MID>` |
+| Modify a not-yet-started milestone | `/gsd discuss` or `/gsd steer` |
+| Many bugs → dedicated milestone | `/gsd new-milestone` (bugfix scope) |
+| Ideas that can wait | `/gsd capture` or `/gsd queue` |
+| Check/reorder pipeline | `/gsd queue` |
+| Architecture discussion | `/gsd discuss` |
diff --git a/mintlify-docs/guides/working-in-teams.mdx b/mintlify-docs/guides/working-in-teams.mdx
index 72baa19e2..d59bce66b 100644
--- a/mintlify-docs/guides/working-in-teams.mdx
+++ b/mintlify-docs/guides/working-in-teams.mdx
@@ -26,19 +26,24 @@ Share planning artifacts while keeping runtime files local:
 ```bash
 # Runtime / ephemeral (per-developer)
 .gsd/auto.lock
-.gsd/completed-units.json
+.gsd/completed-units*.json
+.gsd/state-manifest.json
 .gsd/STATE.md
 .gsd/metrics.json
 .gsd/activity/
 .gsd/runtime/
 .gsd/worktrees/
+.gsd/gsd.db*
+.gsd/journal/
+.gsd/doctor-history.jsonl
+.gsd/event-log.jsonl
 .gsd/milestones/**/continue.md
 .gsd/milestones/**/*-CONTINUE.md
 ```
 
 **Shared** (committed): preferences, PROJECT.md, REQUIREMENTS.md, DECISIONS.md, milestones.
 
-**Local** (gitignored): lock files, metrics, state cache, worktrees, activity logs.
+**Local** (gitignored): lock files, metrics, state cache, worktrees, activity logs, database files, journals.
 
 ### 3.
Commit diff --git a/native/npm/darwin-arm64/package.json b/native/npm/darwin-arm64/package.json index f619507b8..c72fddbe1 100644 --- a/native/npm/darwin-arm64/package.json +++ b/native/npm/darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-darwin-arm64", - "version": "2.68.0", + "version": "2.73.0", "description": "GSD native engine binary for macOS ARM64", "os": [ "darwin" diff --git a/native/npm/darwin-x64/package.json b/native/npm/darwin-x64/package.json index de80d45c2..31593c568 100644 --- a/native/npm/darwin-x64/package.json +++ b/native/npm/darwin-x64/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-darwin-x64", - "version": "2.68.0", + "version": "2.73.0", "description": "GSD native engine binary for macOS Intel", "os": [ "darwin" diff --git a/native/npm/linux-arm64-gnu/package.json b/native/npm/linux-arm64-gnu/package.json index 5c73452dc..ffb8ac1ea 100644 --- a/native/npm/linux-arm64-gnu/package.json +++ b/native/npm/linux-arm64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-linux-arm64-gnu", - "version": "2.68.0", + "version": "2.73.0", "description": "GSD native engine binary for Linux ARM64 (glibc)", "os": [ "linux" diff --git a/native/npm/linux-x64-gnu/package.json b/native/npm/linux-x64-gnu/package.json index 6ff3a766f..8b0454997 100644 --- a/native/npm/linux-x64-gnu/package.json +++ b/native/npm/linux-x64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-linux-x64-gnu", - "version": "2.68.0", + "version": "2.73.0", "description": "GSD native engine binary for Linux x64 (glibc)", "os": [ "linux" diff --git a/native/npm/win32-x64-msvc/package.json b/native/npm/win32-x64-msvc/package.json index 19e54234b..b1b27cdad 100644 --- a/native/npm/win32-x64-msvc/package.json +++ b/native/npm/win32-x64-msvc/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-win32-x64-msvc", - "version": "2.68.0", + "version": "2.73.0", "description": "GSD native engine binary for Windows x64 (MSVC)", "os": [ "win32" diff --git a/package-lock.json b/package-lock.json index cae86f699..9a69520aa 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "gsd-pi", - "version": "2.66.1", + "version": "2.72.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "gsd-pi", - "version": "2.66.1", + "version": "2.72.0", "hasInstallScript": true, "license": "MIT", "workspaces": [ @@ -9535,7 +9535,7 @@ }, "packages/pi-coding-agent": { "name": "@gsd/pi-coding-agent", - "version": "2.66.1", + "version": "2.72.0", "dependencies": { "@mariozechner/jiti": "^2.6.2", "@silvia-odwyer/photon-node": "^0.3.4", diff --git a/package.json b/package.json index bfb1a5ebc..e76440ea2 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "gsd-pi", - "version": "2.68.0", + "version": "2.73.0", "description": "GSD — Get Shit Done coding agent", "license": "MIT", "repository": { @@ -56,22 +56,22 @@ "copy-themes": "node scripts/copy-themes.cjs", "copy-export-html": "node scripts/copy-export-html.cjs", "test:compile": "node scripts/compile-tests.mjs", - "test:unit": "npm run test:compile && node --import ./scripts/dist-test-resolve.mjs --experimental-test-isolation=process --test-reporter=./scripts/test-reporter-compact.mjs --test 'dist-test/src/tests/*.test.js' 'dist-test/src/resources/extensions/gsd/tests/*.test.js' 'dist-test/src/resources/extensions/gsd/tests/*.test.mjs' 'dist-test/src/resources/extensions/shared/tests/*.test.js' 'dist-test/src/resources/extensions/claude-code-cli/tests/*.test.js' 
'dist-test/src/resources/extensions/github-sync/tests/*.test.js' 'dist-test/src/resources/extensions/universal-config/tests/*.test.js' 'dist-test/src/resources/extensions/voice/tests/*.test.js' 'dist-test/src/resources/extensions/mcp-client/tests/*.test.js'", - "test:packages": "node --test packages/pi-coding-agent/dist/core/*.test.js", - "test:marketplace": "GSD_TEST_CLONE_MARKETPLACES=1 node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/resources/extensions/gsd/tests/claude-import-tui.test.ts src/resources/extensions/gsd/tests/plugin-importer-live.test.ts src/tests/marketplace-discovery.test.ts", - "test:coverage": "c8 --reporter=text --reporter=lcov --exclude='src/resources/extensions/gsd/tests/**' --exclude='src/tests/**' --exclude='scripts/**' --exclude='native/**' --exclude='node_modules/**' --check-coverage --statements=40 --lines=40 --branches=20 --functions=20 node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --experimental-test-isolation=process --test src/resources/extensions/gsd/tests/*.test.ts src/resources/extensions/gsd/tests/*.test.mjs src/tests/*.test.ts src/resources/extensions/shared/tests/*.test.ts", - "test:integration": "node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test 'src/tests/integration/*.test.ts' 'src/resources/extensions/gsd/tests/integration/*.test.ts' 'src/resources/extensions/async-jobs/*.test.ts' 'src/resources/extensions/browser-tools/tests/*.test.mjs'", + "test:unit": "npm run test:compile && node --import ./scripts/dist-test-resolve.mjs --experimental-test-isolation=process --test-reporter=./scripts/test-reporter-compact.mjs --test \"dist-test/src/tests/*.test.js\" \"dist-test/src/resources/extensions/gsd/tests/*.test.js\" \"dist-test/src/resources/extensions/gsd/tests/*.test.mjs\" \"dist-test/src/resources/extensions/shared/tests/*.test.js\" \"dist-test/src/resources/extensions/claude-code-cli/tests/*.test.js\" \"dist-test/src/resources/extensions/github-sync/tests/*.test.js\" \"dist-test/src/resources/extensions/universal-config/tests/*.test.js\" \"dist-test/src/resources/extensions/voice/tests/*.test.js\" \"dist-test/src/resources/extensions/mcp-client/tests/*.test.js\"", + "test:packages": "node --test packages/pi-coding-agent/dist/core/*.test.js packages/pi-coding-agent/dist/core/tools/spawn-shell-windows.test.js", + "test:marketplace": "node scripts/with-env.mjs GSD_TEST_CLONE_MARKETPLACES=1 -- node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/resources/extensions/gsd/tests/claude-import-tui.test.ts src/resources/extensions/gsd/tests/plugin-importer-live.test.ts src/tests/marketplace-discovery.test.ts", + "test:coverage": "c8 --reporter=text --reporter=lcov --exclude=\"src/resources/extensions/gsd/tests/**\" --exclude=\"src/tests/**\" --exclude=\"scripts/**\" --exclude=\"native/**\" --exclude=\"node_modules/**\" --check-coverage --statements=40 --lines=40 --branches=20 --functions=20 node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --experimental-test-isolation=process --test src/resources/extensions/gsd/tests/*.test.ts src/resources/extensions/gsd/tests/*.test.mjs src/tests/*.test.ts src/resources/extensions/shared/tests/*.test.ts", + "test:integration": "node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test \"src/tests/integration/*.test.ts\" 
\"src/resources/extensions/gsd/tests/integration/*.test.ts\" \"src/resources/extensions/async-jobs/*.test.ts\" \"src/resources/extensions/browser-tools/tests/*.test.mjs\"", "pretest": "npm run typecheck:extensions", "test": "npm run test:unit && npm run test:integration", "test:smoke": "node --experimental-strip-types tests/smoke/run.ts", "test:fixtures": "node --experimental-strip-types tests/fixtures/run.ts", - "test:fixtures:record": "GSD_FIXTURE_MODE=record node --experimental-strip-types tests/fixtures/record.ts", - "test:live": "GSD_LIVE_TESTS=1 node --experimental-strip-types tests/live/run.ts", + "test:fixtures:record": "node scripts/with-env.mjs GSD_FIXTURE_MODE=record -- node --experimental-strip-types tests/fixtures/record.ts", + "test:live": "node scripts/with-env.mjs GSD_LIVE_TESTS=1 -- node --experimental-strip-types tests/live/run.ts", "test:browser-tools": "node --test src/resources/extensions/browser-tools/tests/browser-tools-unit.test.cjs src/resources/extensions/browser-tools/tests/browser-tools-integration.test.mjs", "test:native": "node --test packages/native/src/__tests__/grep.test.mjs", "test:secret-scan": "node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/tests/secret-scan.test.ts", - "secret-scan": "bash scripts/secret-scan.sh", - "secret-scan:install-hook": "bash scripts/install-hooks.sh", + "secret-scan": "node scripts/secret-scan.mjs", + "secret-scan:install-hook": "node scripts/install-hooks.mjs", "build:native": "node native/scripts/build.js", "build:native:dev": "node native/scripts/build.js --dev", "dev": "node scripts/dev.js", @@ -92,7 +92,7 @@ "release:update-changelog": "node scripts/update-changelog.mjs", "docker:build-runtime": "docker build --target runtime -t ghcr.io/gsd-build/gsd-pi .", "docker:build-builder": "docker build --target builder -t ghcr.io/gsd-build/gsd-ci-builder .", - "prepublishOnly": "npm run sync-pkg-version && npm run sync-platform-versions && ([ \"$CI\" = 'true' ] || git diff --exit-code || (echo 'ERROR: version sync changed files — commit them before publishing' && exit 1)) && npm run build && npm run typecheck:extensions && npm run validate-pack", + "prepublishOnly": "npm run sync-pkg-version && npm run sync-platform-versions && node scripts/prepublish-check.mjs && npm run build && npm run typecheck:extensions && npm run validate-pack", "test:live-regression": "node --experimental-strip-types tests/live-regression/run.ts" }, "dependencies": { diff --git a/packages/daemon/src/orchestrator.ts b/packages/daemon/src/orchestrator.ts index 678874cec..fe2998d8f 100644 --- a/packages/daemon/src/orchestrator.ts +++ b/packages/daemon/src/orchestrator.ts @@ -12,9 +12,6 @@ */ import { z } from 'zod'; -import { readFileSync, writeFileSync, chmodSync } from 'node:fs'; -import { join } from 'node:path'; -import { homedir } from 'node:os'; import type Anthropic from '@anthropic-ai/sdk'; import type { MessageParam, @@ -30,90 +27,18 @@ import type { ProjectInfo, ManagedSession } from './types.js'; import type { Logger } from './logger.js'; // --------------------------------------------------------------------------- -// OAuth token resolution — reads GSD's auth.json, refreshes if expired +// API key resolution — requires ANTHROPIC_API_KEY env var +// Anthropic OAuth removed per TOS compliance (see docs/user-docs/claude-code-auth-compliance.md) // --------------------------------------------------------------------------- -interface OAuthCredentials { - type: 'oauth'; - refresh: string; - 
access: string; - expires: number; -} - -const TOKEN_URL = 'https://platform.claude.com/v1/oauth/token'; -const CLIENT_ID = atob('OWQxYzI1MGEtZTYxYi00NGQ5LTg4ZWQtNTk0NGQxOTYyZjVl'); - -/** - * Read the Anthropic OAuth access token from GSD's auth.json. - * If expired, refresh it and write the new credentials back. - * Falls back to ANTHROPIC_API_KEY env var if no OAuth credential exists. - */ -async function resolveAnthropicApiKey(logger?: Logger): Promise { - // Try env var first (explicit override) - if (process.env.ANTHROPIC_API_KEY) { - return process.env.ANTHROPIC_API_KEY; - } - - const authPath = join(homedir(), '.gsd', 'agent', 'auth.json'); - let authData: Record; - try { - authData = JSON.parse(readFileSync(authPath, 'utf-8')); - } catch { +function resolveAnthropicApiKey(): string { + const apiKey = process.env.ANTHROPIC_API_KEY; + if (!apiKey) { throw new Error( - 'No Anthropic auth found. Run `gsd login` to authenticate, or set ANTHROPIC_API_KEY.', + 'ANTHROPIC_API_KEY is required. Set it in your environment or run `gsd config`.', ); } - - const cred = authData.anthropic as OAuthCredentials | undefined; - if (!cred || cred.type !== 'oauth' || !cred.access) { - throw new Error( - 'No Anthropic OAuth credential in auth.json. Run `gsd login` to authenticate.', - ); - } - - // If token is still valid, use it - if (Date.now() < cred.expires) { - return cred.access; - } - - // Token expired — refresh it - logger?.info('orchestrator: refreshing Anthropic OAuth token'); - const response = await fetch(TOKEN_URL, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - grant_type: 'refresh_token', - client_id: CLIENT_ID, - refresh_token: cred.refresh, - }), - signal: AbortSignal.timeout(30_000), - }); - - if (!response.ok) { - const error = await response.text(); - throw new Error(`Anthropic token refresh failed: ${error}`); - } - - const data = (await response.json()) as { - access_token: string; - refresh_token: string; - expires_in: number; - }; - - const newCred: OAuthCredentials = { - type: 'oauth', - refresh: data.refresh_token, - access: data.access_token, - expires: Date.now() + data.expires_in * 1000 - 5 * 60 * 1000, - }; - - // Write back to auth.json - authData.anthropic = newCred; - writeFileSync(authPath, JSON.stringify(authData, null, 2), 'utf-8'); - chmodSync(authPath, 0o600); - logger?.info('orchestrator: Anthropic OAuth token refreshed'); - - return newCred.access; + return apiKey; } // --------------------------------------------------------------------------- @@ -254,11 +179,11 @@ export class Orchestrator { /** * Lazily initialise the Anthropic client. Dynamic import handles K007 module resolution. - * Resolves auth from GSD's OAuth credentials (auth.json), refreshing if needed. + * Requires ANTHROPIC_API_KEY environment variable. 
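+   * The SDK client is cached on the instance after the first call, so the
+   * variable is read once per daemon process.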
*/ private async getClient(): Promise { if (this.client) return this.client; - const apiKey = await resolveAnthropicApiKey(this.deps.logger); + const apiKey = resolveAnthropicApiKey(); const { default: AnthropicSDK } = await import('@anthropic-ai/sdk'); this.client = new AnthropicSDK({ apiKey }); return this.client; diff --git a/packages/mcp-server/README.md b/packages/mcp-server/README.md index dcc32aa94..642657dd7 100644 --- a/packages/mcp-server/README.md +++ b/packages/mcp-server/README.md @@ -7,7 +7,8 @@ Start GSD auto-mode sessions, poll progress, resolve blockers, and retrieve resu This package now exposes two tool surfaces: - session/read tools for starting and inspecting GSD sessions -- workflow mutation tools for planning, completion, validation, reassessment, and gate persistence +- MCP-native interactive tools for structured user input +- headless-safe workflow tools for planning, completion, validation, reassessment, metadata persistence, and journal reads ## Installation @@ -74,18 +75,29 @@ Add to `.cursor/mcp.json`: ## Tools -### Workflow mutation tools +### Workflow tools The workflow MCP surface includes: +- `gsd_decision_save` +- `gsd_save_decision` +- `gsd_requirement_update` +- `gsd_update_requirement` +- `gsd_requirement_save` +- `gsd_save_requirement` +- `gsd_milestone_generate_id` +- `gsd_generate_milestone_id` - `gsd_plan_milestone` - `gsd_plan_slice` +- `gsd_plan_task` +- `gsd_task_plan` - `gsd_replan_slice` - `gsd_slice_replan` - `gsd_task_complete` - `gsd_complete_task` - `gsd_slice_complete` - `gsd_complete_slice` +- `gsd_skip_slice` - `gsd_validate_milestone` - `gsd_milestone_validate` - `gsd_complete_milestone` @@ -95,13 +107,21 @@ The workflow MCP surface includes: - `gsd_save_gate_result` - `gsd_summary_save` - `gsd_milestone_status` +- `gsd_journal_query` -These mutation tools use the same GSD workflow handlers as the native in-process tool path. +These tools use the same GSD workflow handlers as the native in-process tool path wherever a shared handler exists. + +### Interactive tools + +The packaged server now exposes `ask_user_questions` through MCP form elicitation. This keeps the existing GSD answer payload shape while allowing Claude Code CLI and other elicitation-capable clients to surface structured user choices. + +`secure_env_collect` is still not exposed by this package. That path needs MCP URL elicitation or an equivalent secure bridge because secrets should not flow through form elicitation. Current support boundary: - when running inside the GSD monorepo checkout, the MCP server auto-discovers the shared workflow executor module - outside the monorepo, set `GSD_WORKFLOW_EXECUTORS_MODULE` to an importable `workflow-tool-executors` module path if you want the mutation tools enabled +- `ask_user_questions` requires an MCP client that supports form elicitation - session/read tools do not depend on this bridge If the executor bridge cannot be loaded, workflow mutation calls will fail with a precise configuration error instead of silently degrading. @@ -214,6 +234,8 @@ Resolve a pending blocker in a session by sending a response to the blocked UI r | `GSD_CLI_PATH` | Absolute path to the GSD CLI binary. If not set, the server resolves `gsd` via `which`. | | `GSD_WORKFLOW_EXECUTORS_MODULE` | Optional absolute path or `file:` URL for the shared GSD workflow executor module used by workflow mutation tools. | +The server also hydrates supported model-provider and tool credentials from `~/.gsd/agent/auth.json` on startup. 
Keys saved through `/gsd config` or `/gsd keys` become available to the MCP server process automatically, and any explicitly-set environment variable still wins. + ## Architecture ``` diff --git a/packages/mcp-server/src/cli.ts b/packages/mcp-server/src/cli.ts index 744749d03..e9b64d794 100644 --- a/packages/mcp-server/src/cli.ts +++ b/packages/mcp-server/src/cli.ts @@ -1,5 +1,3 @@ -#!/usr/bin/env node - /** * @gsd-build/mcp-server CLI — stdio transport entry point. * @@ -9,13 +7,17 @@ import { SessionManager } from './session-manager.js'; import { createMcpServer } from './server.js'; +import { loadStoredCredentialEnvKeys } from './tool-credentials.js'; const MCP_PKG = '@modelcontextprotocol/sdk'; async function main(): Promise { + loadStoredCredentialEnvKeys(); + const sessionManager = new SessionManager(); - // Create the configured MCP server with all 12 tools (6 session + 6 read-only) + // Create the configured MCP server with session, interactive, read-only, + // and workflow tools. const { server } = await createMcpServer(sessionManager); // Dynamic import for StdioServerTransport (same TS subpath workaround) diff --git a/packages/mcp-server/src/env-writer.test.ts b/packages/mcp-server/src/env-writer.test.ts new file mode 100644 index 000000000..5932d1cfb --- /dev/null +++ b/packages/mcp-server/src/env-writer.test.ts @@ -0,0 +1,280 @@ +// @gsd-build/mcp-server — Tests for env-writer utilities +// Copyright (c) 2026 Jeremy McSpadden + +import { describe, it, afterEach } from 'node:test'; +import assert from 'node:assert/strict'; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync, readFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + +import { + checkExistingEnvKeys, + detectDestination, + writeEnvKey, + applySecrets, + isSafeEnvVarKey, + isSupportedDeploymentEnvironment, + shellEscapeSingle, +} from './env-writer.js'; + +function makeTempDir(prefix: string): string { + return mkdtempSync(join(tmpdir(), `${prefix}-`)); +} + +// --------------------------------------------------------------------------- +// checkExistingEnvKeys +// --------------------------------------------------------------------------- + +describe('checkExistingEnvKeys', () => { + it('finds key in .env file', async () => { + const tmp = makeTempDir('env-check'); + try { + const envPath = join(tmp, '.env'); + writeFileSync(envPath, 'API_KEY=secret123\nOTHER=val\n'); + const result = await checkExistingEnvKeys(['API_KEY'], envPath); + assert.deepStrictEqual(result, ['API_KEY']); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('finds key in process.env', async () => { + const tmp = makeTempDir('env-check'); + const saved = process.env.GSD_MCP_TEST_KEY_1; + try { + process.env.GSD_MCP_TEST_KEY_1 = 'some-value'; + const envPath = join(tmp, '.env'); + const result = await checkExistingEnvKeys(['GSD_MCP_TEST_KEY_1'], envPath); + assert.deepStrictEqual(result, ['GSD_MCP_TEST_KEY_1']); + } finally { + delete process.env.GSD_MCP_TEST_KEY_1; + if (saved !== undefined) process.env.GSD_MCP_TEST_KEY_1 = saved; + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('returns empty for missing keys', async () => { + const tmp = makeTempDir('env-check'); + try { + const envPath = join(tmp, '.env'); + writeFileSync(envPath, 'OTHER=val\n'); + delete process.env.DEFINITELY_NOT_SET_MCP_XYZ; + const result = await checkExistingEnvKeys(['DEFINITELY_NOT_SET_MCP_XYZ'], envPath); + assert.deepStrictEqual(result, []); + } finally { + rmSync(tmp, { 
recursive: true, force: true }); + } + }); + + it('handles missing .env file gracefully', async () => { + const tmp = makeTempDir('env-check'); + try { + const envPath = join(tmp, 'nonexistent.env'); + delete process.env.DEFINITELY_NOT_SET_MCP_XYZ; + const result = await checkExistingEnvKeys(['DEFINITELY_NOT_SET_MCP_XYZ'], envPath); + assert.deepStrictEqual(result, []); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); +}); + +// --------------------------------------------------------------------------- +// detectDestination +// --------------------------------------------------------------------------- + +describe('detectDestination', () => { + it('returns vercel when vercel.json exists', () => { + const tmp = makeTempDir('dest'); + try { + writeFileSync(join(tmp, 'vercel.json'), '{}'); + assert.equal(detectDestination(tmp), 'vercel'); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('returns convex when convex/ dir exists', () => { + const tmp = makeTempDir('dest'); + try { + mkdirSync(join(tmp, 'convex')); + assert.equal(detectDestination(tmp), 'convex'); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('returns dotenv when neither exists', () => { + const tmp = makeTempDir('dest'); + try { + assert.equal(detectDestination(tmp), 'dotenv'); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('vercel takes priority over convex', () => { + const tmp = makeTempDir('dest'); + try { + writeFileSync(join(tmp, 'vercel.json'), '{}'); + mkdirSync(join(tmp, 'convex')); + assert.equal(detectDestination(tmp), 'vercel'); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); +}); + +// --------------------------------------------------------------------------- +// writeEnvKey +// --------------------------------------------------------------------------- + +describe('writeEnvKey', () => { + it('creates .env file with new key', async () => { + const tmp = makeTempDir('write'); + try { + const envPath = join(tmp, '.env'); + await writeEnvKey(envPath, 'NEW_KEY', 'new-value'); + const content = readFileSync(envPath, 'utf8'); + assert.ok(content.includes('NEW_KEY=new-value')); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('updates existing key in-place', async () => { + const tmp = makeTempDir('write'); + try { + const envPath = join(tmp, '.env'); + writeFileSync(envPath, 'EXISTING=old\nOTHER=keep\n'); + await writeEnvKey(envPath, 'EXISTING', 'new'); + const content = readFileSync(envPath, 'utf8'); + assert.ok(content.includes('EXISTING=new')); + assert.ok(content.includes('OTHER=keep')); + assert.ok(!content.includes('old')); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('escapes newlines in values', async () => { + const tmp = makeTempDir('write'); + try { + const envPath = join(tmp, '.env'); + await writeEnvKey(envPath, 'MULTI', 'line1\nline2'); + const content = readFileSync(envPath, 'utf8'); + assert.ok(content.includes('MULTI=line1\\nline2')); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('rejects non-string values', async () => { + const tmp = makeTempDir('write'); + try { + const envPath = join(tmp, '.env'); + await assert.rejects( + () => writeEnvKey(envPath, 'KEY', undefined as unknown as string), + /expects a string value/, + ); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); +}); + +// 
--------------------------------------------------------------------------- +// applySecrets (dotenv) +// --------------------------------------------------------------------------- + +describe('applySecrets', () => { + const savedKeys: Record = {}; + + afterEach(() => { + for (const [k, v] of Object.entries(savedKeys)) { + if (v === undefined) delete process.env[k]; + else process.env[k] = v; + } + }); + + it('writes keys to .env and hydrates process.env', async () => { + const tmp = makeTempDir('apply'); + const envPath = join(tmp, '.env'); + savedKeys.GSD_APPLY_TEST_A = process.env.GSD_APPLY_TEST_A; + try { + const { applied, errors } = await applySecrets( + [{ key: 'GSD_APPLY_TEST_A', value: 'val-a' }], + 'dotenv', + { envFilePath: envPath }, + ); + assert.deepStrictEqual(applied, ['GSD_APPLY_TEST_A']); + assert.deepStrictEqual(errors, []); + assert.equal(process.env.GSD_APPLY_TEST_A, 'val-a'); + const content = readFileSync(envPath, 'utf8'); + assert.ok(content.includes('GSD_APPLY_TEST_A=val-a')); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('returns errors for invalid vercel environment', async () => { + const tmp = makeTempDir('apply'); + try { + const { applied, errors } = await applySecrets( + [{ key: 'KEY', value: 'val' }], + 'vercel', + { + envFilePath: join(tmp, '.env'), + environment: 'staging' as 'development', + execFn: async () => ({ code: 0, stderr: '' }), + }, + ); + assert.deepStrictEqual(applied, []); + assert.ok(errors[0]?.includes('unsupported')); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); +}); + +// --------------------------------------------------------------------------- +// Validation helpers +// --------------------------------------------------------------------------- + +describe('isSafeEnvVarKey', () => { + it('accepts valid keys', () => { + assert.ok(isSafeEnvVarKey('API_KEY')); + assert.ok(isSafeEnvVarKey('_PRIVATE')); + assert.ok(isSafeEnvVarKey('key123')); + }); + + it('rejects invalid keys', () => { + assert.ok(!isSafeEnvVarKey('123BAD')); + assert.ok(!isSafeEnvVarKey('has-dash')); + assert.ok(!isSafeEnvVarKey('has space')); + assert.ok(!isSafeEnvVarKey('')); + }); +}); + +describe('isSupportedDeploymentEnvironment', () => { + it('accepts valid environments', () => { + assert.ok(isSupportedDeploymentEnvironment('development')); + assert.ok(isSupportedDeploymentEnvironment('preview')); + assert.ok(isSupportedDeploymentEnvironment('production')); + }); + + it('rejects invalid environments', () => { + assert.ok(!isSupportedDeploymentEnvironment('staging')); + assert.ok(!isSupportedDeploymentEnvironment('test')); + }); +}); + +describe('shellEscapeSingle', () => { + it('wraps in single quotes', () => { + assert.equal(shellEscapeSingle('hello'), "'hello'"); + }); + + it('escapes embedded single quotes', () => { + assert.equal(shellEscapeSingle("it's"), "'it'\\''s'"); + }); +}); diff --git a/packages/mcp-server/src/env-writer.ts b/packages/mcp-server/src/env-writer.ts new file mode 100644 index 000000000..219496539 --- /dev/null +++ b/packages/mcp-server/src/env-writer.ts @@ -0,0 +1,183 @@ +// @gsd-build/mcp-server — Environment variable write utilities +// Copyright (c) 2026 Jeremy McSpadden +// +// Shared helpers for writing env vars to .env files, detecting project +// destinations, and checking existing keys. Used by secure_env_collect +// MCP tool. No TUI dependencies — pure filesystem + process.env operations. 
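+// All exported helpers are exercised directly by env-writer.test.ts.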
+ +import { readFile, writeFile } from "node:fs/promises"; +import { existsSync, statSync } from "node:fs"; +import { resolve } from "node:path"; + +// --------------------------------------------------------------------------- +// checkExistingEnvKeys +// --------------------------------------------------------------------------- + +/** + * Check which keys already exist in a .env file or process.env. + * Returns the subset of `keys` that are already set. + */ +export async function checkExistingEnvKeys(keys: string[], envFilePath: string): Promise { + let fileContent = ""; + try { + fileContent = await readFile(envFilePath, "utf8"); + } catch { + // ENOENT or other read error — proceed with empty content + } + + const existing: string[] = []; + for (const key of keys) { + const escaped = key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + const regex = new RegExp(`^${escaped}\\s*=`, "m"); + if (regex.test(fileContent) || key in process.env) { + existing.push(key); + } + } + return existing; +} + +// --------------------------------------------------------------------------- +// detectDestination +// --------------------------------------------------------------------------- + +/** + * Detect the write destination based on project files in basePath. + * Priority: vercel.json → convex/ dir → fallback "dotenv". + */ +export function detectDestination(basePath: string): "dotenv" | "vercel" | "convex" { + if (existsSync(resolve(basePath, "vercel.json"))) { + return "vercel"; + } + const convexPath = resolve(basePath, "convex"); + try { + if (existsSync(convexPath) && statSync(convexPath).isDirectory()) { + return "convex"; + } + } catch { + // stat error — treat as not found + } + return "dotenv"; +} + +// --------------------------------------------------------------------------- +// writeEnvKey +// --------------------------------------------------------------------------- + +/** + * Write a single key=value pair to a .env file. + * Updates existing keys in-place, appends new ones at the end. 
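+ * Values are escaped on write: backslashes are doubled, newlines become a
+ * literal "\n", and carriage returns are dropped.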
+ */ +export async function writeEnvKey(filePath: string, key: string, value: string): Promise { + if (typeof value !== "string") { + throw new TypeError(`writeEnvKey expects a string value for key "${key}", got ${typeof value}`); + } + let content = ""; + try { + content = await readFile(filePath, "utf8"); + } catch { + content = ""; + } + const escaped = value.replace(/\\/g, "\\\\").replace(/\n/g, "\\n").replace(/\r/g, ""); + const line = `${key}=${escaped}`; + const regex = new RegExp(`^${key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\s*=.*$`, "m"); + if (regex.test(content)) { + content = content.replace(regex, line); + } else { + if (content.length > 0 && !content.endsWith("\n")) content += "\n"; + content += `${line}\n`; + } + await writeFile(filePath, content, "utf8"); +} + +// --------------------------------------------------------------------------- +// Validation helpers +// --------------------------------------------------------------------------- + +export function isSafeEnvVarKey(key: string): boolean { + return /^[A-Za-z_][A-Za-z0-9_]*$/.test(key); +} + +export function isSupportedDeploymentEnvironment(env: string): boolean { + return env === "development" || env === "preview" || env === "production"; +} + +// --------------------------------------------------------------------------- +// Shell helpers (for vercel/convex CLI) +// --------------------------------------------------------------------------- + +export function shellEscapeSingle(value: string): string { + return `'${value.replace(/'/g, `'\\''`)}'`; +} + +// --------------------------------------------------------------------------- +// applySecrets +// --------------------------------------------------------------------------- + +interface ApplyResult { + applied: string[]; + errors: string[]; +} + +/** + * Apply collected secrets to the target destination. + * Dotenv writes are handled directly; vercel/convex shell out via execFn. + */ +export async function applySecrets( + provided: Array<{ key: string; value: string }>, + destination: "dotenv" | "vercel" | "convex", + opts: { + envFilePath: string; + environment?: string; + execFn?: (cmd: string, args: string[]) => Promise<{ code: number; stderr: string }>; + }, +): Promise { + const applied: string[] = []; + const errors: string[] = []; + + if (destination === "dotenv") { + for (const { key, value } of provided) { + try { + await writeEnvKey(opts.envFilePath, key, value); + applied.push(key); + // Hydrate process.env so the current session sees the new value + process.env[key] = value; + } catch (err: unknown) { + const msg = err instanceof Error ? err.message : String(err); + errors.push(`${key}: ${msg}`); + } + } + } + + if ((destination === "vercel" || destination === "convex") && opts.execFn) { + const env = opts.environment ?? "development"; + if (!isSupportedDeploymentEnvironment(env)) { + errors.push(`environment: unsupported target environment "${env}"`); + return { applied, errors }; + } + for (const { key, value } of provided) { + if (!isSafeEnvVarKey(key)) { + errors.push(`${key}: invalid environment variable name`); + continue; + } + const cmd = destination === "vercel" + ? `printf %s ${shellEscapeSingle(value)} | vercel env add ${key} ${env}` + : ""; + try { + const result = destination === "vercel" + ? 
await opts.execFn("sh", ["-c", cmd]) + : await opts.execFn("npx", ["convex", "env", "set", key, value]); + if (result.code !== 0) { + errors.push(`${key}: ${result.stderr.slice(0, 200)}`); + } else { + applied.push(key); + process.env[key] = value; + } + } catch (err: unknown) { + const msg = err instanceof Error ? err.message : String(err); + errors.push(`${key}: ${msg}`); + } + } + } + + return { applied, errors }; +} diff --git a/packages/mcp-server/src/import-candidates.test.ts b/packages/mcp-server/src/import-candidates.test.ts new file mode 100644 index 000000000..5b0171f3f --- /dev/null +++ b/packages/mcp-server/src/import-candidates.test.ts @@ -0,0 +1,48 @@ +// GSD-2 — Regression tests for importLocalModule candidate resolution (#3954) +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; + +import { _buildImportCandidates } from "./workflow-tools.js"; + +describe("_buildImportCandidates", () => { + it("includes dist/ fallback for src/ paths", () => { + const candidates = _buildImportCandidates("../../../src/resources/extensions/gsd/db-writer.js"); + assert.ok( + candidates.some((c) => c.includes("/dist/resources/extensions/gsd/db-writer.js")), + "should include dist/ swapped candidate", + ); + }); + + it("includes src/ fallback for dist/ paths", () => { + const candidates = _buildImportCandidates("../../../dist/resources/extensions/gsd/db-writer.js"); + assert.ok( + candidates.some((c) => c.includes("/src/resources/extensions/gsd/db-writer.js")), + "should include src/ swapped candidate", + ); + }); + + it("includes .ts variants for .js paths", () => { + const candidates = _buildImportCandidates("../../../src/resources/extensions/gsd/db-writer.js"); + assert.ok( + candidates.some((c) => c.endsWith("db-writer.ts") && c.includes("/src/")), + "should include .ts variant for original src/ path", + ); + assert.ok( + candidates.some((c) => c.endsWith("db-writer.ts") && c.includes("/dist/")), + "should include .ts variant for swapped dist/ path", + ); + }); + + it("returns original path first", () => { + const input = "../../../src/resources/extensions/gsd/db-writer.js"; + const candidates = _buildImportCandidates(input); + assert.equal(candidates[0], input, "first candidate should be the original path"); + }); + + it("handles paths without src/ or dist/ gracefully", () => { + const candidates = _buildImportCandidates("./local-module.js"); + assert.equal(candidates.length, 2, "should have original + .ts variant only"); + assert.equal(candidates[0], "./local-module.js"); + assert.equal(candidates[1], "./local-module.ts"); + }); +}); diff --git a/packages/mcp-server/src/mcp-server.test.ts b/packages/mcp-server/src/mcp-server.test.ts index 6d7ce156e..c3ba68065 100644 --- a/packages/mcp-server/src/mcp-server.test.ts +++ b/packages/mcp-server/src/mcp-server.test.ts @@ -16,7 +16,11 @@ import { resolve } from 'node:path'; import { EventEmitter } from 'node:events'; import { SessionManager } from './session-manager.js'; -import { createMcpServer } from './server.js'; +import { + buildAskUserQuestionsElicitRequest, + createMcpServer, + formatAskUserQuestionsElicitResult, +} from './server.js'; import { MAX_EVENTS } from './types.js'; import type { ManagedSession, CostAccumulator, PendingBlocker } from './types.js'; @@ -574,6 +578,8 @@ describe('createMcpServer tool registration', () => { it('creates server successfully with all required methods', async () => { const { server } = await createMcpServer(sm); assert.ok(server); + assert.ok(server.server); + 
assert.equal(typeof server.server.elicitInput, 'function'); assert.ok(typeof server.connect === 'function'); assert.ok(typeof server.close === 'function'); }); @@ -625,4 +631,82 @@ describe('createMcpServer tool registration', () => { const session = sm.getSession(sessionId)!; assert.equal(session.status, 'cancelled'); }); + + it('buildAskUserQuestionsElicitRequest adds None of the above note field for single-select questions', () => { + const request = buildAskUserQuestionsElicitRequest([ + { + id: 'depth_verification_M001', + header: 'Depth Check', + question: 'Did I capture the depth right?', + options: [ + { label: 'Yes, you got it (Recommended)', description: 'Continue with the current summary.' }, + { label: 'Not quite', description: 'I need to clarify the depth further.' }, + ], + }, + { + id: 'focus_areas', + header: 'Focus', + question: 'Which areas matter most?', + allowMultiple: true, + options: [ + { label: 'Frontend', description: 'Prioritize the UI.' }, + { label: 'Backend', description: 'Prioritize server logic.' }, + ], + }, + ]); + + assert.equal(request.mode, 'form'); + assert.deepEqual(request.requestedSchema.required, ['depth_verification_M001', 'focus_areas']); + assert.ok(request.requestedSchema.properties['depth_verification_M001']); + assert.ok(request.requestedSchema.properties['depth_verification_M001__note']); + assert.ok(!request.requestedSchema.properties['focus_areas__note']); + }); + + it('formatAskUserQuestionsElicitResult preserves the existing answers JSON shape', () => { + const result = formatAskUserQuestionsElicitResult( + [ + { + id: 'depth_verification_M001', + header: 'Depth Check', + question: 'Did I capture the depth right?', + options: [ + { label: 'Yes, you got it (Recommended)', description: 'Continue with the current summary.' }, + { label: 'Not quite', description: 'I need to clarify the depth further.' }, + ], + }, + { + id: 'focus_areas', + header: 'Focus', + question: 'Which areas matter most?', + allowMultiple: true, + options: [ + { label: 'Frontend', description: 'Prioritize the UI.' }, + { label: 'Backend', description: 'Prioritize server logic.' }, + ], + }, + ], + { + action: 'accept', + content: { + depth_verification_M001: 'None of the above', + depth_verification_M001__note: 'Need more implementation detail.', + focus_areas: ['Frontend', 'Backend'], + }, + }, + ); + + assert.equal( + result, + JSON.stringify({ + answers: { + depth_verification_M001: { + answers: ['None of the above', 'user_note: Need more implementation detail.'], + }, + focus_areas: { + answers: ['Frontend', 'Backend'], + }, + }, + }), + ); + }); }); diff --git a/packages/mcp-server/src/secure-env-collect.test.ts b/packages/mcp-server/src/secure-env-collect.test.ts new file mode 100644 index 000000000..c33ad2949 --- /dev/null +++ b/packages/mcp-server/src/secure-env-collect.test.ts @@ -0,0 +1,265 @@ +// @gsd-build/mcp-server — Tests for secure_env_collect MCP tool +// Copyright (c) 2026 Jeremy McSpadden +// +// Tests the secure_env_collect tool registered in createMcpServer. +// Uses a mock MCP server to intercept tool registration and elicitInput calls. 
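+//
+// A sketch of the handler-level pattern these mocks enable (illustrative only,
+// not an assertion made by this suite):
+//   const mock = new MockMcpServer();
+//   mock.elicitResponse = { action: 'accept', content: { MY_KEY: 'a-value' } };
+//   const handler = mock.getToolHandler('secure_env_collect');
+//   if (handler) await handler({ projectDir: '/tmp/project', keys: [{ key: 'MY_KEY' }] });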
+
+import { describe, it, beforeEach } from 'node:test';
+import assert from 'node:assert/strict';
+import { mkdtempSync, mkdirSync, rmSync, writeFileSync, readFileSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+
+import { createMcpServer } from './server.js';
+import { SessionManager } from './session-manager.js';
+
+// ---------------------------------------------------------------------------
+// Mock infrastructure
+// ---------------------------------------------------------------------------
+
+/**
+ * We cannot intercept McpServer construction by monkey-patching the dynamic
+ * import. Instead, we test the tool handler indirectly through the exported
+ * createMcpServer function — capturing the registered tool handlers.
+ *
+ * Since createMcpServer dynamically imports McpServer, we need to test at
+ * a level that exercises the tool handler logic. We do this by extracting
+ * the tool handler through the server.tool() calls.
+ */
+
+interface RegisteredTool {
+  name: string;
+  description: string;
+  params: Record<string, unknown>;
+  handler: (args: Record<string, unknown>) => Promise<ToolResult>;
+}
+
+interface ToolResult {
+  content?: Array<{ type: string; text: string }>;
+  isError?: boolean;
+}
+
+/**
+ * Mock McpServer that captures tool registrations and provides
+ * a controllable elicitInput response.
+ */
+class MockMcpServer {
+  registeredTools: RegisteredTool[] = [];
+  elicitResponse: { action: string; content?: Record<string, unknown> } = { action: 'accept', content: {} };
+
+  server = {
+    elicitInput: async (_params: unknown) => {
+      return this.elicitResponse;
+    },
+  };
+
+  tool(name: string, description: string, params: Record<string, unknown>, handler: (args: Record<string, unknown>) => Promise<ToolResult>) {
+    this.registeredTools.push({ name, description, params, handler });
+  }
+
+  async connect(_transport: unknown) { /* no-op */ }
+  async close() { /* no-op */ }
+
+  getToolHandler(name: string): ((args: Record<string, unknown>) => Promise<ToolResult>) | undefined {
+    return this.registeredTools.find((t) => t.name === name)?.handler;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Helper to create a mock MCP server with secure_env_collect registered
+// ---------------------------------------------------------------------------
+
+/**
+ * Since createMcpServer uses dynamic import for McpServer, we can't easily
+ * mock it. Instead, we test the env-writer utilities directly (in env-writer.test.ts)
+ * and test the tool integration by verifying:
+ * 1. The tool exists in the registered tools list
+ * 2. The handler produces correct results with mock data
+ *
+ * For handler-level testing, we create a standalone test that replicates
+ * the tool handler logic with a controllable mock.
+ */
+
+function makeTempDir(prefix: string): string {
+  return mkdtempSync(join(tmpdir(), `${prefix}-`));
+}
+
+// ---------------------------------------------------------------------------
+// Integration test — verify tool is registered
+// ---------------------------------------------------------------------------
+
+describe('secure_env_collect tool registration', () => {
+  it('createMcpServer registers secure_env_collect tool', async () => {
+    // This test verifies the tool exists — createMcpServer internally calls
+    // server.tool('secure_env_collect', ...) which we can't intercept without
+    // module mocking, but we can verify the server creates successfully
+    const sm = new SessionManager();
+    try {
+      const { server } = await createMcpServer(sm);
+      assert.ok(server, 'server should be created');
+      // The McpServer internally tracks registered tools — we verify no error
+    } finally {
+      await sm.cleanup();
+    }
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Handler logic tests — using env-writer directly to test the flow
+// ---------------------------------------------------------------------------
+
+describe('secure_env_collect handler logic', () => {
+  it('skips keys that already exist in .env', async () => {
+    const tmp = makeTempDir('sec-collect');
+    try {
+      const envPath = join(tmp, '.env');
+      writeFileSync(envPath, 'ALREADY_SET=existing-value\n');
+
+      // Import the utility directly to test the pre-check logic
+      const { checkExistingEnvKeys } = await import('./env-writer.js');
+      const existing = await checkExistingEnvKeys(['ALREADY_SET', 'NEW_KEY'], envPath);
+      assert.deepStrictEqual(existing, ['ALREADY_SET']);
+    } finally {
+      rmSync(tmp, { recursive: true, force: true });
+    }
+  });
+
+  it('writes collected values to .env without returning secret values', async () => {
+    const tmp = makeTempDir('sec-collect');
+    try {
+      const envPath = join(tmp, '.env');
+      const savedKey = process.env.SEC_COLLECT_TEST_KEY;
+
+      const { applySecrets } = await import('./env-writer.js');
+      const { applied, errors } = await applySecrets(
+        [{ key: 'SEC_COLLECT_TEST_KEY', value: 'super-secret-value' }],
+        'dotenv',
+        { envFilePath: envPath },
+      );
+
+      assert.deepStrictEqual(applied, ['SEC_COLLECT_TEST_KEY']);
+      assert.deepStrictEqual(errors, []);
+
+      // Verify the value was written
+      const content = readFileSync(envPath, 'utf8');
+      assert.ok(content.includes('SEC_COLLECT_TEST_KEY=super-secret-value'));
+
+      // Verify process.env was hydrated
+      assert.equal(process.env.SEC_COLLECT_TEST_KEY, 'super-secret-value');
+
+      // Cleanup
+      if (savedKey === undefined) delete process.env.SEC_COLLECT_TEST_KEY;
+      else process.env.SEC_COLLECT_TEST_KEY = savedKey;
+    } finally {
+      rmSync(tmp, { recursive: true, force: true });
+    }
+  });
+
+  it('auto-detects vercel destination from vercel.json', async () => {
+    const tmp = makeTempDir('sec-collect');
+    try {
+      writeFileSync(join(tmp, 'vercel.json'), '{}');
+      const { detectDestination } = await import('./env-writer.js');
+      assert.equal(detectDestination(tmp), 'vercel');
+    } finally {
+      rmSync(tmp, { recursive: true, force: true });
+    }
+  });
+
+  it('handles empty form values as skipped', async () => {
+    // Simulate what happens when user leaves a field empty in the form
+    const formContent: Record<string, unknown> = {
+      'API_KEY': 'provided-value',
+      'OPTIONAL_KEY': '', // empty = skip
+    };
+
+    const provided: Array<{ key: string; value: string }> = [];
+    const skipped: string[] = [];
+
+    for (const [key, raw] of Object.entries(formContent)) {
+      const value = typeof raw === 'string' ?
raw.trim() : ''; + if (value.length > 0) { + provided.push({ key, value }); + } else { + skipped.push(key); + } + } + + assert.deepStrictEqual(provided, [{ key: 'API_KEY', value: 'provided-value' }]); + assert.deepStrictEqual(skipped, ['OPTIONAL_KEY']); + }); + + it('result text never contains secret values', async () => { + const tmp = makeTempDir('sec-collect'); + try { + const envPath = join(tmp, '.env'); + const savedKey = process.env.RESULT_TEXT_TEST; + + const { applySecrets } = await import('./env-writer.js'); + const { applied } = await applySecrets( + [{ key: 'RESULT_TEXT_TEST', value: 'sk-super-secret-abc123' }], + 'dotenv', + { envFilePath: envPath }, + ); + + // Simulate building result text (same logic as the tool handler) + const lines: string[] = [ + 'destination: dotenv (auto-detected)', + ...applied.map((k) => `✓ ${k}: applied`), + ]; + const resultText = lines.join('\n'); + + // The result MUST NOT contain the secret value + assert.ok(!resultText.includes('sk-super-secret-abc123'), 'result text must not contain secret value'); + assert.ok(resultText.includes('RESULT_TEXT_TEST'), 'result text should contain key name'); + + // Cleanup + if (savedKey === undefined) delete process.env.RESULT_TEXT_TEST; + else process.env.RESULT_TEXT_TEST = savedKey; + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it('handles multiple keys with mixed existing/new/skipped', async () => { + const tmp = makeTempDir('sec-collect'); + try { + const envPath = join(tmp, '.env'); + writeFileSync(envPath, 'EXISTING_A=already-here\n'); + const savedB = process.env.NEW_B; + const savedC = process.env.SKIP_C; + + const { checkExistingEnvKeys, applySecrets } = await import('./env-writer.js'); + + const allKeys = ['EXISTING_A', 'NEW_B', 'SKIP_C']; + const existing = await checkExistingEnvKeys(allKeys, envPath); + assert.deepStrictEqual(existing, ['EXISTING_A']); + + // Simulate form response: NEW_B has value, SKIP_C is empty + const formContent = { NEW_B: 'new-value', SKIP_C: '' }; + const provided: Array<{ key: string; value: string }> = []; + const skipped: string[] = []; + + for (const key of allKeys.filter((k) => !existing.includes(k))) { + const raw = formContent[key as keyof typeof formContent] ?? ''; + if (raw.trim().length > 0) provided.push({ key, value: raw.trim() }); + else skipped.push(key); + } + + const { applied, errors } = await applySecrets(provided, 'dotenv', { envFilePath: envPath }); + + assert.deepStrictEqual(applied, ['NEW_B']); + assert.deepStrictEqual(skipped, ['SKIP_C']); + assert.deepStrictEqual(errors, []); + assert.deepStrictEqual(existing, ['EXISTING_A']); + + // Cleanup + if (savedB === undefined) delete process.env.NEW_B; + else process.env.NEW_B = savedB; + if (savedC === undefined) delete process.env.SKIP_C; + else process.env.SKIP_C = savedC; + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); +}); diff --git a/packages/mcp-server/src/server.ts b/packages/mcp-server/src/server.ts index f4f5fe206..ef230c22e 100644 --- a/packages/mcp-server/src/server.ts +++ b/packages/mcp-server/src/server.ts @@ -2,8 +2,9 @@ * MCP Server — registers GSD orchestration, project-state, and workflow tools. 
 *
 * Session tools (6): gsd_execute, gsd_status, gsd_result, gsd_cancel, gsd_query, gsd_resolve_blocker
+ * Interactive tools (2): ask_user_questions, secure_env_collect via MCP form elicitation
 * Read-only tools (6): gsd_progress, gsd_roadmap, gsd_history, gsd_doctor, gsd_captures, gsd_knowledge
- * Workflow tools (17): planning, replanning, completion, validation, reassessment, gate result, and milestone status tools
+ * Workflow tools (29): headless-safe planning, metadata persistence, replanning, completion, validation, reassessment, gate result, status, and journal tools
 *
 * Uses dynamic imports for @modelcontextprotocol/sdk because TS Node16
 * cannot resolve the SDK's subpath exports statically (same pattern as
@@ -21,6 +22,7 @@
 import { readCaptures } from './readers/captures.js';
 import { readKnowledge } from './readers/knowledge.js';
 import { runDoctorLite } from './readers/doctor-lite.js';
 import { registerWorkflowTools } from './workflow-tools.js';
+import { applySecrets, checkExistingEnvKeys, detectDestination } from './env-writer.js';
 
 // ---------------------------------------------------------------------------
 // Constants
@@ -44,50 +46,89 @@ function errorContent(message: string): { isError: true; content: Array<{ type:
   return { isError: true, content: [{ type: 'text' as const, text: message }] };
 }
 
+/** Return raw text content without JSON wrapping. */
+function textContent(text: string): { content: Array<{ type: 'text'; text: string }> } {
+  return { content: [{ type: 'text' as const, text }] };
+}
+
 // ---------------------------------------------------------------------------
 // gsd_query filesystem reader
 // ---------------------------------------------------------------------------
 
-async function readProjectState(projectDir: string, _query: string): Promise<Record<string, unknown>> {
+/**
+ * Normalized query categories for {@link readProjectState}.
+ *
+ * Maps user-supplied query strings (or empty) to the set of fields we return.
+ * Accepts common synonyms so the MCP client can pass intuitive values.
+ */
+const QUERY_FIELDS = {
+  all: ['state', 'project', 'requirements', 'milestones'] as const,
+  state: ['state'] as const,
+  status: ['state'] as const,
+  project: ['project'] as const,
+  requirements: ['requirements'] as const,
+  milestones: ['milestones'] as const,
+} as const;
+
+type QueryCategory = keyof typeof QUERY_FIELDS;
+type ProjectStateField = (typeof QUERY_FIELDS)[QueryCategory][number];
+
+function normalizeQuery(query: string | undefined): QueryCategory {
+  const key = (query ?? 'all').trim().toLowerCase();
+  if (key in QUERY_FIELDS) return key as QueryCategory;
+  return 'all';
+}
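+// Worked example (a trace of normalizeQuery above, not additional behavior):
+//   normalizeQuery('status')  → 'status' → fields ['state']
+//   normalizeQuery(' STATE ') → 'state'  → fields ['state']
+//   normalizeQuery('bogus')   → 'all'    → every field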
+
+async function readProjectState(projectDir: string, query: string | undefined): Promise<Record<string, unknown>> {
   const gsdDir = join(resolve(projectDir), '.gsd');
-  const result: Record<string, unknown> = { projectDir: resolve(projectDir) };
+  const category = normalizeQuery(query);
+  const wanted = new Set<ProjectStateField>(QUERY_FIELDS[category]);
 
-  // STATE.md — current execution state
-  try {
-    result.state = await readFile(join(gsdDir, 'STATE.md'), 'utf-8');
-  } catch {
-    result.state = null;
-  }
+  const result: Record<string, unknown> = {
+    projectDir: resolve(projectDir),
+    query: category,
+  };
 
-  // PROJECT.md — project description
-  try {
-    result.project = await readFile(join(gsdDir, 'PROJECT.md'), 'utf-8');
-  } catch {
-    result.project = null;
-  }
-
-  // REQUIREMENTS.md — requirement contract
-  try {
-    result.requirements = await readFile(join(gsdDir, 'REQUIREMENTS.md'), 'utf-8');
-  } catch {
-    result.requirements = null;
-  }
-
-  // List milestones with basic metadata
-  const milestonesDir = join(gsdDir, 'milestones');
-  try {
-    const entries = await readdir(milestonesDir, { withFileTypes: true });
-    const milestones: Array<{ id: string; hasRoadmap: boolean; hasSummary: boolean }> = [];
-    for (const entry of entries) {
-      if (!entry.isDirectory()) continue;
-      const mDir = join(milestonesDir, entry.name);
-      const hasRoadmap = await fileExists(join(mDir, `${entry.name}-ROADMAP.md`));
-      const hasSummary = await fileExists(join(mDir, `${entry.name}-SUMMARY.md`));
-      milestones.push({ id: entry.name, hasRoadmap, hasSummary });
+  if (wanted.has('state')) {
+    try {
+      result.state = await readFile(join(gsdDir, 'STATE.md'), 'utf-8');
+    } catch {
+      result.state = null;
+    }
+  }
+
+  if (wanted.has('project')) {
+    try {
+      result.project = await readFile(join(gsdDir, 'PROJECT.md'), 'utf-8');
+    } catch {
+      result.project = null;
+    }
+  }
+
+  if (wanted.has('requirements')) {
+    try {
+      result.requirements = await readFile(join(gsdDir, 'REQUIREMENTS.md'), 'utf-8');
+    } catch {
+      result.requirements = null;
+    }
+  }
+
+  if (wanted.has('milestones')) {
+    const milestonesDir = join(gsdDir, 'milestones');
+    try {
+      const entries = await readdir(milestonesDir, { withFileTypes: true });
+      const milestones: Array<{ id: string; hasRoadmap: boolean; hasSummary: boolean }> = [];
+      for (const entry of entries) {
+        if (!entry.isDirectory()) continue;
+        const mDir = join(milestonesDir, entry.name);
+        const hasRoadmap = await fileExists(join(mDir, `${entry.name}-ROADMAP.md`));
+        const hasSummary = await fileExists(join(mDir, `${entry.name}-SUMMARY.md`));
+        milestones.push({ id: entry.name, hasRoadmap, hasSummary });
+      }
+      result.milestones = milestones;
+    } catch {
+      result.milestones = [];
     }
-    result.milestones = milestones;
-  } catch {
-    result.milestones = [];
   }
 
   return result;
@@ -106,12 +147,189 @@ async function fileExists(path: string): Promise<boolean> {
 // ---------------------------------------------------------------------------
 // MCP Server type — minimal interface for the dynamically-imported McpServer
 // ---------------------------------------------------------------------------
 
+interface ElicitResult {
+  action: 'accept' | 'decline' | 'cancel';
+  content?: Record<string, unknown>;
+}
+
+interface ElicitRequestFormParams {
+  mode?: 'form';
+  message: string;
+  requestedSchema: {
+    type: 'object';
+    properties: Record<string, Record<string, unknown>>;
+    required?: string[];
+  };
+}
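+// A minimal instance of this shape, as secure_env_collect below builds it
+// (illustrative values only): { message: 'Enter values for 1 environment variable(s)…',
+//   requestedSchema: { type: 'object', properties: { MY_KEY: { type: 'string',
+//     title: 'MY_KEY', description: 'Leave empty to skip.' } }, required: [] } }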
+
+/**
+ * Handler extra — the second argument passed by McpServer.tool handlers.
+ * Contains an AbortSignal scoped to the JSON-RPC request (cancelled when
+ * the client cancels the `tools/call`) plus other per-request metadata.
+ * Tools that can actually be stopped mid-flight should honour `signal`.
+ */
+export interface McpToolExtra {
+  signal?: AbortSignal;
+  requestId?: string | number;
+  sendNotification?: (notification: unknown) => void | Promise<void>;
+}
+
 interface McpServerInstance {
-  tool(name: string, description: string, params: Record<string, unknown>, handler: (args: Record<string, unknown>) => Promise<unknown>): unknown;
+  tool(
+    name: string,
+    description: string,
+    params: Record<string, unknown>,
+    handler: (args: Record<string, unknown>, extra?: McpToolExtra) => Promise<unknown>,
+  ): unknown;
+  server: {
+    elicitInput(
+      params: AskUserQuestionsElicitRequest | ElicitRequestFormParams,
+      options?: unknown,
+    ): Promise<ElicitResult>;
+  };
   connect(transport: unknown): Promise<void>;
   close(): Promise<void>;
 }
 
+interface AskUserQuestionOption {
+  label: string;
+  description: string;
+}
+
+interface AskUserQuestion {
+  id: string;
+  header: string;
+  question: string;
+  options: AskUserQuestionOption[];
+  allowMultiple?: boolean;
+}
+
+interface AskUserQuestionsParams {
+  questions: AskUserQuestion[];
+}
+
+type AskUserQuestionsContentValue = string | number | boolean | string[];
+
+interface AskUserQuestionsElicitResult {
+  action: 'accept' | 'decline' | 'cancel';
+  content?: Record<string, AskUserQuestionsContentValue>;
+}
+
+interface AskUserQuestionsElicitRequest {
+  mode: 'form';
+  message: string;
+  requestedSchema: {
+    type: 'object';
+    properties: Record<string, Record<string, unknown>>;
+    required?: string[];
+  };
+}
+
+const OTHER_OPTION_LABEL = 'None of the above';
+
+function normalizeAskUserQuestionsNote(value: AskUserQuestionsContentValue | undefined): string {
+  return typeof value === 'string' ? value.trim() : '';
+}
+
+function normalizeAskUserQuestionsAnswers(
+  value: AskUserQuestionsContentValue | undefined,
+  allowMultiple: boolean,
+): string[] {
+  if (allowMultiple) {
+    return Array.isArray(value) ? value.filter((item): item is string => typeof item === 'string') : [];
+  }
+
+  return typeof value === 'string' && value.length > 0 ? [value] : [];
+}
+
+function validateAskUserQuestionsPayload(questions: AskUserQuestion[]): string | null {
+  if (questions.length === 0 || questions.length > 3) {
+    return 'Error: questions must contain 1-3 items';
+  }
+
+  for (const question of questions) {
+    if (!question.options || question.options.length === 0) {
+      return `Error: ask_user_questions requires non-empty options for every question (question "${question.id}" has none)`;
+    }
+  }
+
+  return null;
+}
+
+export function buildAskUserQuestionsElicitRequest(questions: AskUserQuestion[]): AskUserQuestionsElicitRequest {
+  const properties: Record<string, Record<string, unknown>> = {};
+  const required = questions.map((question) => question.id);
+
+  for (const question of questions) {
+    if (question.allowMultiple) {
+      properties[question.id] = {
+        type: 'array',
+        title: question.header,
+        description: question.question,
+        minItems: 1,
+        maxItems: question.options.length,
+        items: {
+          anyOf: question.options.map((option) => ({
+            const: option.label,
+            title: option.label,
+          })),
+        },
+      };
+      continue;
+    }
+
+    properties[question.id] = {
+      type: 'string',
+      title: question.header,
+      description: question.question,
+      oneOf: [...question.options, { label: OTHER_OPTION_LABEL, description: 'Choose this when the listed options do not fit.' }].map((option) => ({
+        const: option.label,
+        title: option.label,
+      })),
+    };
+
+    properties[`${question.id}__note`] = {
+      type: 'string',
+      title: `${question.header} Note`,
+      description: `Optional note for "${OTHER_OPTION_LABEL}".`,
+      maxLength: 500,
+    };
+  }
+
+  return {
+    mode: 'form',
+    message: 'Please answer the following question(s). For single-select questions, choose "None of the above" and add a note if the provided options do not fit.',
+    requestedSchema: {
+      type: 'object',
+      properties,
+      required,
+    },
+  };
+}
+
+export function formatAskUserQuestionsElicitResult(
+  questions: AskUserQuestion[],
+  result: AskUserQuestionsElicitResult,
+): string {
+  const answers: Record<string, { answers: string[] }> = {};
+  const content = result.content ?? {};
+
+  for (const question of questions) {
+    const answerList = normalizeAskUserQuestionsAnswers(content[question.id], !!question.allowMultiple);
+
+    if (!question.allowMultiple && answerList[0] === OTHER_OPTION_LABEL) {
+      const note = normalizeAskUserQuestionsNote(content[`${question.id}__note`]);
+      if (note) {
+        answerList.push(`user_note: ${note}`);
+      }
+    }
+
+    answers[question.id] = { answers: answerList };
+  }
+
+  return JSON.stringify({ answers });
+}
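+// Example result shape (mirrors mcp-server.test.ts): a single-select answer of
+// "None of the above" with a note serializes as
+//   {"answers":{"q1":{"answers":["None of the above","user_note: <note>"]}}}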
+
 // ---------------------------------------------------------------------------
 // createMcpServer
 // ---------------------------------------------------------------------------
@@ -131,11 +349,16 @@ export async function createMcpServer(sessionManager: SessionManager): Promise<{
   const server: McpServerInstance = new McpServer(
     { name: SERVER_NAME, version: SERVER_VERSION },
-    { capabilities: { tools: {} } },
+    { capabilities: { tools: {}, elicitation: {} } },
   );
 
   // -----------------------------------------------------------------------
-  // gsd_execute — start a new GSD auto-mode session
+  // gsd_execute — start a new GSD auto-mode session.
+  //
+  // If the JSON-RPC request is aborted while the session is starting (or
+  // immediately after), we cancel the session so we don't leak a background
+  // RpcClient process. Once the session is running the caller should use
+  // `gsd_cancel` to stop it via sessionId.
   // -----------------------------------------------------------------------
   server.tool(
     'gsd_execute',
@@ -146,12 +369,20 @@
       model: z.string().optional().describe('Model ID override'),
       bare: z.boolean().optional().describe('Run in bare mode (skip user config)'),
     },
-    async (args: Record<string, unknown>) => {
+    async (args: Record<string, unknown>, extra?: McpToolExtra) => {
       const { projectDir, command, model, bare } = args as {
         projectDir: string; command?: string; model?: string; bare?: boolean;
       };
       try {
         const sessionId = await sessionManager.startSession(projectDir, { command, model, bare });
+
+        // If the client aborted while startSession was running, cancel the
+        // newly-created session rather than leaving an orphaned process.
+        if (extra?.signal?.aborted) {
+          await sessionManager.cancelSession(sessionId).catch(() => { /* swallow */ });
+          return errorContent('gsd_execute aborted by client before returning');
+        }
+
         return jsonContent({ sessionId, status: 'started' });
       } catch (err) {
         return errorContent(err instanceof Error ? err.message : String(err));
@@ -244,17 +475,25 @@
   );
 
   // -----------------------------------------------------------------------
-  // gsd_query — read project state from filesystem (no session needed)
+  // gsd_query — read project state from filesystem (no session needed).
+  //
+  // `query` is optional: when omitted the tool returns all fields (STATE.md,
+  // PROJECT.md, requirements, milestone listing). Accepted narrow values:
+  // "state" / "status", "project", "requirements", "milestones", "all".
+  // Unknown values fall back to "all" for forward-compatibility.
   // -----------------------------------------------------------------------
   server.tool(
     'gsd_query',
-    'Query GSD project state from the filesystem. Returns STATE.md, PROJECT.md, requirements, and milestone listing. Does not require an active session.',
+    'Query GSD project state from the filesystem. By default returns STATE.md, PROJECT.md, requirements, and milestone listing. Pass `query` to narrow the response (accepted: "state"/"status", "project", "requirements", "milestones", "all"). Does not require an active session.',
     {
       projectDir: z.string().describe('Absolute path to the project directory'),
-      query: z.string().describe('What to query (e.g. "status", "milestones", "requirements")'),
+      query: z
+        .enum(['all', 'state', 'status', 'project', 'requirements', 'milestones'])
+        .optional()
+        .describe('Narrow the response to a single field (default: "all")'),
     },
     async (args: Record<string, unknown>) => {
-      const { projectDir, query } = args as { projectDir: string; query: string };
+      const { projectDir, query } = args as { projectDir: string; query?: string };
       try {
         const state = await readProjectState(projectDir, query);
         return jsonContent(state);
@@ -285,6 +524,160 @@
     },
   );
 
+  // -----------------------------------------------------------------------
+  // ask_user_questions — structured user input via MCP form elicitation
+  // -----------------------------------------------------------------------
+  server.tool(
+    'ask_user_questions',
+    'Request user input for one to three short questions and wait for the response. Single-select questions include a free-form "None of the above" path. Multi-select questions allow multiple choices.',
+    {
+      questions: z.array(z.object({
+        id: z.string().describe('Stable identifier for mapping answers (snake_case)'),
+        header: z.string().describe('Short header label shown in the UI (12 or fewer chars)'),
+        question: z.string().describe('Single-sentence prompt shown to the user'),
+        options: z.array(z.object({
+          label: z.string().describe('User-facing label (1-5 words)'),
+          description: z.string().describe('One short sentence explaining impact/tradeoff if selected'),
+        })).describe('Provide 2-3 mutually exclusive choices. Put the recommended option first and suffix its label with "(Recommended)". Do not include an "Other" option for single-select questions.'),
+        allowMultiple: z.boolean().optional().describe('If true, the user can select multiple options. No "None of the above" option is added.'),
+      })).describe('Questions to show the user. Prefer 1 and do not exceed 3.'),
+    },
+    async (args: Record<string, unknown>) => {
+      const { questions } = args as unknown as AskUserQuestionsParams;
+      try {
+        const validationError = validateAskUserQuestionsPayload(questions);
+        if (validationError) return errorContent(validationError);
+
+        const elicitation = await server.server.elicitInput(buildAskUserQuestionsElicitRequest(questions));
+        if (elicitation.action !== 'accept' || !elicitation.content) {
+          return textContent('ask_user_questions was cancelled before receiving a response');
+        }
+
+        return textContent(formatAskUserQuestionsElicitResult(questions, elicitation));
+      } catch (err) {
+        return errorContent(err instanceof Error ? err.message : String(err));
+      }
+    },
+  );
+
+  // -----------------------------------------------------------------------
+  // secure_env_collect — collect secrets via MCP form elicitation
+  // -----------------------------------------------------------------------
+  server.tool(
+    'secure_env_collect',
+    'Collect environment variables securely via form input. Values are written directly to .env (or Vercel/Convex) and NEVER appear in tool output — only key names and applied/skipped status are returned. Use this instead of asking users to manually edit .env files or paste secrets into chat.',
+    {
+      projectDir: z.string().describe('Absolute path to the project directory'),
+      keys: z.array(z.object({
+        key: z.string().describe('Env var name, e.g. OPENAI_API_KEY'),
+        hint: z.string().optional().describe('Format hint shown to user, e.g. "starts with sk-"'),
+        guidance: z.array(z.string()).optional().describe('Step-by-step instructions for obtaining this key'),
+      })).min(1).describe('Environment variables to collect'),
+      destination: z.enum(['dotenv', 'vercel', 'convex']).optional().describe('Where to write secrets. Auto-detected from project files if omitted.'),
+      envFilePath: z.string().optional().describe('Path to .env file (dotenv only). Defaults to .env in projectDir.'),
+      environment: z.enum(['development', 'preview', 'production']).optional().describe('Target environment (vercel/convex only)'),
+    },
+    async (args: Record<string, unknown>) => {
+      const { projectDir, keys, destination, envFilePath, environment } = args as {
+        projectDir: string;
+        keys: Array<{ key: string; hint?: string; guidance?: string[] }>;
+        destination?: 'dotenv' | 'vercel' | 'convex';
+        envFilePath?: string;
+        environment?: 'development' | 'preview' | 'production';
+      };
+
+      try {
+        const resolvedProjectDir = resolve(projectDir);
+        const resolvedEnvPath = resolve(resolvedProjectDir, envFilePath ?? '.env');
+
+        // (1) Check which keys already exist
+        const allKeyNames = keys.map((k) => k.key);
+        const existingKeys = await checkExistingEnvKeys(allKeyNames, resolvedEnvPath);
+        const existingSet = new Set(existingKeys);
+        const pendingKeys = keys.filter((k) => !existingSet.has(k.key));
+
+        // If all keys already exist, return immediately
+        if (pendingKeys.length === 0) {
+          const lines = existingKeys.map((k) => `• ${k}: already set`);
+          return textContent(`All ${existingKeys.length} key(s) already set.\n${lines.join('\n')}`);
+        }
+
+        // (2) Build elicitation form — one string field per pending key
+        const properties: Record<string, Record<string, unknown>> = {};
+        const required: string[] = [];
+
+        for (const item of pendingKeys) {
+          const descParts: string[] = [];
+          if (item.hint) descParts.push(`Format: ${item.hint}`);
+          if (item.guidance && item.guidance.length > 0) {
+            descParts.push('How to get this:');
+            item.guidance.forEach((step, i) => descParts.push(`${i + 1}. ${step}`));
+          }
+          descParts.push('Leave empty to skip.');
+
+          properties[item.key] = {
+            type: 'string',
+            title: item.key,
+            description: descParts.join('\n'),
+          };
+          // Don't mark as required — empty string = skip
+        }
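+
+        // Illustrative result of the loop above for one pending key with a
+        // hint (example values, not fixtures): properties = {
+        //   OPENAI_API_KEY: { type: 'string', title: 'OPENAI_API_KEY',
+        //     description: 'Format: starts with sk-\nLeave empty to skip.' } }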
+
+        // (3) Elicit input from the MCP client
+        const elicitation = await server.server.elicitInput({
+          message: `Enter values for ${pendingKeys.length} environment variable(s). Values are written directly to the project and never shown to the AI.`,
+          requestedSchema: {
+            type: 'object',
+            properties,
+            required,
+          },
+        });
+
+        if (elicitation.action !== 'accept' || !elicitation.content) {
+          return textContent('secure_env_collect was cancelled by user.');
+        }
+
+        // (4) Separate provided vs skipped from form response
+        const provided: Array<{ key: string; value: string }> = [];
+        const skipped: string[] = [];
+
+        for (const item of pendingKeys) {
+          const raw = elicitation.content[item.key];
+          const value = typeof raw === 'string' ? raw.trim() : '';
+          if (value.length > 0) {
+            provided.push({ key: item.key, value });
+          } else {
+            skipped.push(item.key);
+          }
+        }
+
+        // (5) Auto-detect destination if not specified
+        const resolvedDestination = destination ?? detectDestination(resolvedProjectDir);
+
+        // (6) Write secrets to destination
+        const { applied, errors } = await applySecrets(provided, resolvedDestination, {
+          envFilePath: resolvedEnvPath,
+          environment,
+        });
+
+        // (7) Build result — NEVER include secret values
+        const lines: string[] = [
+          `destination: ${resolvedDestination}${!destination ? ' (auto-detected)' : ''}${environment ? ` (${environment})` : ''}`,
+        ];
+        for (const k of applied) lines.push(`✓ ${k}: applied`);
+        for (const k of skipped) lines.push(`• ${k}: skipped`);
+        for (const k of existingKeys) lines.push(`• ${k}: already set`);
+        for (const e of errors) lines.push(`✗ ${e}`);
+
+        return errors.length > 0 && applied.length === 0
+          ? errorContent(lines.join('\n'))
+          : textContent(lines.join('\n'));
+      } catch (err) {
+        return errorContent(err instanceof Error ?
err.message : String(err)); + } + }, + ); + // ======================================================================= // READ-ONLY TOOLS — no session required, pure filesystem reads // ======================================================================= diff --git a/packages/mcp-server/src/tool-credentials.test.ts b/packages/mcp-server/src/tool-credentials.test.ts new file mode 100644 index 000000000..b6838a29f --- /dev/null +++ b/packages/mcp-server/src/tool-credentials.test.ts @@ -0,0 +1,95 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { loadStoredCredentialEnvKeys, resolveAuthPath } from "./tool-credentials.js"; + +describe("tool credentials", () => { + it("hydrates supported model and tool keys from auth.json", () => { + const tempRoot = mkdtempSync(join(tmpdir(), "gsd-mcp-auth-")); + const authPath = join(tempRoot, "auth.json"); + const env: NodeJS.ProcessEnv = {}; + + try { + writeFileSync(authPath, JSON.stringify({ + anthropic: { type: "api_key", key: "sk-ant-secret" }, + openai: { type: "api_key", key: "sk-openai-secret" }, + tavily: { type: "api_key", key: "tvly-secret" }, + context7: [{ type: "api_key", key: "ctx7-secret" }], + })); + + const loaded = loadStoredCredentialEnvKeys({ authPath, env }); + assert.deepEqual(loaded.sort(), [ + "ANTHROPIC_API_KEY", + "CONTEXT7_API_KEY", + "OPENAI_API_KEY", + "TAVILY_API_KEY", + ]); + assert.equal(env.ANTHROPIC_API_KEY, "sk-ant-secret"); + assert.equal(env.OPENAI_API_KEY, "sk-openai-secret"); + assert.equal(env.TAVILY_API_KEY, "tvly-secret"); + assert.equal(env.CONTEXT7_API_KEY, "ctx7-secret"); + } finally { + rmSync(tempRoot, { recursive: true, force: true }); + } + }); + + it("does not overwrite explicit environment variables", () => { + const tempRoot = mkdtempSync(join(tmpdir(), "gsd-mcp-auth-")); + const authPath = join(tempRoot, "auth.json"); + const env: NodeJS.ProcessEnv = { + BRAVE_API_KEY: "already-set", + }; + + try { + writeFileSync(authPath, JSON.stringify({ + brave: { type: "api_key", key: "from-auth-json" }, + anthropic: { type: "api_key", key: "sk-ant-from-auth-json" }, + })); + + const loaded = loadStoredCredentialEnvKeys({ authPath, env }); + assert.deepEqual(loaded, ["ANTHROPIC_API_KEY"]); + assert.equal(env.BRAVE_API_KEY, "already-set"); + assert.equal(env.ANTHROPIC_API_KEY, "sk-ant-from-auth-json"); + } finally { + rmSync(tempRoot, { recursive: true, force: true }); + } + }); + + it("ignores oauth credentials because they are resolved through auth storage, not env hydration", () => { + const tempRoot = mkdtempSync(join(tmpdir(), "gsd-mcp-auth-")); + const authPath = join(tempRoot, "auth.json"); + const env: NodeJS.ProcessEnv = {}; + + try { + writeFileSync(authPath, JSON.stringify({ + openai: { type: "oauth", access: "oauth-access-token" }, + "google-gemini-cli": { type: "oauth", token: "ya29.oauth-token" }, + })); + + const loaded = loadStoredCredentialEnvKeys({ authPath, env }); + assert.deepEqual(loaded, []); + assert.equal(env.OPENAI_API_KEY, undefined); + assert.equal(env.GEMINI_API_KEY, undefined); + } finally { + rmSync(tempRoot, { recursive: true, force: true }); + } + }); + + it("resolves auth.json from GSD_CODING_AGENT_DIR", () => { + const tempRoot = mkdtempSync(join(tmpdir(), "gsd-mcp-agent-dir-")); + const agentDir = join(tempRoot, "agent"); + mkdirSync(agentDir, { recursive: true }); + + try { + assert.equal( 
+        resolveAuthPath({ GSD_CODING_AGENT_DIR: agentDir }),
+        join(agentDir, "auth.json"),
+      );
+    } finally {
+      rmSync(tempRoot, { recursive: true, force: true });
+    }
+  });
+});
diff --git a/packages/mcp-server/src/tool-credentials.ts b/packages/mcp-server/src/tool-credentials.ts
new file mode 100644
index 000000000..d19487437
--- /dev/null
+++ b/packages/mcp-server/src/tool-credentials.ts
@@ -0,0 +1,97 @@
+import { existsSync, readFileSync } from "node:fs";
+import { homedir } from "node:os";
+import { join } from "node:path";
+
+type AuthCredential =
+  | { type?: unknown; key?: unknown }
+  | Array<{ type?: unknown; key?: unknown }>;
+
+type AuthStorageData = Record<string, AuthCredential>;
+
+const AUTH_ENV_KEYS = [
+  ["anthropic", "ANTHROPIC_API_KEY"],
+  ["openai", "OPENAI_API_KEY"],
+  ["github-copilot", "GITHUB_TOKEN"],
+  ["google", "GEMINI_API_KEY"],
+  ["groq", "GROQ_API_KEY"],
+  ["xai", "XAI_API_KEY"],
+  ["openrouter", "OPENROUTER_API_KEY"],
+  ["mistral", "MISTRAL_API_KEY"],
+  ["ollama-cloud", "OLLAMA_API_KEY"],
+  ["custom-openai", "CUSTOM_OPENAI_API_KEY"],
+  ["cerebras", "CEREBRAS_API_KEY"],
+  ["azure-openai-responses", "AZURE_OPENAI_API_KEY"],
+  ["vercel-ai-gateway", "AI_GATEWAY_API_KEY"],
+  ["zai", "ZAI_API_KEY"],
+  ["minimax", "MINIMAX_API_KEY"],
+  ["minimax-cn", "MINIMAX_CN_API_KEY"],
+  ["huggingface", "HF_TOKEN"],
+  ["opencode", "OPENCODE_API_KEY"],
+  ["opencode-go", "OPENCODE_API_KEY"],
+  ["kimi-coding", "KIMI_API_KEY"],
+  ["alibaba-coding-plan", "ALIBABA_API_KEY"],
+  ["brave", "BRAVE_API_KEY"],
+  ["brave_answers", "BRAVE_ANSWERS_KEY"],
+  ["context7", "CONTEXT7_API_KEY"],
+  ["jina", "JINA_API_KEY"],
+  ["tavily", "TAVILY_API_KEY"],
+  ["slack_bot", "SLACK_BOT_TOKEN"],
+  ["discord_bot", "DISCORD_BOT_TOKEN"],
+  ["telegram_bot", "TELEGRAM_BOT_TOKEN"],
+] as const;
+
+function expandHome(pathValue: string): string {
+  if (pathValue === "~") return homedir();
+  if (pathValue.startsWith("~/")) return join(homedir(), pathValue.slice(2));
+  return pathValue;
+}
+
+function getStoredApiKey(data: AuthStorageData, providerId: string): string | undefined {
+  const raw = data[providerId];
+  const credentials = Array.isArray(raw) ? raw : raw ? [raw] : [];
+
+  for (const credential of credentials) {
+    if (credential?.type !== "api_key") continue;
+    if (typeof credential.key !== "string") continue;
+    if (credential.key.trim().length === 0) continue;
+    return credential.key;
+  }
+
+  return undefined;
+}
+
+export function resolveAuthPath(env: NodeJS.ProcessEnv = process.env): string {
+  const agentDir = env.GSD_CODING_AGENT_DIR?.trim();
+  if (agentDir) return join(expandHome(agentDir), "auth.json");
+  return join(homedir(), ".gsd", "agent", "auth.json");
+}
+
+export function loadStoredCredentialEnvKeys(options: {
+  env?: NodeJS.ProcessEnv;
+  authPath?: string;
+} = {}): string[] {
+  const env = options.env ?? process.env;
+  const authPath = options.authPath ??
resolveAuthPath(env); + if (!existsSync(authPath)) return []; + + let parsed: AuthStorageData; + try { + const raw = readFileSync(authPath, "utf-8"); + const data = JSON.parse(raw) as unknown; + if (!data || typeof data !== "object" || Array.isArray(data)) return []; + parsed = data as AuthStorageData; + } catch { + return []; + } + + const loaded: string[] = []; + for (const [providerId, envVar] of AUTH_ENV_KEYS) { + if (env[envVar]) continue; + const key = getStoredApiKey(parsed, providerId); + if (!key) continue; + env[envVar] = key; + loaded.push(envVar); + } + + return loaded; +} diff --git a/packages/mcp-server/src/workflow-tools.test.ts b/packages/mcp-server/src/workflow-tools.test.ts index 35a883b3b..8435203c6 100644 --- a/packages/mcp-server/src/workflow-tools.test.ts +++ b/packages/mcp-server/src/workflow-tools.test.ts @@ -6,7 +6,7 @@ import { tmpdir } from "node:os"; import { randomUUID } from "node:crypto"; import { _getAdapter, closeDatabase } from "../../../src/resources/extensions/gsd/gsd-db.ts"; -import { registerWorkflowTools } from "./workflow-tools.ts"; +import { registerWorkflowTools, WORKFLOW_TOOL_NAMES } from "./workflow-tools.ts"; function makeTmpBase(): string { const base = join(tmpdir(), `gsd-mcp-workflow-${randomUUID()}`); @@ -68,33 +68,12 @@ function makeMockServer() { } describe("workflow MCP tools", () => { - it("registers the seventeen workflow tools", () => { + it("registers the full headless-safe workflow tool surface", () => { const server = makeMockServer(); registerWorkflowTools(server as any); - assert.equal(server.tools.length, 17); - assert.deepEqual( - server.tools.map((t) => t.name), - [ - "gsd_plan_milestone", - "gsd_plan_slice", - "gsd_replan_slice", - "gsd_slice_replan", - "gsd_slice_complete", - "gsd_complete_slice", - "gsd_complete_milestone", - "gsd_milestone_complete", - "gsd_validate_milestone", - "gsd_milestone_validate", - "gsd_reassess_roadmap", - "gsd_roadmap_reassess", - "gsd_save_gate_result", - "gsd_summary_save", - "gsd_task_complete", - "gsd_complete_task", - "gsd_milestone_status", - ], - ); + assert.equal(server.tools.length, WORKFLOW_TOOL_NAMES.length); + assert.deepEqual(server.tools.map((t) => t.name), [...WORKFLOW_TOOL_NAMES]); }); it("gsd_summary_save writes artifact through the shared executor", async () => { @@ -405,6 +384,116 @@ describe("workflow MCP tools", () => { } }); + it("gsd_requirement_save opens the DB before inline requirement writes", async () => { + const base = makeTmpBase(); + try { + const server = makeMockServer(); + registerWorkflowTools(server as any); + const requirementTool = server.tools.find((t) => t.name === "gsd_requirement_save"); + assert.ok(requirementTool, "requirement tool should be registered"); + + closeDatabase(); + + const result = await requirementTool!.handler({ + projectDir: base, + class: "operability", + description: "Inline MCP requirement save regression", + why: "Reproduce missing ensureDbOpen in workflow-tools", + source: "user", + status: "active", + primary_owner: "M010/S10", + validation: "n/a", + }); + + assert.match((result as any).content[0].text as string, /Saved requirement R\d+/); + assert.ok(existsSync(join(base, ".gsd", "REQUIREMENTS.md")), "REQUIREMENTS.md should be written to disk"); + const row = _getAdapter()! 
+ .prepare("SELECT id, class, description FROM requirements WHERE description = ?") + .get("Inline MCP requirement save regression") as Record | undefined; + assert.ok(row, "requirement should be written to the database"); + assert.equal(row["class"], "operability"); + } finally { + cleanup(base); + } + }); + + it("gsd_plan_task reopens the DB before inline task planning writes", async () => { + const base = makeTmpBase(); + try { + const server = makeMockServer(); + registerWorkflowTools(server as any); + const milestoneTool = server.tools.find((t) => t.name === "gsd_plan_milestone"); + const sliceTool = server.tools.find((t) => t.name === "gsd_plan_slice"); + const taskTool = server.tools.find((t) => t.name === "gsd_plan_task"); + assert.ok(milestoneTool, "milestone planning tool should be registered"); + assert.ok(sliceTool, "slice planning tool should be registered"); + assert.ok(taskTool, "task planning tool should be registered"); + + await milestoneTool!.handler({ + projectDir: base, + milestoneId: "M010", + title: "Inline task planning DB reopen", + vision: "Seed a slice, close the DB, then plan another task inline.", + slices: [ + { + sliceId: "S10", + title: "Inline task planning", + risk: "medium", + depends: [], + demo: "Inline gsd_plan_task reopens the DB after it was closed.", + goal: "Preserve MCP task planning after the DB adapter is closed.", + successCriteria: "The second task plan persists after a closed DB is reopened.", + proofLevel: "integration", + integrationClosure: "The inline MCP handler reopens the DB before planning.", + observabilityImpact: "workflow-tools MCP tests cover the inline reopen path.", + }, + ], + }); + await sliceTool!.handler({ + projectDir: base, + milestoneId: "M010", + sliceId: "S10", + goal: "Create the initial slice plan before closing the DB.", + tasks: [ + { + taskId: "T10", + title: "Seed existing task", + description: "Create the initial task plan before closing the DB.", + estimate: "5m", + files: ["packages/mcp-server/src/workflow-tools.ts"], + verify: "node --test", + inputs: ["M010-ROADMAP.md"], + expectedOutput: ["T10-PLAN.md"], + }, + ], + }); + + closeDatabase(); + + const result = await taskTool!.handler({ + projectDir: base, + milestoneId: "M010", + sliceId: "S10", + taskId: "T11", + title: "Reopen and plan", + description: "Exercise the inline plan-task path after the DB was closed.", + estimate: "5m", + files: ["packages/mcp-server/src/workflow-tools.ts"], + verify: "node --test", + inputs: ["M010-ROADMAP.md", "S10-PLAN.md"], + expectedOutput: ["T11-PLAN.md"], + }); + + assert.match((result as any).content[0].text as string, /Planned task T11/); + assert.ok( + existsSync(join(base, ".gsd", "milestones", "M010", "slices", "S10", "tasks", "T11-PLAN.md")), + "T11 plan should be written after reopening the DB", + ); + } finally { + cleanup(base); + } + }); + it("gsd_replan_slice and gsd_slice_replan work end-to-end", async () => { const base = makeTmpBase(); try { @@ -974,3 +1063,31 @@ describe("workflow MCP tools", () => { } }); }); + +describe("URL scheme regex — Windows drive letter safety", () => { + // This is the regex used in getWriteGateModuleCandidates() and + // getWorkflowExecutorModuleCandidates() to reject non-file URL schemes. + // It must NOT match single-letter Windows drive prefixes (C:, D:, etc.). 
+  const urlSchemeRegex = /^[a-z]{2,}:/i;
+
+  it("rejects multi-letter URL schemes", () => {
+    assert.ok(urlSchemeRegex.test("http://example.com"), "http: should match");
+    assert.ok(urlSchemeRegex.test("https://example.com"), "https: should match");
+    assert.ok(urlSchemeRegex.test("ftp://files.example.com"), "ftp: should match");
+    assert.ok(urlSchemeRegex.test("file:///C:/Users"), "file: should match");
+    assert.ok(urlSchemeRegex.test("node:fs"), "node: should match");
+  });
+
+  it("allows single-letter Windows drive prefixes", () => {
+    assert.ok(!urlSchemeRegex.test("C:\\Users\\user\\project"), "C:\\ should not match");
+    assert.ok(!urlSchemeRegex.test("D:\\other\\path"), "D:\\ should not match");
+    assert.ok(!urlSchemeRegex.test("c:\\lowercase\\drive"), "c:\\ should not match");
+    assert.ok(!urlSchemeRegex.test("E:/forward/slash/path"), "E:/ should not match");
+  });
+
+  it("allows bare filesystem paths", () => {
+    assert.ok(!urlSchemeRegex.test("/usr/local/lib/module.js"), "unix absolute path should not match");
+    assert.ok(!urlSchemeRegex.test("./relative/path.js"), "relative path should not match");
+    assert.ok(!urlSchemeRegex.test("../parent/path.js"), "parent relative path should not match");
+  });
+});
diff --git a/packages/mcp-server/src/workflow-tools.ts b/packages/mcp-server/src/workflow-tools.ts
index 95ea20494..ba6986beb 100644
--- a/packages/mcp-server/src/workflow-tools.ts
+++ b/packages/mcp-server/src/workflow-tools.ts
@@ -244,6 +244,10 @@
   ) => { block: boolean; reason?: string };
 };
 
+type WorkflowDbBootstrapModule = {
+  ensureDbOpen: (basePath?: string) => Promise<boolean>;
+};
+
 let workflowToolExecutorsPromise: Promise<WorkflowToolExecutorsModule> | null = null;
 let workflowExecutionQueue: Promise<void> = Promise.resolve();
 let workflowWriteGatePromise: Promise<WorkflowWriteGateModule> | null = null;
@@ -318,7 +322,7 @@ function getWriteGateModuleCandidates(): string[] {
   const candidates: string[] = [];
   const explicitModule = process.env.GSD_WORKFLOW_WRITE_GATE_MODULE?.trim();
   if (explicitModule) {
-    if (/^[a-z]+:/i.test(explicitModule) && !explicitModule.startsWith("file:")) {
+    if (/^[a-z]{2,}:/i.test(explicitModule) && !explicitModule.startsWith("file:")) {
       throw new Error("GSD_WORKFLOW_WRITE_GATE_MODULE only supports file: URLs or filesystem paths.");
     }
     candidates.push(explicitModule.startsWith("file:") ? explicitModule : toFileUrl(explicitModule));
@@ -326,6 +330,7 @@
 
   candidates.push(
     new URL("../../../src/resources/extensions/gsd/bootstrap/write-gate.js", import.meta.url).href,
+    new URL("../../../dist/resources/extensions/gsd/bootstrap/write-gate.js", import.meta.url).href,
     new URL("../../../src/resources/extensions/gsd/bootstrap/write-gate.ts", import.meta.url).href,
   );
 
@@ -336,11 +341,46 @@
 function toFileUrl(modulePath: string): string {
   return pathToFileURL(resolve(modulePath)).href;
 }
 
+/** @internal — exported for testing only */
+export function _buildImportCandidates(relativePath: string): string[] {
+  // Build candidate paths: try the given path first, then swap src/<->dist/
+  // and try .ts extension. This handles both dev (tsx from src/) and prod
+  // (compiled from dist/) execution contexts.
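+  // Worked example (mirrors import-candidates.test.ts): for ".../src/x.js"
+  // the candidates come out, in order, as
+  //   [".../src/x.js", ".../dist/x.js", ".../src/x.ts", ".../dist/x.ts"]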
relativePath.replace("/dist/", "/src/") + : null; + if (swapped) candidates.push(swapped); + // Also try .ts variants for dev-mode tsx execution + if (relativePath.endsWith(".js")) { + candidates.push(relativePath.replace(/\.js$/, ".ts")); + if (swapped) candidates.push(swapped.replace(/\.js$/, ".ts")); + } + return candidates; +} + +async function importLocalModule(relativePath: string): Promise { + const candidates = _buildImportCandidates(relativePath) + .map((p) => new URL(p, import.meta.url).href); + + let lastErr: unknown; + for (const candidate of candidates) { + try { + return await import(candidate) as T; + } catch (err) { + lastErr = err; + } + } + throw lastErr; +} + function getWorkflowExecutorModuleCandidates(env: NodeJS.ProcessEnv = process.env): string[] { const candidates: string[] = []; const explicitModule = env.GSD_WORKFLOW_EXECUTORS_MODULE?.trim(); if (explicitModule) { - if (/^[a-z]+:/i.test(explicitModule) && !explicitModule.startsWith("file:")) { + if (/^[a-z]{2,}:/i.test(explicitModule) && !explicitModule.startsWith("file:")) { throw new Error("GSD_WORKFLOW_EXECUTORS_MODULE only supports file: URLs or filesystem paths."); } candidates.push(explicitModule.startsWith("file:") ? explicitModule : toFileUrl(explicitModule)); @@ -348,6 +388,7 @@ function getWorkflowExecutorModuleCandidates(env: NodeJS.ProcessEnv = process.en candidates.push( new URL("../../../src/resources/extensions/gsd/tools/workflow-tool-executors.js", import.meta.url).href, + new URL("../../../dist/resources/extensions/gsd/tools/workflow-tool-executors.js", import.meta.url).href, new URL("../../../src/resources/extensions/gsd/tools/workflow-tool-executors.ts", import.meta.url).href, ); @@ -420,6 +461,38 @@ interface McpToolServer { ): unknown; } +export const WORKFLOW_TOOL_NAMES = [ + "gsd_decision_save", + "gsd_save_decision", + "gsd_requirement_update", + "gsd_update_requirement", + "gsd_requirement_save", + "gsd_save_requirement", + "gsd_milestone_generate_id", + "gsd_generate_milestone_id", + "gsd_plan_milestone", + "gsd_plan_slice", + "gsd_plan_task", + "gsd_task_plan", + "gsd_replan_slice", + "gsd_slice_replan", + "gsd_slice_complete", + "gsd_complete_slice", + "gsd_skip_slice", + "gsd_complete_milestone", + "gsd_milestone_complete", + "gsd_validate_milestone", + "gsd_milestone_validate", + "gsd_reassess_roadmap", + "gsd_roadmap_reassess", + "gsd_save_gate_result", + "gsd_summary_save", + "gsd_task_complete", + "gsd_complete_task", + "gsd_milestone_status", + "gsd_journal_query", +] as const; + async function runSerializedWorkflowOperation(fn: () => Promise): Promise { // The shared DB adapter and workflow log base path are process-global, so // workflow MCP mutations must not overlap within a single server process. 
@@ -437,6 +510,22 @@
   }
 }
 
+async function runSerializedWorkflowDbOperation<T>(
+  projectDir: string,
+  fn: () => Promise<T>,
+): Promise<T> {
+  return runSerializedWorkflowOperation(async () => {
+    const { ensureDbOpen } = await importLocalModule<WorkflowDbBootstrapModule>(
+      "../../../src/resources/extensions/gsd/bootstrap/dynamic-tools.js",
+    );
+    const dbAvailable = await ensureDbOpen(projectDir);
+    if (!dbAvailable) {
+      throw new Error("GSD database is not available");
+    }
+    return fn();
+  });
+}
+
 async function enforceWorkflowWriteGate(
   toolName: string,
   projectDir: string,
@@ -566,6 +655,15 @@
   return runSerializedWorkflowOperation(() => executeSaveGateResult(params, projectDir));
 }
 
+async function ensureMilestoneDbRow(milestoneId: string): Promise<void> {
+  try {
+    const { insertMilestone } = await importLocalModule("../../../src/resources/extensions/gsd/gsd-db.js");
+    insertMilestone({ id: milestoneId, status: "queued" });
+  } catch {
+    // Ignore pre-existing rows or transient DB availability issues.
+  }
+}
+
 const projectDirParam = z.string().describe("Absolute path to the project directory within the configured workflow root");
 
 const planMilestoneParams = {
@@ -689,7 +787,7 @@ const saveGateResultParams = {
   projectDir: projectDirParam,
   milestoneId: z.string().describe("Milestone ID (e.g. M001)"),
   sliceId: z.string().describe("Slice ID (e.g. S01)"),
-  gateId: z.enum(["Q3", "Q4", "Q5", "Q6", "Q7", "Q8"]).describe("Gate ID"),
+  gateId: z.enum(["Q3", "Q4", "Q5", "Q6", "Q7", "Q8", "MV01", "MV02", "MV03", "MV04"]).describe("Gate ID"),
   taskId: z.string().optional().describe("Task ID for task-scoped gates"),
   verdict: z.enum(["pass", "flag", "omitted"]).describe("Gate verdict"),
   rationale: z.string().describe("One-sentence justification"),
@@ -772,6 +870,73 @@
 };
 const summarySaveSchema = z.object(summarySaveParams);
 
+const decisionSaveParams = {
+  projectDir: projectDirParam,
+  scope: z.string().describe("Scope of the decision (e.g. architecture, library, observability)"),
+  decision: z.string().describe("What is being decided"),
+  choice: z.string().describe("The choice made"),
+  rationale: z.string().describe("Why this choice was made"),
+  revisable: z.string().optional().describe("Whether this can be revisited"),
+  when_context: z.string().optional().describe("When/context for the decision"),
+  made_by: z.enum(["human", "agent", "collaborative"]).optional().describe("Who made the decision"),
+};
+const decisionSaveSchema = z.object(decisionSaveParams);
+
+const requirementUpdateParams = {
+  projectDir: projectDirParam,
+  id: z.string().describe("Requirement ID (e.g.
R001)"), + status: z.string().optional().describe("New status"), + validation: z.string().optional().describe("Validation criteria or proof"), + notes: z.string().optional().describe("Additional notes"), + description: z.string().optional().describe("Updated description"), + primary_owner: z.string().optional().describe("Primary owning slice"), + supporting_slices: z.string().optional().describe("Supporting slices"), +}; +const requirementUpdateSchema = z.object(requirementUpdateParams); + +const requirementSaveParams = { + projectDir: projectDirParam, + class: z.string().describe("Requirement class"), + description: z.string().describe("Short description of the requirement"), + why: z.string().describe("Why this requirement matters"), + source: z.string().describe("Origin of the requirement"), + status: z.string().optional().describe("Requirement status"), + primary_owner: z.string().optional().describe("Primary owning slice"), + supporting_slices: z.string().optional().describe("Supporting slices"), + validation: z.string().optional().describe("Validation criteria"), + notes: z.string().optional().describe("Additional notes"), +}; +const requirementSaveSchema = z.object(requirementSaveParams); + +const milestoneGenerateIdParams = { + projectDir: projectDirParam, +}; +const milestoneGenerateIdSchema = z.object(milestoneGenerateIdParams); + +const planTaskParams = { + projectDir: projectDirParam, + milestoneId: z.string().describe("Milestone ID (e.g. M001)"), + sliceId: z.string().describe("Slice ID (e.g. S01)"), + taskId: z.string().describe("Task ID (e.g. T01)"), + title: z.string().describe("Task title"), + description: z.string().describe("Task description / steps block"), + estimate: z.string().describe("Task estimate"), + files: z.array(z.string()).describe("Files likely touched"), + verify: z.string().describe("Verification command or block"), + inputs: z.array(z.string()).describe("Input files or references"), + expectedOutput: z.array(z.string()).describe("Expected output files or artifacts"), + observabilityImpact: z.string().optional().describe("Task observability impact"), +}; +const planTaskSchema = z.object(planTaskParams); + +const skipSliceParams = { + projectDir: projectDirParam, + sliceId: z.string().describe("Slice ID (e.g. S02)"), + milestoneId: z.string().describe("Milestone ID (e.g. M003)"), + reason: z.string().optional().describe("Reason for skipping this slice"), +}; +const skipSliceSchema = z.object(skipSliceParams); + const taskCompleteParams = { projectDir: projectDirParam, taskId: z.string().describe("Task ID (e.g. 
T01)"), @@ -803,7 +968,171 @@ const milestoneStatusParams = { }; const milestoneStatusSchema = z.object(milestoneStatusParams); +const journalQueryParams = { + projectDir: projectDirParam, + flowId: z.string().optional().describe("Filter by flow ID"), + unitId: z.string().optional().describe("Filter by unit ID"), + rule: z.string().optional().describe("Filter by rule name"), + eventType: z.string().optional().describe("Filter by event type"), + after: z.string().optional().describe("ISO-8601 lower bound (inclusive)"), + before: z.string().optional().describe("ISO-8601 upper bound (inclusive)"), + limit: z.number().optional().describe("Maximum entries to return"), +}; +const journalQuerySchema = z.object(journalQueryParams); + export function registerWorkflowTools(server: McpToolServer): void { + server.tool( + "gsd_decision_save", + "Record a project decision to the GSD database and regenerate DECISIONS.md.", + decisionSaveParams, + async (args: Record) => { + const parsed = parseWorkflowArgs(decisionSaveSchema, args); + const { projectDir, ...params } = parsed; + await enforceWorkflowWriteGate("gsd_decision_save", projectDir); + const result = await runSerializedWorkflowDbOperation(projectDir, async () => { + const { saveDecisionToDb } = await importLocalModule("../../../src/resources/extensions/gsd/db-writer.js"); + return saveDecisionToDb(params, projectDir); + }); + return { content: [{ type: "text" as const, text: `Saved decision ${result.id}` }] }; + }, + ); + + server.tool( + "gsd_save_decision", + "Alias for gsd_decision_save. Record a project decision to the GSD database and regenerate DECISIONS.md.", + decisionSaveParams, + async (args: Record) => { + const parsed = parseWorkflowArgs(decisionSaveSchema, args); + const { projectDir, ...params } = parsed; + await enforceWorkflowWriteGate("gsd_decision_save", projectDir); + const result = await runSerializedWorkflowDbOperation(projectDir, async () => { + const { saveDecisionToDb } = await importLocalModule("../../../src/resources/extensions/gsd/db-writer.js"); + return saveDecisionToDb(params, projectDir); + }); + return { content: [{ type: "text" as const, text: `Saved decision ${result.id}` }] }; + }, + ); + + server.tool( + "gsd_requirement_update", + "Update an existing requirement in the GSD database and regenerate REQUIREMENTS.md.", + requirementUpdateParams, + async (args: Record) => { + const parsed = parseWorkflowArgs(requirementUpdateSchema, args); + const { projectDir, id, ...updates } = parsed; + await enforceWorkflowWriteGate("gsd_requirement_update", projectDir); + await runSerializedWorkflowDbOperation(projectDir, async () => { + const { updateRequirementInDb } = await importLocalModule("../../../src/resources/extensions/gsd/db-writer.js"); + return updateRequirementInDb(id, updates, projectDir); + }); + return { content: [{ type: "text" as const, text: `Updated requirement ${id}` }] }; + }, + ); + + server.tool( + "gsd_update_requirement", + "Alias for gsd_requirement_update. 
Update an existing requirement in the GSD database and regenerate REQUIREMENTS.md.", + requirementUpdateParams, + async (args: Record) => { + const parsed = parseWorkflowArgs(requirementUpdateSchema, args); + const { projectDir, id, ...updates } = parsed; + await enforceWorkflowWriteGate("gsd_requirement_update", projectDir); + await runSerializedWorkflowDbOperation(projectDir, async () => { + const { updateRequirementInDb } = await importLocalModule("../../../src/resources/extensions/gsd/db-writer.js"); + return updateRequirementInDb(id, updates, projectDir); + }); + return { content: [{ type: "text" as const, text: `Updated requirement ${id}` }] }; + }, + ); + + server.tool( + "gsd_requirement_save", + "Record a new requirement to the GSD database and regenerate REQUIREMENTS.md.", + requirementSaveParams, + async (args: Record) => { + const parsed = parseWorkflowArgs(requirementSaveSchema, args); + const { projectDir, ...params } = parsed; + await enforceWorkflowWriteGate("gsd_requirement_save", projectDir); + const result = await runSerializedWorkflowDbOperation(projectDir, async () => { + const { saveRequirementToDb } = await importLocalModule("../../../src/resources/extensions/gsd/db-writer.js"); + return saveRequirementToDb(params, projectDir); + }); + return { content: [{ type: "text" as const, text: `Saved requirement ${result.id}` }] }; + }, + ); + + server.tool( + "gsd_save_requirement", + "Alias for gsd_requirement_save. Record a new requirement to the GSD database and regenerate REQUIREMENTS.md.", + requirementSaveParams, + async (args: Record) => { + const parsed = parseWorkflowArgs(requirementSaveSchema, args); + const { projectDir, ...params } = parsed; + await enforceWorkflowWriteGate("gsd_requirement_save", projectDir); + const result = await runSerializedWorkflowDbOperation(projectDir, async () => { + const { saveRequirementToDb } = await importLocalModule("../../../src/resources/extensions/gsd/db-writer.js"); + return saveRequirementToDb(params, projectDir); + }); + return { content: [{ type: "text" as const, text: `Saved requirement ${result.id}` }] }; + }, + ); + + server.tool( + "gsd_milestone_generate_id", + "Generate the next milestone ID for a new GSD milestone.", + milestoneGenerateIdParams, + async (args: Record) => { + const { projectDir } = parseWorkflowArgs(milestoneGenerateIdSchema, args); + await enforceWorkflowWriteGate("gsd_milestone_generate_id", projectDir); + const id = await runSerializedWorkflowDbOperation(projectDir, async () => { + const { + claimReservedId, + findMilestoneIds, + getReservedMilestoneIds, + nextMilestoneId, + } = await importLocalModule("../../../src/resources/extensions/gsd/milestone-ids.js"); + const reserved = claimReservedId(); + if (reserved) { + await ensureMilestoneDbRow(reserved); + return reserved; + } + const allIds = [...new Set([...findMilestoneIds(projectDir), ...getReservedMilestoneIds()])]; + const nextId = nextMilestoneId(allIds); + await ensureMilestoneDbRow(nextId); + return nextId; + }); + return { content: [{ type: "text" as const, text: id }] }; + }, + ); + + server.tool( + "gsd_generate_milestone_id", + "Alias for gsd_milestone_generate_id. 
Generate the next milestone ID for a new GSD milestone.", + milestoneGenerateIdParams, + async (args: Record) => { + const { projectDir } = parseWorkflowArgs(milestoneGenerateIdSchema, args); + await enforceWorkflowWriteGate("gsd_milestone_generate_id", projectDir); + const id = await runSerializedWorkflowDbOperation(projectDir, async () => { + const { + claimReservedId, + findMilestoneIds, + getReservedMilestoneIds, + nextMilestoneId, + } = await importLocalModule("../../../src/resources/extensions/gsd/milestone-ids.js"); + const reserved = claimReservedId(); + if (reserved) { + await ensureMilestoneDbRow(reserved); + return reserved; + } + const allIds = [...new Set([...findMilestoneIds(projectDir), ...getReservedMilestoneIds()])]; + const nextId = nextMilestoneId(allIds); + await ensureMilestoneDbRow(nextId); + return nextId; + }); + return { content: [{ type: "text" as const, text: id }] }; + }, + ); + server.tool( "gsd_plan_milestone", "Write milestone planning state to the GSD database and render ROADMAP.md from DB.", @@ -830,6 +1159,48 @@ export function registerWorkflowTools(server: McpToolServer): void { }, ); + server.tool( + "gsd_plan_task", + "Write task planning state to the GSD database and render tasks/T##-PLAN.md from DB.", + planTaskParams, + async (args: Record) => { + const parsed = parseWorkflowArgs(planTaskSchema, args); + const { projectDir, ...params } = parsed; + await enforceWorkflowWriteGate("gsd_plan_task", projectDir, params.milestoneId); + const result = await runSerializedWorkflowDbOperation(projectDir, async () => { + const { handlePlanTask } = await importLocalModule("../../../src/resources/extensions/gsd/tools/plan-task.js"); + return handlePlanTask(params, projectDir); + }); + if ("error" in result) { + throw new Error(result.error); + } + return { + content: [{ type: "text" as const, text: `Planned task ${result.taskId} (${result.sliceId}/${result.milestoneId})` }], + }; + }, + ); + + server.tool( + "gsd_task_plan", + "Alias for gsd_plan_task. 
Write task planning state to the GSD database and render tasks/T##-PLAN.md from DB.", + planTaskParams, + async (args: Record) => { + const parsed = parseWorkflowArgs(planTaskSchema, args); + const { projectDir, ...params } = parsed; + await enforceWorkflowWriteGate("gsd_plan_task", projectDir, params.milestoneId); + const result = await runSerializedWorkflowDbOperation(projectDir, async () => { + const { handlePlanTask } = await importLocalModule("../../../src/resources/extensions/gsd/tools/plan-task.js"); + return handlePlanTask(params, projectDir); + }); + if ("error" in result) { + throw new Error(result.error); + } + return { + content: [{ type: "text" as const, text: `Planned task ${result.taskId} (${result.sliceId}/${result.milestoneId})` }], + }; + }, + ); + server.tool( "gsd_replan_slice", "Replan a slice after a blocker is discovered, preserving completed tasks and re-rendering PLAN.md + REPLAN.md.", @@ -870,6 +1241,36 @@ export function registerWorkflowTools(server: McpToolServer): void { }, ); + server.tool( + "gsd_skip_slice", + "Mark a slice as skipped so auto-mode advances past it without executing.", + skipSliceParams, + async (args: Record) => { + const { projectDir, milestoneId, sliceId, reason } = parseWorkflowArgs(skipSliceSchema, args); + await enforceWorkflowWriteGate("gsd_skip_slice", projectDir, milestoneId); + await runSerializedWorkflowDbOperation(projectDir, async () => { + const { getSlice, updateSliceStatus } = await importLocalModule("../../../src/resources/extensions/gsd/gsd-db.js"); + const { invalidateStateCache } = await importLocalModule("../../../src/resources/extensions/gsd/state.js"); + const { rebuildState } = await importLocalModule("../../../src/resources/extensions/gsd/doctor.js"); + const slice = getSlice(milestoneId, sliceId); + if (!slice) { + throw new Error(`Slice ${sliceId} not found in milestone ${milestoneId}`); + } + if (slice.status === "complete" || slice.status === "done") { + throw new Error(`Slice ${sliceId} is already complete and cannot be skipped`); + } + if (slice.status !== "skipped") { + updateSliceStatus(milestoneId, sliceId, "skipped"); + invalidateStateCache(); + await rebuildState(projectDir); + } + }); + return { + content: [{ type: "text" as const, text: `Skipped slice ${sliceId} (${milestoneId}). Reason: ${reason ?? "User-directed skip"}.` }], + }; + }, + ); + server.tool( "gsd_complete_milestone", "Record a completed milestone to the GSD database and render its SUMMARY.md.", @@ -994,4 +1395,19 @@ export function registerWorkflowTools(server: McpToolServer): void { return runSerializedWorkflowOperation(() => executeMilestoneStatus({ milestoneId }, projectDir)); }, ); + + server.tool( + "gsd_journal_query", + "Query the structured event journal for auto-mode iterations.", + journalQueryParams, + async (args: Record) => { + const { projectDir, limit, ...filters } = parseWorkflowArgs(journalQuerySchema, args); + const { queryJournal } = await importLocalModule("../../../src/resources/extensions/gsd/journal.js"); + const entries = queryJournal(projectDir, filters).slice(0, limit ?? 100); + if (entries.length === 0) { + return { content: [{ type: "text" as const, text: "No matching journal entries found." 
}] }; + } + return { content: [{ type: "text" as const, text: JSON.stringify(entries, null, 2) }] }; + }, + ); } diff --git a/packages/pi-agent-core/src/agent.test.ts b/packages/pi-agent-core/src/agent.test.ts index e0b838cd4..4ecd23af2 100644 --- a/packages/pi-agent-core/src/agent.test.ts +++ b/packages/pi-agent-core/src/agent.test.ts @@ -8,6 +8,8 @@ import assert from "node:assert/strict"; import { readFileSync } from "node:fs"; import { join, dirname } from "node:path"; import { fileURLToPath } from "node:url"; +import { Agent } from "./agent.ts"; +import { getModel, type AssistantMessageEventStream } from "@gsd/pi-ai"; const __dirname = dirname(fileURLToPath(import.meta.url)); @@ -50,4 +52,84 @@ describe("Agent — activeInferenceModel (#1844 Bug 2)", () => { assert.ok(setLine < abortLine, "activeInferenceModel must be set before streaming infrastructure is created"); }); + + it("getProviderOptions are forwarded into the provider stream call", async () => { + let capturedOptions: Record | undefined; + const agent = new Agent({ + initialState: { + model: getModel("anthropic", "claude-3-5-sonnet-20241022"), + systemPrompt: "test", + tools: [], + }, + getProviderOptions: async () => ({ customRuntimeOption: "present" }), + streamFn: (_model, _context, options): AssistantMessageEventStream => { + capturedOptions = options as Record | undefined; + return { + async *[Symbol.asyncIterator]() { + yield { + type: "start", + partial: { + role: "assistant", + content: [], + api: "anthropic-messages", + provider: "anthropic", + model: "claude-3-5-sonnet-20241022", + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop", + timestamp: Date.now(), + }, + }; + yield { + type: "done", + message: { + role: "assistant", + content: [{ type: "text", text: "ok" }], + api: "anthropic-messages", + provider: "anthropic", + model: "claude-3-5-sonnet-20241022", + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop", + timestamp: Date.now(), + }, + }; + }, + result: async () => ({ + role: "assistant", + content: [{ type: "text", text: "ok" }], + api: "anthropic-messages", + provider: "anthropic", + model: "claude-3-5-sonnet-20241022", + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop", + timestamp: Date.now(), + }), + [Symbol.asyncDispose]: async () => {}, + } as AssistantMessageEventStream; + }, + }); + + await agent.prompt("hello"); + assert.equal(capturedOptions?.customRuntimeOption, "present"); + }); }); diff --git a/packages/pi-agent-core/src/agent.ts b/packages/pi-agent-core/src/agent.ts index e65ae7a35..924dd8d39 100644 --- a/packages/pi-agent-core/src/agent.ts +++ b/packages/pi-agent-core/src/agent.ts @@ -108,6 +108,14 @@ export interface AgentOptions { * switches mid-session are handled correctly. */ externalToolExecution?: (model: Model) => boolean; + + /** + * Optional provider-specific options to merge into the next stream call. + * + * Use this for runtime-only callbacks or handles that should not live in + * shared agent state, such as UI bridges for external CLI providers. 
+ */ + getProviderOptions?: (model: Model) => Record | undefined | Promise | undefined>; } /** @@ -152,6 +160,7 @@ export class Agent { private _beforeToolCall?: AgentLoopConfig["beforeToolCall"]; private _afterToolCall?: AgentLoopConfig["afterToolCall"]; private _externalToolExecution?: (model: Model) => boolean; + private _getProviderOptions?: AgentOptions["getProviderOptions"]; constructor(opts: AgentOptions = {}) { this._state = { ...this._state, ...opts.initialState }; @@ -167,6 +176,7 @@ export class Agent { this._transport = opts.transport ?? "sse"; this._maxRetryDelayMs = opts.maxRetryDelayMs; this._externalToolExecution = opts.externalToolExecution; + this._getProviderOptions = opts.getProviderOptions; } /** @@ -486,8 +496,10 @@ export class Agent { }; let skipInitialSteeringPoll = options?.skipInitialSteeringPoll === true; + const providerOptions = await this._getProviderOptions?.(model); const config: AgentLoopConfig = { + ...(providerOptions ?? {}), model, reasoning, sessionId: this._sessionId, diff --git a/packages/pi-ai/src/env-api-keys.ts b/packages/pi-ai/src/env-api-keys.ts index 1036c4b28..48edd5575 100644 --- a/packages/pi-ai/src/env-api-keys.ts +++ b/packages/pi-ai/src/env-api-keys.ts @@ -137,6 +137,7 @@ export function getEnvApiKey(provider: any): string | undefined { "opencode-go": "OPENCODE_API_KEY", "kimi-coding": "KIMI_API_KEY", "alibaba-coding-plan": "ALIBABA_API_KEY", + "alibaba-dashscope": "DASHSCOPE_API_KEY", ollama: "OLLAMA_API_KEY", "ollama-cloud": "OLLAMA_API_KEY", "custom-openai": "CUSTOM_OPENAI_API_KEY", diff --git a/packages/pi-ai/src/index.ts b/packages/pi-ai/src/index.ts index c8d9e1e8c..8b81cc22e 100644 --- a/packages/pi-ai/src/index.ts +++ b/packages/pi-ai/src/index.ts @@ -12,7 +12,10 @@ export * from "./providers/google-vertex.js"; export * from "./providers/mistral.js"; export * from "./providers/openai-completions.js"; export * from "./providers/openai-responses.js"; +export * from "./providers/provider-capabilities.js"; export * from "./providers/register-builtins.js"; +export type { ProviderSwitchReport } from "./providers/transform-messages.js"; +export { createEmptyReport, hasTransformations, transformMessagesWithReport } from "./providers/transform-messages.js"; export * from "./stream.js"; export * from "./types.js"; export * from "./utils/event-stream.js"; diff --git a/packages/pi-ai/src/models.custom.ts b/packages/pi-ai/src/models.custom.ts index c3cc5ac04..37bccc97a 100644 --- a/packages/pi-ai/src/models.custom.ts +++ b/packages/pi-ai/src/models.custom.ts @@ -170,6 +170,104 @@ export const CUSTOM_MODELS = { } satisfies Model<"openai-completions">, }, + // ─── Alibaba DashScope ─────────────────────────────────────────────── + // Regular DashScope API for users without the Coding Plan. + // Uses the international OpenAI-compatible endpoint. 
+ // Requires DASHSCOPE_API_KEY from: dashscope.console.aliyun.com + // Pricing: https://www.alibabacloud.com/help/en/model-studio/model-pricing + "alibaba-dashscope": { + "qwen3-max": { + id: "qwen3-max", + name: "Qwen3 Max", + api: "openai-completions", + provider: "alibaba-dashscope", + baseUrl: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1", + reasoning: true, + input: ["text"], + cost: { + input: 1.2, + output: 6, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 32768, + compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, + } satisfies Model<"openai-completions">, + "qwen3.5-plus": { + id: "qwen3.5-plus", + name: "Qwen3.5 Plus", + api: "openai-completions", + provider: "alibaba-dashscope", + baseUrl: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.4, + output: 1.2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 65536, + compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, + } satisfies Model<"openai-completions">, + "qwen3.5-flash": { + id: "qwen3.5-flash", + name: "Qwen3.5 Flash", + api: "openai-completions", + provider: "alibaba-dashscope", + baseUrl: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.1, + output: 0.4, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 32768, + compat: { supportsDeveloperRole: false }, + } satisfies Model<"openai-completions">, + "qwen3-coder-plus": { + id: "qwen3-coder-plus", + name: "Qwen3 Coder Plus", + api: "openai-completions", + provider: "alibaba-dashscope", + baseUrl: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1", + reasoning: false, + input: ["text"], + cost: { + input: 1.0, + output: 5.0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 65536, + compat: { supportsDeveloperRole: false }, + } satisfies Model<"openai-completions">, + "qwen3.6-plus": { + id: "qwen3.6-plus", + name: "Qwen3.6 Plus", + api: "openai-completions", + provider: "alibaba-dashscope", + baseUrl: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.5, + output: 3.0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 65536, + compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, + } satisfies Model<"openai-completions">, + }, + // ─── Z.AI (GLM-5.1) ──────────────────────────────────────────────── // GLM-5.1 is the latest GLM model from Zhipu AI, not yet in models.dev. // Uses the Z.AI Coding Plan endpoint (OpenAI-compatible). 
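For reference, a minimal usage sketch of the provider entries added above. CUSTOM_MODELS and getEnvApiKey are the real exports touched in this diff; the import paths and the surrounding script are illustrative only and assume the caller sits next to these modules in packages/pi-ai/src:

import { CUSTOM_MODELS } from "./models.custom.js";
import { getEnvApiKey } from "./env-api-keys.js";

// Every alibaba-dashscope entry points at the international
// OpenAI-compatible endpoint and authenticates with DASHSCOPE_API_KEY.
const model = CUSTOM_MODELS["alibaba-dashscope"]["qwen3.5-plus"];
console.log(model.baseUrl); // "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
console.log(getEnvApiKey("alibaba-dashscope")); // reads process.env.DASHSCOPE_API_KEY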
diff --git a/packages/pi-ai/src/models.generated.test.ts b/packages/pi-ai/src/models.generated.test.ts
new file mode 100644
index 000000000..bfba0704d
--- /dev/null
+++ b/packages/pi-ai/src/models.generated.test.ts
@@ -0,0 +1,373 @@
+import { describe, it } from "node:test";
+import assert from "node:assert/strict";
+import { MODELS } from "./models.generated.js";
+import { getModel, getModels, getProviders } from "./models.js";
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Regression: qwen/qwen3.6-plus missing from OpenRouter (issue #3582)
+// ═══════════════════════════════════════════════════════════════════════════
+
+describe("regression #3582 — qwen/qwen3.6-plus available via openrouter", () => {
+  it("qwen/qwen3.6-plus exists in MODELS['openrouter']", () => {
+    const model = MODELS["openrouter"]["qwen/qwen3.6-plus" as keyof (typeof MODELS)["openrouter"]];
+    assert.ok(model, "qwen/qwen3.6-plus must be present in MODELS.openrouter");
+  });
+
+  it("qwen/qwen3.6-plus is accessible via getModel()", () => {
+    const model = getModel("openrouter", "qwen/qwen3.6-plus" as any);
+    assert.ok(model, "getModel('openrouter', 'qwen/qwen3.6-plus') must return a model");
+  });
+
+  it("qwen/qwen3.6-plus has id matching its registry key", () => {
+    const model = getModel("openrouter", "qwen/qwen3.6-plus" as any);
+    assert.equal(model.id, "qwen/qwen3.6-plus");
+  });
+
+  it("qwen/qwen3.6-plus has provider set to openrouter", () => {
+    const model = getModel("openrouter", "qwen/qwen3.6-plus" as any);
+    assert.equal(model.provider, "openrouter");
+  });
+
+  it("qwen/qwen3.6-plus has reasoning enabled", () => {
+    const model = getModel("openrouter", "qwen/qwen3.6-plus" as any);
+    assert.equal(model.reasoning, true, "Qwen3.6 Plus is a reasoning model");
+  });
+
+  it("qwen/qwen3.6-plus has 1M context window", () => {
+    const model = getModel("openrouter", "qwen/qwen3.6-plus" as any);
+    assert.equal(model.contextWindow, 1_000_000);
+  });
+});
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Regression: z-ai/glm-5.1 missing from OpenRouter (issue #4069)
+// ═══════════════════════════════════════════════════════════════════════════
+
+describe("regression #4069 — z-ai/glm-5.1 available via openrouter", () => {
+  it("z-ai/glm-5.1 exists in MODELS['openrouter']", () => {
+    const model = MODELS["openrouter"]["z-ai/glm-5.1" as keyof (typeof MODELS)["openrouter"]];
+    assert.ok(model, "z-ai/glm-5.1 must be present in MODELS.openrouter");
+  });
+
+  it("z-ai/glm-5.1 is accessible via getModel()", () => {
+    const model = getModel("openrouter", "z-ai/glm-5.1" as any);
+    assert.ok(model, "getModel('openrouter', 'z-ai/glm-5.1') must return a model");
+  });
+
+  it("z-ai/glm-5.1 has id matching its registry key", () => {
+    const model = getModel("openrouter", "z-ai/glm-5.1" as any);
+    assert.equal(model.id, "z-ai/glm-5.1");
+  });
+
+  it("z-ai/glm-5.1 has provider set to openrouter", () => {
+    const model = getModel("openrouter", "z-ai/glm-5.1" as any);
+    assert.equal(model.provider, "openrouter");
+  });
+
+  it("z-ai/glm-5.1 has a positive context window", () => {
+    const model = getModel("openrouter", "z-ai/glm-5.1" as any);
+    assert.ok(model.contextWindow > 0);
+  });
+
+  it("z-ai/glm-5.1 uses the OpenRouter base URL", () => {
+    const model = getModel("openrouter", "z-ai/glm-5.1" as any);
+    assert.equal(model.baseUrl, "https://openrouter.ai/api/v1");
+  });
+});
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Structural invariants — every model in MODELS must be well-formed
+// ═══════════════════════════════════════════════════════════════════════════
+
+describe("MODELS structural invariants", () => {
+  type ModelEntry = { providerKey: string; modelKey: string; model: Record<string, unknown> };
+
+  function allModels(): ModelEntry[] {
+    const entries: ModelEntry[] = [];
+    for (const [providerKey, providerModels] of Object.entries(MODELS)) {
+      for (const [modelKey, model] of Object.entries(providerModels)) {
+        entries.push({ providerKey, modelKey, model: model as Record<string, unknown> });
+      }
+    }
+    return entries;
+  }
+
+  it("every model's id field matches its key in MODELS", () => {
+    const mismatches: string[] = [];
+    for (const { providerKey, modelKey, model } of allModels()) {
+      if (model["id"] !== modelKey) {
+        mismatches.push(`${providerKey}/${modelKey}: id="${model["id"]}"`);
+      }
+    }
+    assert.deepEqual(mismatches, [], `Models where 'id' doesn't match registry key:\n  ${mismatches.join("\n  ")}`);
+  });
+
+  it("every model's provider field matches its parent provider key", () => {
+    const mismatches: string[] = [];
+    for (const { providerKey, modelKey, model } of allModels()) {
+      if (model["provider"] !== providerKey) {
+        mismatches.push(`${providerKey}/${modelKey}: provider="${model["provider"]}"`);
+      }
+    }
+    assert.deepEqual(mismatches, [], `Models where 'provider' doesn't match parent key:\n  ${mismatches.join("\n  ")}`);
+  });
+
+  it("every model has a non-empty string name", () => {
+    const invalid: string[] = [];
+    for (const { providerKey, modelKey, model } of allModels()) {
+      if (typeof model["name"] !== "string" || model["name"].trim() === "") {
+        invalid.push(`${providerKey}/${modelKey}`);
+      }
+    }
+    assert.deepEqual(invalid, [], `Models with missing or empty name:\n  ${invalid.join("\n  ")}`);
+  });
+
+  it("every model has a non-empty string api", () => {
+    const invalid: string[] = [];
+    for (const { providerKey, modelKey, model } of allModels()) {
+      if (typeof model["api"] !== "string" || model["api"].trim() === "") {
+        invalid.push(`${providerKey}/${modelKey}`);
+      }
+    }
+    assert.deepEqual(invalid, [], `Models with missing or empty api:\n  ${invalid.join("\n  ")}`);
+  });
+
+  it("every model's baseUrl starts with https:// (or is empty for azure-openai-responses)", () => {
+    const invalid: string[] = [];
+    for (const { providerKey, modelKey, model } of allModels()) {
+      if (providerKey === "azure-openai-responses") continue;
+      const url = model["baseUrl"];
+      if (typeof url !== "string" || !url.startsWith("https://")) {
+        invalid.push(`${providerKey}/${modelKey}: baseUrl="${url}"`);
+      }
+    }
+    assert.deepEqual(invalid, [], `Models with missing or non-HTTPS baseUrl:\n  ${invalid.join("\n  ")}`);
+  });
+
+  it("azure-openai-responses models have an empty baseUrl (runtime-configured)", () => {
+    const models = getModels("azure-openai-responses");
+    assert.ok(models.length > 0, "azure-openai-responses must have at least one model");
+    for (const model of models) {
+      assert.equal(model.baseUrl, "", `azure-openai-responses/${model.id} should have empty baseUrl`);
+    }
+  });
+
+  it("every model has a boolean reasoning field", () => {
+    const invalid: string[] = [];
+    for (const { providerKey, modelKey, model } of allModels()) {
+      if (typeof model["reasoning"] !== "boolean") {
+        invalid.push(`${providerKey}/${modelKey}: reasoning=${model["reasoning"]}`);
+      }
+    }
+    assert.deepEqual(invalid, [], `Models with non-boolean reasoning:\n  ${invalid.join("\n  ")}`);
+  });
+
+  it("every model has a non-empty input array", () => {
+    const invalid: string[] = [];
+    for (const { providerKey, modelKey, model } of allModels()) {
+      const input = model["input"];
+      if (!Array.isArray(input) || input.length === 0) {
+        invalid.push(`${providerKey}/${modelKey}`);
+      }
+    }
+    assert.deepEqual(invalid, [], `Models with missing or empty input array:\n  ${invalid.join("\n  ")}`);
+  });
+
+  it("every model has a positive contextWindow", () => {
+    const invalid: string[] = [];
+    for (const { providerKey, modelKey, model } of allModels()) {
+      const cw = model["contextWindow"];
+      if (typeof cw !== "number" || cw <= 0 || !Number.isFinite(cw)) {
+        invalid.push(`${providerKey}/${modelKey}: contextWindow=${cw}`);
+      }
+    }
+    assert.deepEqual(invalid, [], `Models with invalid contextWindow:\n  ${invalid.join("\n  ")}`);
+  });
+
+  it("every model has a positive maxTokens", () => {
+    const invalid: string[] = [];
+    for (const { providerKey, modelKey, model } of allModels()) {
+      const mt = model["maxTokens"];
+      if (typeof mt !== "number" || mt <= 0 || !Number.isFinite(mt)) {
+        invalid.push(`${providerKey}/${modelKey}: maxTokens=${mt}`);
+      }
+    }
+    assert.deepEqual(invalid, [], `Models with invalid maxTokens:\n  ${invalid.join("\n  ")}`);
+  });
+
+  it("every model's maxTokens does not exceed contextWindow", () => {
+    const knownExceptions = new Set([
+      "openrouter/meta-llama/llama-3-8b-instruct",
+      "openrouter/nex-agi/deepseek-v3.1-nex-n1",
+      "openrouter/openai/gpt-3.5-turbo-0613",
+      "openrouter/z-ai/glm-5",
+    ]);
+
+    const invalid: string[] = [];
+    for (const { providerKey, modelKey, model } of allModels()) {
+      if (knownExceptions.has(`${providerKey}/${modelKey}`)) continue;
+      const cw = model["contextWindow"] as number;
+      const mt = model["maxTokens"] as number;
+      if (typeof cw === "number" && typeof mt === "number" && mt > cw) {
+        invalid.push(`${providerKey}/${modelKey}: maxTokens(${mt}) > contextWindow(${cw})`);
+      }
+    }
+    assert.deepEqual(invalid, [], `Models where maxTokens exceeds contextWindow:\n  ${invalid.join("\n  ")}`);
+  });
+
+  it("every model has a cost object with non-negative numeric fields", () => {
+    const knownNegativeCostModels = new Set([
+      "openrouter/openrouter/auto",
+    ]);
+
+    const invalid: string[] = [];
+    for (const { providerKey, modelKey, model } of allModels()) {
+      if (knownNegativeCostModels.has(`${providerKey}/${modelKey}`)) continue;
+      const cost = model["cost"] as Record<string, unknown> | undefined;
+      if (!cost || typeof cost !== "object") {
+        invalid.push(`${providerKey}/${modelKey}: missing cost object`);
+        continue;
+      }
+      for (const field of ["input", "output", "cacheRead", "cacheWrite"] as const) {
+        const val = cost[field];
+        if (typeof val !== "number" || val < 0 || !Number.isFinite(val)) {
+          invalid.push(`${providerKey}/${modelKey}: cost.${field}=${val}`);
+        }
+      }
+    }
+    assert.deepEqual(invalid, [], `Models with invalid cost fields:\n  ${invalid.join("\n  ")}`);
+  });
+
+  it("no provider has duplicate model IDs", () => {
+    const duplicates: string[] = [];
+    for (const [providerKey, providerModels] of Object.entries(MODELS)) {
+      const ids = Object.values(providerModels).map((m) => (m as Record<string, unknown>)["id"] as string);
+      const seen = new Set<string>();
+      for (const id of ids) {
+        if (seen.has(id)) duplicates.push(`${providerKey}/${id}`);
+        seen.add(id);
+      }
+    }
+    assert.deepEqual(duplicates, [], `Duplicate model IDs within a provider:\n  ${duplicates.join("\n  ")}`);
+  });
+});
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Registry shape
+// ═══════════════════════════════════════════════════════════════════════════
+
+describe("MODELS registry shape", () => {
+  it("has exactly 23 providers", () => {
+    const count = Object.keys(MODELS).length;
+    assert.equal(count, 23, `Expected 23 providers, got ${count}: ${Object.keys(MODELS).join(", ")}`);
+  });
+
+  it("has at least 200 models in total (sanity check)", () => {
+    let total = 0;
+    for (const providerModels of Object.values(MODELS)) {
+      total += Object.keys(providerModels).length;
+    }
+    assert.ok(total >= 200, `Registry has only ${total} models — unexpectedly small`);
+  });
+
+  it("all 23 expected providers are present", () => {
+    const expected = [
+      "amazon-bedrock",
+      "anthropic",
+      "azure-openai-responses",
+      "cerebras",
+      "github-copilot",
+      "google",
+      "google-antigravity",
+      "google-gemini-cli",
+      "google-vertex",
+      "groq",
+      "huggingface",
+      "kimi-coding",
+      "minimax",
+      "minimax-cn",
+      "mistral",
+      "openai",
+      "openai-codex",
+      "opencode",
+      "opencode-go",
+      "openrouter",
+      "vercel-ai-gateway",
+      "xai",
+      "zai",
+    ];
+    const actual = Object.keys(MODELS).sort();
+    assert.deepEqual(actual, expected.sort());
+  });
+
+  it("getProviders() returns all generated providers", () => {
+    const providers = getProviders();
+    for (const p of Object.keys(MODELS)) {
+      assert.ok(providers.includes(p as any), `getProviders() missing generated provider: ${p}`);
+    }
+  });
+});
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Removed models must not exist
+// ═══════════════════════════════════════════════════════════════════════════
+
+describe("removed models are absent from the registry", () => {
+  const removedModels: Array<{ provider: string; id: string }> = [
+    { provider: "openrouter", id: "anthropic/claude-3.5-sonnet" },
+    { provider: "openrouter", id: "anthropic/claude-3.5-sonnet-20240620" },
+    { provider: "openrouter", id: "mistralai/mistral-small-24b-instruct-2501" },
+    { provider: "openrouter", id: "mistralai/mistral-small-3.1-24b-instruct:free" },
+    { provider: "openrouter", id: "qwen/qwen3-4b:free" },
+    { provider: "openrouter", id: "stepfun/step-3.5-flash:free" },
+    { provider: "openrouter", id: "x-ai/grok-4.20-beta" },
+    { provider: "openrouter", id: "arcee-ai/trinity-mini:free" },
+    { provider: "openrouter", id: "google/gemini-3-pro-preview" },
+    { provider: "openrouter", id: "kwaipilot/kat-coder-pro" },
+    { provider: "openrouter", id: "meituan/longcat-flash-thinking" },
+    { provider: "vercel-ai-gateway", id: "xai/grok-2-vision" },
+    { provider: "anthropic", id: "claude-3-7-sonnet-latest" },
+  ];
+
+  for (const { provider, id } of removedModels) {
+    it(`${provider}/${id} has been removed`, () => {
+      const model = getModel(provider as any, id as any);
+      assert.equal(model, undefined, `${provider}/${id} should be removed but is still present`);
+    });
+  }
+});
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Spot-checks for notable models added in this regeneration
+// ═══════════════════════════════════════════════════════════════════════════
+
+describe("spot-checks for models added in this regeneration", () => {
+  const newModels: Array<{ provider: string; id: string; reasoning?: boolean }> = [
+    { provider: "openrouter", id: "z-ai/glm-5.1" },
+    { provider: "openrouter", id: "z-ai/glm-5v-turbo" },
+    { provider: "openrouter", id: "google/gemma-4-31b-it" },
+    { provider: "openrouter", id: "google/gemma-4-26b-a4b-it" },
+    { provider: "openrouter", id: "arcee-ai/trinity-large-thinking", reasoning: true },
+    { provider: "openrouter", id: "openai/gpt-audio" },
+    { provider: "openrouter", id: "anthropic/claude-opus-4.6-fast" },
+    { provider: "openrouter", id: "qwen/qwen3.6-plus" },
+    { provider: "groq", id: "groq/compound" },
+    { provider: "groq", id: "groq/compound-mini" },
+    { provider: "huggingface", id: "zai-org/GLM-5.1" },
+    { provider: "openai", id: "gpt-5.3-chat-latest" },
+    { provider: "mistral", id: "mistral-small-2603" },
+    { provider: "zai", id: "glm-5.1" },
+  ];
+
+  for (const { provider, id, reasoning } of newModels) {
+    it(`${provider}/${id} is present in the registry`, () => {
+      const model = getModel(provider as any, id as any);
+      assert.ok(model, `Expected ${provider}/${id} to be present after regeneration`);
+      assert.equal(model.id, id);
+      assert.equal(model.provider, provider);
+      if (reasoning !== undefined) {
+        assert.equal(model.reasoning, reasoning, `${id} reasoning should be ${reasoning}`);
+      }
+    });
+  }
+});
diff --git a/packages/pi-ai/src/models.generated.ts b/packages/pi-ai/src/models.generated.ts
index e62965533..cb775bf68 100644
--- a/packages/pi-ai/src/models.generated.ts
+++ b/packages/pi-ai/src/models.generated.ts
@@ -804,6 +804,23 @@
       contextWindow: 204800,
       maxTokens: 131072,
     } satisfies Model<"bedrock-converse-stream">,
+    "minimax.minimax-m2.5": {
+      id: "minimax.minimax-m2.5",
+      name: "MiniMax M2.5",
+      api: "bedrock-converse-stream",
+      provider: "amazon-bedrock",
+      baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
+      reasoning: true,
+      input: ["text"],
+      cost: {
+        input: 0.3,
+        output: 1.2,
+        cacheRead: 0,
+        cacheWrite: 0,
+      },
+      contextWindow: 196608,
+      maxTokens: 98304,
+    } satisfies Model<"bedrock-converse-stream">,
     "mistral.devstral-2-123b": {
       id: "mistral.devstral-2-123b",
       name: "Devstral 2 123B",
@@ -1042,6 +1059,23 @@
       contextWindow: 128000,
       maxTokens: 4096,
     } satisfies Model<"bedrock-converse-stream">,
+    "nvidia.nemotron-super-3-120b": {
+      id: "nvidia.nemotron-super-3-120b",
+      name: "NVIDIA Nemotron 3 Super 120B A12B",
+      api: "bedrock-converse-stream",
+      provider: "amazon-bedrock",
+      baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
+      reasoning: true,
+      input: ["text"],
+      cost: {
+        input: 0.15,
+        output: 0.65,
+        cacheRead: 0,
+        cacheWrite: 0,
+      },
+      contextWindow: 262144,
+      maxTokens: 131072,
+    } satisfies Model<"bedrock-converse-stream">,
     "openai.gpt-oss-120b-1:0": {
       id: "openai.gpt-oss-120b-1:0",
       name: "gpt-oss-120b",
@@ -1178,6 +1212,23 @@
       contextWindow: 131072,
       maxTokens: 65536,
     } satisfies Model<"bedrock-converse-stream">,
+    "qwen.qwen3-coder-next": {
+      id: "qwen.qwen3-coder-next",
+      name: "Qwen3 Coder Next",
+      api: "bedrock-converse-stream",
+      provider: "amazon-bedrock",
+      baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
+      reasoning: true,
+      input: ["text"],
+      cost: {
+        input: 0.22,
+        output: 1.8,
+        cacheRead: 0,
+        cacheWrite: 0,
+      },
+      contextWindow: 131072,
+      maxTokens: 65536,
+    } satisfies Model<"bedrock-converse-stream">,
     "qwen.qwen3-next-80b-a3b": {
       id: "qwen.qwen3-next-80b-a3b",
       name: "Qwen/Qwen3-Next-80B-A3B-Instruct",
@@ -1416,6 +1467,23 @@
       contextWindow: 200000,
       maxTokens: 131072,
     } satisfies Model<"bedrock-converse-stream">,
+    "zai.glm-5": {
+      id: "zai.glm-5",
+      name: "GLM-5",
+      api: "bedrock-converse-stream",
+      provider: "amazon-bedrock",
+      baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
+      reasoning: true,
+      input: ["text"],
+      cost: {
+        input: 1,
+        output: 3.2,
+        cacheRead: 0,
+        cacheWrite: 0,
+      },
+      contextWindow: 202752,
+      maxTokens: 101376,
+    } satisfies Model<"bedrock-converse-stream">,
   },
   "anthropic": {
     "claude-3-5-haiku-20241022": {
@@ -1503,23 +1571,6 @@
       contextWindow: 200000,
       maxTokens: 64000,
     } satisfies Model<"anthropic-messages">,
-    "claude-3-7-sonnet-latest": {
-      id: "claude-3-7-sonnet-latest",
-      name: "Claude Sonnet 3.7 (latest)",
-      api: "anthropic-messages",
-      provider: "anthropic",
-      baseUrl: "https://api.anthropic.com",
-      reasoning: true,
-      input: ["text", "image"],
-      cost: {
-        input: 3,
-        output: 15,
-        cacheRead: 0.3,
-        cacheWrite: 3.75,
-      },
-      contextWindow: 200000,
-      maxTokens: 64000,
-    } satisfies Model<"anthropic-messages">,
     "claude-3-haiku-20240307": {
       id: "claude-3-haiku-20240307",
       name: "Claude Haiku 3",
@@ -2253,6 +2304,23 @@
       contextWindow: 400000,
       maxTokens: 128000,
     } satisfies Model<"azure-openai-responses">,
+    "gpt-5.3-chat-latest": {
+      id: "gpt-5.3-chat-latest",
+      name: "GPT-5.3 Chat (latest)",
+      api: "azure-openai-responses",
+      provider: "azure-openai-responses",
+      baseUrl: "",
+      reasoning: false,
+      input: ["text", "image"],
+      cost: {
+        input: 1.75,
+        output: 14,
+        cacheRead: 0.175,
+        cacheWrite: 0,
+      },
+      contextWindow: 128000,
+      maxTokens: 16384,
+    } satisfies Model<"azure-openai-responses">,
     "gpt-5.3-codex": {
       id: "gpt-5.3-codex",
       name: "GPT-5.3 Codex",
@@ -2967,7 +3035,7 @@
     } satisfies Model<"openai-responses">,
     "gpt-5.4-mini": {
       id: "gpt-5.4-mini",
-      name: "GPT-5.4 mini",
+      name: "GPT-5.4 Mini",
       api: "openai-responses",
       provider: "github-copilot",
       baseUrl: "https://api.individual.githubcopilot.com",
@@ -3412,6 +3480,57 @@
       contextWindow: 131072,
       maxTokens: 65536,
     } satisfies Model<"google-generative-ai">,
+    "gemma-3-27b-it": {
+      id: "gemma-3-27b-it",
+      name: "Gemma 3 27B",
+      api: "google-generative-ai",
+      provider: "google",
+      baseUrl: "https://generativelanguage.googleapis.com/v1beta",
+      reasoning: false,
+      input: ["text", "image"],
+      cost: {
+        input: 0,
+        output: 0,
+        cacheRead: 0,
+        cacheWrite: 0,
+      },
+      contextWindow: 131072,
+      maxTokens: 8192,
+    } satisfies Model<"google-generative-ai">,
+    "gemma-4-26b-it": {
+      id: "gemma-4-26b-it",
+      name: "Gemma 4 26B",
+      api: "google-generative-ai",
+      provider: "google",
+      baseUrl: "https://generativelanguage.googleapis.com/v1beta",
+      reasoning: true,
+      input: ["text", "image"],
+      cost: {
+        input: 0,
+        output: 0,
+        cacheRead: 0,
+        cacheWrite: 0,
+      },
+      contextWindow: 256000,
+      maxTokens: 8192,
+    } satisfies Model<"google-generative-ai">,
+    "gemma-4-31b-it": {
+      id: "gemma-4-31b-it",
+      name: "Gemma 4 31B",
+      api: "google-generative-ai",
+      provider: "google",
+      baseUrl: "https://generativelanguage.googleapis.com/v1beta",
+      reasoning: true,
+      input: ["text", "image"],
+      cost: {
+        input: 0,
+        output: 0,
+        cacheRead: 0,
+        cacheWrite: 0,
+      },
+      contextWindow: 256000,
+      maxTokens: 8192,
+    } satisfies Model<"google-generative-ai">,
   },
   "google-antigravity": {
     "claude-opus-4-5-thinking": {
@@ -3913,6 +4032,40 @@
       contextWindow: 8192,
       maxTokens: 8192,
     } satisfies Model<"openai-completions">,
+    "groq/compound": {
+      id: "groq/compound",
+      name: "Compound",
+      api: "openai-completions",
+      provider: "groq",
+      baseUrl: "https://api.groq.com/openai/v1",
+      reasoning: true,
+      input: ["text"],
+      cost: {
+        input: 0,
+        output: 0,
+        cacheRead: 0,
+        cacheWrite: 0,
+      },
+      contextWindow: 131072,
+      maxTokens: 8192,
+    } satisfies Model<"openai-completions">,
+    "groq/compound-mini": {
+      id: "groq/compound-mini",
+      name: "Compound Mini",
+      api: "openai-completions",
+      provider: "groq",
+      baseUrl: "https://api.groq.com/openai/v1",
+      reasoning: true,
+      input: ["text"],
+      cost: {
+        input: 0,
+        output: 0,
+        cacheRead: 0,
+        cacheWrite: 0,
+      },
+      contextWindow: 131072,
+      maxTokens: 8192,
+    } satisfies Model<"openai-completions">,
     "llama-3.1-8b-instant": {
       id: "llama-3.1-8b-instant",
       name: "Llama 3.1 8B Instant",
@@ -4100,6 +4253,23 @@
       contextWindow: 131072,
       maxTokens: 65536,
     } satisfies Model<"openai-completions">,
+    "openai/gpt-oss-safeguard-20b": {
+      id: "openai/gpt-oss-safeguard-20b",
+      name: "Safety GPT OSS 20B",
+      api: "openai-completions",
+      provider: "groq",
+      baseUrl: "https://api.groq.com/openai/v1",
+      reasoning: true,
+      input: ["text"],
+      cost: {
+        input: 0.075,
+        output: 0.3,
+        cacheRead: 0.037,
+        cacheWrite: 0,
+      },
+      contextWindow: 131072,
+      maxTokens: 65536,
+    } satisfies Model<"openai-completions">,
     "qwen-qwq-32b": {
       id: "qwen-qwq-32b",
       name: "Qwen QwQ 32B",
@@ -4132,7 +4302,7 @@
         cacheWrite: 0,
       },
       contextWindow: 131072,
-      maxTokens: 16384,
+      maxTokens: 40960,
     } satisfies Model<"openai-completions">,
   },
   "huggingface": {
@@ -4460,6 +4630,24 @@
       contextWindow: 202752,
       maxTokens: 131072,
     } satisfies Model<"openai-completions">,
+    "zai-org/GLM-5.1": {
+      id: "zai-org/GLM-5.1",
+      name: "GLM-5.1",
+      api: "openai-completions",
+      provider: "huggingface",
+      baseUrl: "https://router.huggingface.co/v1",
+      compat: {"supportsDeveloperRole":false},
+      reasoning: true,
+      input: ["text"],
+      cost: {
+        input: 1,
+        output: 3.2,
+        cacheRead: 0.2,
+        cacheWrite: 0,
+      },
+      contextWindow: 202752,
+      maxTokens: 131072,
+    } satisfies Model<"openai-completions">,
   },
   "kimi-coding": {
     "k2p5": {
@@ -5029,22 +5217,39 @@
       contextWindow: 128000,
       maxTokens: 16384,
     } satisfies Model<"mistral-conversations">,
+    "mistral-small-2603": {
+      id: "mistral-small-2603",
+      name: "Mistral Small 4",
+      api: "mistral-conversations",
+      provider: "mistral",
+      baseUrl: "https://api.mistral.ai",
+      reasoning: true,
+      input: ["text", "image"],
+      cost: {
+        input: 0.15,
+        output: 0.6,
+        cacheRead: 0,
+        cacheWrite: 0,
+      },
+      contextWindow: 256000,
+      maxTokens: 256000,
+    } satisfies Model<"mistral-conversations">,
     "mistral-small-latest": {
       id: "mistral-small-latest",
       name: "Mistral Small (latest)",
       api: "mistral-conversations",
       provider: "mistral",
       baseUrl: "https://api.mistral.ai",
-      reasoning: false,
+      reasoning: true,
       input: ["text", "image"],
       cost: {
-        input: 0.1,
-        output: 0.3,
+        input: 0.15,
+        output: 0.6,
         cacheRead: 0,
         cacheWrite: 0,
       },
-      contextWindow: 128000,
-      maxTokens: 16384,
+      contextWindow: 256000,
+      maxTokens: 256000,
     } satisfies Model<"mistral-conversations">,
     "open-mistral-7b": {
       id: "open-mistral-7b",
@@ -5575,6 +5780,23 @@
       contextWindow: 400000,
       maxTokens: 128000,
     } satisfies Model<"openai-responses">,
+    "gpt-5.3-chat-latest": {
+      id: "gpt-5.3-chat-latest",
+      name: "GPT-5.3 Chat (latest)",
+      api: "openai-responses",
+      provider: "openai",
+      baseUrl: "https://api.openai.com/v1",
+      reasoning: false,
+      input: ["text", "image"],
+      cost: {
+        input: 1.75,
+        output: 14,
+        cacheRead: 0.175,
+        cacheWrite: 0,
+      },
+      contextWindow: 128000,
+      maxTokens: 16384,
+    } satisfies Model<"openai-responses">,
     "gpt-5.3-codex": {
       id: "gpt-5.3-codex",
       name: "GPT-5.3 Codex",
@@ -6157,6 +6379,23 @@
       contextWindow: 204800,
       maxTokens: 131072,
     } satisfies Model<"openai-completions">,
+    "glm-5.1": {
+      id: "glm-5.1",
+      name: "GLM-5.1",
+      api: "openai-completions",
+      provider: "opencode",
+      baseUrl: "https://opencode.ai/zen/v1",
+      reasoning: true,
+      input: ["text"],
+      cost: {
+        input: 1.4,
+        output: 4.4,
+        cacheRead: 0.26,
+        cacheWrite: 0,
+      },
+      contextWindow: 204800,
+      maxTokens: 131072,
+    } satisfies Model<"openai-completions">,
     "gpt-5": {
       id: "gpt-5",
       name: "GPT-5",
@@ -6412,40 +6651,6 @@
       contextWindow: 262144,
       maxTokens: 65536,
     } satisfies Model<"openai-completions">,
-    "mimo-v2-omni-free": {
-      id: "mimo-v2-omni-free",
-      name: "MiMo V2 Omni Free",
-      api: "openai-completions",
-      provider: "opencode",
-      baseUrl: "https://opencode.ai/zen/v1",
-      reasoning: true,
-      input: ["text", "image"],
-      cost: {
-        input: 0,
-        output: 0,
-        cacheRead: 0,
-        cacheWrite: 0,
-      },
-      contextWindow: 262144,
-      maxTokens: 64000,
-    } satisfies Model<"openai-completions">,
-    "mimo-v2-pro-free": {
-      id: "mimo-v2-pro-free",
-      name: "MiMo V2 Pro Free",
-      api: "openai-completions",
-      provider: "opencode",
-      baseUrl: "https://opencode.ai/zen/v1",
-      reasoning: true,
-      input: ["text"],
-      cost: {
-        input: 0,
-        output: 0,
-        cacheRead: 0,
-        cacheWrite: 0,
-      },
-      contextWindow: 1048576,
-      maxTokens: 64000,
-    } satisfies Model<"openai-completions">,
     "minimax-m2.5": {
       id: "minimax-m2.5",
       name: "MiniMax M2.5",
@@ -6494,7 +6699,7 @@
         cacheRead: 0,
         cacheWrite: 0,
       },
-      contextWindow: 1000000,
+      contextWindow: 204800,
       maxTokens: 128000,
     } satisfies Model<"openai-completions">,
   },
@@ -6516,6 +6721,23 @@
       contextWindow: 204800,
       maxTokens: 131072,
     } satisfies Model<"openai-completions">,
+    "glm-5.1": {
+      id: "glm-5.1",
+      name: "GLM-5.1",
+      api: "openai-completions",
+      provider: "opencode-go",
+      baseUrl: "https://opencode.ai/zen/go/v1",
+      reasoning: true,
+      input: ["text"],
+      cost: {
+        input: 1.4,
+        output: 4.4,
+        cacheRead: 0.26,
+        cacheWrite: 0,
+      },
+      contextWindow: 204800,
+      maxTokens: 131072,
+    } satisfies Model<"openai-completions">,
     "kimi-k2.5": {
       id: "kimi-k2.5",
       name: "Kimi K2.5",
@@ -6533,6 +6755,40 @@
       contextWindow: 262144,
       maxTokens: 65536,
     } satisfies Model<"openai-completions">,
+    "mimo-v2-omni": {
+      id: "mimo-v2-omni",
+      name: "MiMo V2 Omni",
+      api: "openai-completions",
+      provider: "opencode-go",
+      baseUrl: "https://opencode.ai/zen/go/v1",
+      reasoning: true,
+      input: ["text", "image"],
+      cost: {
+        input: 0.4,
+        output: 2,
+        cacheRead: 0.08,
+        cacheWrite: 0,
+      },
+      contextWindow: 262144,
+      maxTokens: 64000,
+    } satisfies Model<"openai-completions">,
+    "mimo-v2-pro": {
+      id: "mimo-v2-pro",
+      name: "MiMo V2 Pro",
+      api: "openai-completions",
+      provider: "opencode-go",
+      baseUrl: "https://opencode.ai/zen/go/v1",
+      reasoning: true,
+      input: ["text"],
+      cost: {
+        input: 1,
+        output: 3,
+        cacheRead: 0.2,
+        cacheWrite: 0,
+      },
+      contextWindow: 1048576,
+      maxTokens: 64000,
+    } satisfies Model<"openai-completions">,
     "minimax-m2.5": {
       id: "minimax-m2.5",
       name: "MiniMax M2.5",
@@ -6739,23 +6995,6 @@
       contextWindow: 200000,
       maxTokens: 8192,
     } satisfies Model<"openai-completions">,
-    "anthropic/claude-3.5-sonnet": {
-      id: "anthropic/claude-3.5-sonnet",
-      name: "Anthropic: Claude 3.5 Sonnet",
-      api: "openai-completions",
-      provider: "openrouter",
-      baseUrl: "https://openrouter.ai/api/v1",
-      reasoning: false,
-      input: ["text", "image"],
-      cost: {
-        input: 6,
-        output: 30,
-        cacheRead: 0.6,
-        cacheWrite: 7.5,
-      },
-      contextWindow: 200000,
-      maxTokens: 8192,
-    } satisfies Model<"openai-completions">,
     "anthropic/claude-3.7-sonnet": {
       id: "anthropic/claude-3.7-sonnet",
       name: "Anthropic: Claude 3.7 Sonnet",
@@ -6771,7 +7010,7 @@ export const MODELS = {
         cacheWrite: 3.75,
       },
       contextWindow: 200000,
-      maxTokens: 64000,
+      maxTokens: 128000,
     } satisfies Model<"openai-completions">,
     "anthropic/claude-3.7-sonnet:thinking": {
       id: "anthropic/claude-3.7-sonnet:thinking",
@@ -6875,6 +7114,23 @@
       contextWindow: 1000000,
       maxTokens: 128000,
     } satisfies Model<"openai-completions">,
+    "anthropic/claude-opus-4.6-fast": {
+      id: "anthropic/claude-opus-4.6-fast",
+      name: "Anthropic: Claude Opus 4.6 (Fast)",
+      api: "openai-completions",
+      provider: "openrouter",
+      baseUrl: "https://openrouter.ai/api/v1",
+      reasoning: true,
+      input: ["text", "image"],
+      cost: {
+        input: 30,
+        output: 150,
+        cacheRead: 3,
+        cacheWrite: 37.5,
+      },
+      contextWindow: 1000000,
+      maxTokens: 128000,
+    } satisfies Model<"openai-completions">,
     "anthropic/claude-sonnet-4": {
       id: "anthropic/claude-sonnet-4",
       name: "Anthropic: Claude Sonnet 4",
@@ -6889,7 +7145,7 @@
         cacheRead: 0.3,
         cacheWrite: 3.75,
       },
-      contextWindow: 200000,
+      contextWindow: 1000000,
       maxTokens: 64000,
     } satisfies Model<"openai-completions">,
     "anthropic/claude-sonnet-4.5": {
@@ -6943,6 +7199,23 @@
       contextWindow: 131000,
       maxTokens: 4096,
     } satisfies Model<"openai-completions">,
+    "arcee-ai/trinity-large-thinking": {
+      id: "arcee-ai/trinity-large-thinking",
+      name: "Arcee AI: Trinity Large Thinking",
+      api: "openai-completions",
+      provider: "openrouter",
+      baseUrl: "https://openrouter.ai/api/v1",
+      reasoning: true,
+      input: ["text"],
+      cost: {
+        input: 0.22,
+        output: 0.85,
+        cacheRead: 0,
+        cacheWrite: 0,
+      },
+      contextWindow: 262144,
+      maxTokens: 262144,
+    } satisfies Model<"openai-completions">,
     "arcee-ai/trinity-mini": {
       id: "arcee-ai/trinity-mini",
       name: "Arcee AI: Trinity Mini",
@@ -6960,23 +7233,6 @@
       contextWindow: 131072,
       maxTokens: 131072,
     } satisfies Model<"openai-completions">,
-    "arcee-ai/trinity-mini:free": {
-      id: "arcee-ai/trinity-mini:free",
-      name: "Arcee AI: Trinity Mini (free)",
-      api: "openai-completions",
-      provider: "openrouter",
-      baseUrl: "https://openrouter.ai/api/v1",
-      reasoning: true,
-      input: ["text"],
-      cost: {
-        input: 0,
-        output: 0,
-        cacheRead: 0,
-        cacheWrite: 0,
-      },
-      contextWindow: 131072,
-      maxTokens: 4096,
-    } satisfies Model<"openai-completions">,
     "arcee-ai/virtuoso-large": {
       id: "arcee-ai/virtuoso-large",
       name: "Arcee AI: Virtuoso Large",
@@ -7224,13 +7480,13 @@
       reasoning: true,
       input: ["text"],
       cost: {
-        input: 0.44999999999999996,
+        input: 0.5,
         output: 2.1500000000000004,
-        cacheRead: 0.22499999999999998,
+        cacheRead: 0.35,
         cacheWrite: 0,
       },
       contextWindow: 163840,
-      maxTokens: 65536,
+      maxTokens: 4096,
     } satisfies Model<"openai-completions">,
     "deepseek/deepseek-v3.1-terminus": {
       id: "deepseek/deepseek-v3.1-terminus",
@@ -7383,7 +7639,7 @@
         cacheWrite: 0.08333333333333334,
       },
       contextWindow: 1048576,
-      maxTokens: 65536,
+      maxTokens: 65535,
     } satisfies Model<"openai-completions">,
     "google/gemini-2.5-pro": {
       id: "google/gemini-2.5-pro",
@@ -7453,23 +7709,6 @@
       contextWindow: 1048576,
       maxTokens: 65536,
     } satisfies Model<"openai-completions">,
-    "google/gemini-3-pro-preview": {
-      id: "google/gemini-3-pro-preview",
-      name: "Google: Gemini 3 Pro Preview",
-      api: "openai-completions",
-      provider: "openrouter",
-      baseUrl: "https://openrouter.ai/api/v1",
-      reasoning: true,
-      input: ["text", "image"],
-      cost: {
-        input: 2,
-        output: 12,
-        cacheRead: 0.19999999999999998,
-        cacheWrite: 0.375,
-      },
-      contextWindow: 1048576,
-      maxTokens: 65536,
-    } satisfies Model<"openai-completions">,
     "google/gemini-3.1-flash-lite-preview": {
       id: "google/gemini-3.1-flash-lite-preview",
       name: "Google: Gemini 3.1 Flash Lite Preview",
@@ -7521,6 +7760,74 @@
       contextWindow: 1048576,
       maxTokens: 65536,
     } satisfies Model<"openai-completions">,
+    "google/gemma-4-26b-a4b-it": {
+      id: "google/gemma-4-26b-a4b-it",
+      name: "Google: Gemma 4 26B A4B ",
+      api: "openai-completions",
+      provider: "openrouter",
+      baseUrl: "https://openrouter.ai/api/v1",
+      reasoning: true,
+      input: ["text", "image"],
+      cost: {
+        input: 0.12,
+        output: 0.39999999999999997,
+        cacheRead: 0,
+        cacheWrite: 0,
+      },
+      contextWindow: 262144,
+      maxTokens: 262144,
+    } satisfies Model<"openai-completions">,
+    "google/gemma-4-26b-a4b-it:free": {
+      id: "google/gemma-4-26b-a4b-it:free",
+      name: "Google: Gemma 4 26B A4B (free)",
+      api: "openai-completions",
+      provider: "openrouter",
+      baseUrl: "https://openrouter.ai/api/v1",
+      reasoning: true,
+      input: ["text", "image"],
+      cost: {
+        input: 0,
+        output: 0,
+        cacheRead: 0,
+        cacheWrite: 0,
+      },
+      contextWindow: 262144,
+      maxTokens: 32768,
+    } satisfies Model<"openai-completions">,
+    "google/gemma-4-31b-it": {
+      id: "google/gemma-4-31b-it",
+      name: "Google: Gemma 4 31B",
+      api: "openai-completions",
+      provider: "openrouter",
+      baseUrl: "https://openrouter.ai/api/v1",
+      reasoning: true,
+      input: ["text", "image"],
+      cost: {
+        input: 0.14,
+        output: 0.39999999999999997,
+        cacheRead: 0,
+        cacheWrite: 0,
+      },
+      contextWindow: 262144,
+      maxTokens: 131072,
+    } satisfies Model<"openai-completions">,
+    "google/gemma-4-31b-it:free": {
+      id: "google/gemma-4-31b-it:free",
+      name: "Google: Gemma 4 31B (free)",
+      api: "openai-completions",
+      provider: "openrouter",
+      baseUrl: "https://openrouter.ai/api/v1",
+      reasoning: true,
+      input: ["text", "image"],
+      cost: {
+        input: 0,
+        output: 0,
+        cacheRead: 0,
+        cacheWrite: 0,
+      },
+      contextWindow: 262144,
+      maxTokens: 32768,
+    } satisfies Model<"openai-completions">,
     "inception/mercury": {
       id: "inception/mercury",
       name: "Inception: Mercury",
@@ -7572,22 +7879,22 @@
       contextWindow: 128000,
       maxTokens: 32000,
     } satisfies Model<"openai-completions">,
-    "kwaipilot/kat-coder-pro": {
-      id: "kwaipilot/kat-coder-pro",
-      name: "Kwaipilot: KAT-Coder-Pro V1",
+    "kwaipilot/kat-coder-pro-v2": {
+      id: "kwaipilot/kat-coder-pro-v2",
+      name: "Kwaipilot: KAT-Coder-Pro V2",
       api: "openai-completions",
       provider: "openrouter",
       baseUrl: "https://openrouter.ai/api/v1",
       reasoning: false,
       input: ["text"],
       cost: {
-        input: 0.207,
-        output: 0.828,
-        cacheRead: 0.0414,
+        input: 0.3,
+        output: 1.2,
+        cacheRead: 0.06,
         cacheWrite: 0,
       },
       contextWindow: 256000,
-      maxTokens: 128000,
+      maxTokens: 80000,
     } satisfies Model<"openai-completions">,
     "meituan/longcat-flash-chat": {
       id: "meituan/longcat-flash-chat",
@@ -7768,13 +8075,13 @@
       reasoning: true,
       input: ["text"],
       cost: {
-        input: 0.27,
+        input: 0.29,
         output: 0.95,
-        cacheRead: 0.0290000007,
+        cacheRead: 0.03,
         cacheWrite: 0,
       },
       contextWindow: 196608,
-      maxTokens: 4096,
+      maxTokens: 196608,
     } satisfies Model<"openai-completions">,
     "minimax/minimax-m2.5": {
       id: "minimax/minimax-m2.5",
@@ -7785,9 +8092,9 @@
       reasoning: true,
       input: ["text"],
       cost: {
-        input: 0.19999999999999998,
-        output: 1.17,
-        cacheRead: 0.09999999999999999,
+        input: 0.118,
+        output: 0.9900000000000001,
+        cacheRead: 0.059,
         cacheWrite: 0,
       },
       contextWindow: 196608,
@@ -7808,7 +8115,7 @@ export const MODELS = {
         cacheWrite: 0,
       },
       contextWindow: 196608,
-      maxTokens: 196608,
+      maxTokens: 8192,
     } satisfies Model<"openai-completions">,
     "minimax/minimax-m2.7": {
       id: "minimax/minimax-m2.7",
@@ -7821,11 +8128,11 @@
       cost: {
         input: 0.3,
         output: 1.2,
-        cacheRead: 0.06,
+        cacheRead: 0.059,
         cacheWrite: 0,
       },
-      contextWindow: 204800,
-      maxTokens: 131072,
+      contextWindow: 196608,
+      maxTokens: 4096,
     } satisfies Model<"openai-completions">,
     "mistralai/codestral-2508": {
       id: "mistralai/codestral-2508",
@@ -8082,23 +8389,6 @@
       contextWindow: 32768,
       maxTokens: 4096,
     } satisfies Model<"openai-completions">,
-    "mistralai/mistral-small-24b-instruct-2501": {
-      id: "mistralai/mistral-small-24b-instruct-2501",
-      name: "Mistral: Mistral Small 3",
-      api: "openai-completions",
-      provider: "openrouter",
-      baseUrl: "https://openrouter.ai/api/v1",
-      reasoning: false,
-      input: ["text"],
-      cost: {
-        input: 0.049999999999999996,
-        output: 0.08,
-        cacheRead: 0,
-        cacheWrite: 0,
-      },
-      contextWindow: 32768,
-      maxTokens: 16384,
-    } satisfies Model<"openai-completions">,
     "mistralai/mistral-small-2603": {
       id: "mistralai/mistral-small-2603",
       name: "Mistral: Mistral Small 4",
@@ -8116,23 +8406,6 @@
       contextWindow: 262144,
       maxTokens: 4096,
     } satisfies Model<"openai-completions">,
-    "mistralai/mistral-small-3.1-24b-instruct:free": {
-      id: "mistralai/mistral-small-3.1-24b-instruct:free",
-      name: "Mistral: Mistral Small 3.1 24B (free)",
-      api: "openai-completions",
-      provider: "openrouter",
-      baseUrl: "https://openrouter.ai/api/v1",
-      reasoning: false,
-      input: ["text", "image"],
-      cost: {
-        input: 0,
-        output: 0,
-        cacheRead: 0,
-        cacheWrite: 0,
-      },
-      contextWindow: 128000,
-      maxTokens: 4096,
-    } satisfies Model<"openai-completions">,
     "mistralai/mistral-small-3.2-24b-instruct": {
       id: "mistralai/mistral-small-3.2-24b-instruct",
       name: "Mistral: Mistral Small 3.2 24B",
@@ -8244,13 +8517,13 @@
       reasoning: false,
       input: ["text"],
       cost: {
-        input: 0.55,
-        output: 2.2,
+        input: 0.5700000000000001,
+        output: 2.3,
         cacheRead: 0,
         cacheWrite: 0,
       },
-      contextWindow: 131000,
-      maxTokens: 4096,
+      contextWindow: 131072,
+      maxTokens: 131072,
     } satisfies Model<"openai-completions">,
     "moonshotai/kimi-k2-0905": {
       id: "moonshotai/kimi-k2-0905",
@@ -8263,11 +8536,11 @@
       cost: {
         input: 0.39999999999999997,
         output: 2,
-        cacheRead: 0.15,
+        cacheRead: 0,
         cacheWrite: 0,
       },
-      contextWindow: 131072,
-      maxTokens: 4096,
+      contextWindow: 262144,
+      maxTokens: 262144,
     } satisfies Model<"openai-completions">,
     "moonshotai/kimi-k2-thinking": {
       id: "moonshotai/kimi-k2-thinking",
@@ -8278,12 +8551,12 @@
       reasoning: true,
       input: ["text"],
       cost: {
-        input: 0.47,
-        output: 2,
-        cacheRead: 0.14100000000000001,
+        input: 0.6,
+        output: 2.5,
+        cacheRead: 0,
         cacheWrite: 0,
       },
-      contextWindow: 131072,
+      contextWindow: 262144,
       maxTokens: 4096,
     } satisfies Model<"openai-completions">,
     "moonshotai/kimi-k2.5": {
@@ -8312,8 +8585,8 @@
       reasoning: false,
       input: ["text"],
       cost: {
-        input: 0.27,
-        output: 1,
+        input: 0.135,
+        output: 0.5,
         cacheRead: 0,
         cacheWrite: 0,
       },
@@ -8399,7 +8672,7 @@
       cost: {
         input: 0.09999999999999999,
         output: 0.5,
-        cacheRead: 0.04,
+        cacheRead: 0.09999999999999999,
         cacheWrite: 0,
       },
       contextWindow: 262144,
@@ -8624,7 +8897,7 @@
         cacheWrite: 0,
       },
       contextWindow: 1047576,
-      maxTokens: 32768,
+      maxTokens: 4096,
     } satisfies Model<"openai-completions">,
     "openai/gpt-4.1-mini": {
       id: "openai/gpt-4.1-mini",
@@ -8671,7 +8944,7 @@
       cost: {
         input: 2.5,
         output: 10,
-        cacheRead: 1.25,
+        cacheRead: 0,
         cacheWrite: 0,
       },
       contextWindow: 128000,
@@ -8892,11 +9165,11 @@
       cost: {
         input: 0.049999999999999996,
         output: 0.39999999999999997,
-        cacheRead: 0.005,
+        cacheRead: 0.01,
         cacheWrite: 0,
       },
       contextWindow: 400000,
-      maxTokens: 128000,
+      maxTokens: 4096,
     } satisfies Model<"openai-completions">,
     "openai/gpt-5-pro": {
       id: "openai/gpt-5-pro",
@@ -8926,7 +9199,7 @@
       cost: {
         input: 1.25,
         output: 10,
-        cacheRead: 0.125,
+        cacheRead: 0.13,
         cacheWrite: 0,
       },
       contextWindow: 400000,
@@ -8994,11 +9267,11 @@
       cost: {
         input: 0.25,
         output: 2,
-        cacheRead: 0.024999999999999998,
+        cacheRead: 0.03,
         cacheWrite: 0,
       },
       contextWindow: 400000,
-      maxTokens: 100000,
+      maxTokens: 128000,
     } satisfies Model<"openai-completions">,
     "openai/gpt-5.2": {
       id: "openai/gpt-5.2",
@@ -9032,7 +9305,7 @@
         cacheWrite: 0,
       },
       contextWindow: 128000,
-      maxTokens: 16384,
+      maxTokens: 32000,
     } satisfies Model<"openai-completions">,
     "openai/gpt-5.2-codex": {
       id: "openai/gpt-5.2-codex",
@@ -9170,6 +9443,40 @@
       contextWindow: 1050000,
       maxTokens: 128000,
     } satisfies Model<"openai-completions">,
+    "openai/gpt-audio": {
+      id: "openai/gpt-audio",
+      name: "OpenAI: GPT Audio",
+      api: "openai-completions",
+      provider: "openrouter",
+      baseUrl: "https://openrouter.ai/api/v1",
+      reasoning: false,
+      input: ["text"],
+      cost: {
+        input: 2.5,
+        output: 10,
+        cacheRead: 0,
+        cacheWrite: 0,
+      },
+      contextWindow: 128000,
+      maxTokens: 16384,
+    } satisfies Model<"openai-completions">,
+    "openai/gpt-audio-mini": {
+      id: "openai/gpt-audio-mini",
+      name: "OpenAI: GPT Audio Mini",
+      api: "openai-completions",
+      provider: "openrouter",
+      baseUrl: "https://openrouter.ai/api/v1",
+      reasoning: false,
+      input: ["text"],
+      cost: {
+        input: 0.6,
+        output: 2.4,
+        cacheRead: 0,
+        cacheWrite: 0,
+      },
+      contextWindow: 128000,
+      maxTokens: 16384,
+    } satisfies Model<"openai-completions">,
     "openai/gpt-oss-120b": {
       id: "openai/gpt-oss-120b",
       name: "OpenAI: gpt-oss-120b",
@@ -9214,12 +9521,12 @@
       input: ["text"],
       cost: {
         input: 0.03,
-        output: 0.11,
-        cacheRead: 0.015,
+        output: 0.14,
+        cacheRead: 0,
         cacheWrite: 0,
       },
       contextWindow: 131072,
-      maxTokens: 131072,
+      maxTokens: 4096,
     } satisfies Model<"openai-completions">,
     "openai/gpt-oss-20b:free": {
       id: "openai/gpt-oss-20b:free",
@@ -9236,7 +9543,7 @@
         cacheWrite: 0,
       },
       contextWindow: 131072,
-      maxTokens: 131072,
+      maxTokens: 8192,
     } satisfies Model<"openai-completions">,
     "openai/gpt-oss-safeguard-20b": {
       id: "openai/gpt-oss-safeguard-20b",
@@ -9491,7 +9798,7 @@
         cacheWrite: 0,
       },
       contextWindow: 32768,
-      maxTokens: 4096,
+      maxTokens: 32768,
     } satisfies Model<"openai-completions">,
     "qwen/qwen-max": {
       id: "qwen/qwen-max",
@@ -9731,23 +10038,6 @@
       contextWindow: 40960,
       maxTokens: 40960,
     } satisfies Model<"openai-completions">,
-    "qwen/qwen3-4b:free": {
-      id: "qwen/qwen3-4b:free",
-      name: "Qwen: Qwen3 4B (free)",
-      api: "openai-completions",
-      provider: "openrouter",
-      baseUrl: "https://openrouter.ai/api/v1",
-      reasoning: true,
-      input: ["text"],
-      cost: {
-        input: 0,
-        output: 0,
-        cacheRead: 0,
-        cacheWrite: 0,
-      },
-      contextWindow: 40960,
-      maxTokens: 4096,
-    } satisfies Model<"openai-completions">,
     "qwen/qwen3-8b": {
      id:
"qwen/qwen3-8b", name: "Qwen: Qwen3 8B", @@ -9825,13 +10115,13 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.12, - output: 0.75, - cacheRead: 0.06, + input: 0.15, + output: 0.7999999999999999, + cacheRead: 0.12, cacheWrite: 0, }, contextWindow: 262144, - maxTokens: 65536, + maxTokens: 262144, } satisfies Model<"openai-completions">, "qwen/qwen3-coder-plus": { id: "qwen/qwen3-coder-plus", @@ -10154,7 +10444,7 @@ export const MODELS = { cacheWrite: 0, }, contextWindow: 256000, - maxTokens: 4096, + maxTokens: 32768, } satisfies Model<"openai-completions">, "qwen/qwen3.5-flash-02-23": { id: "qwen/qwen3.5-flash-02-23", @@ -10190,6 +10480,23 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 65536, } satisfies Model<"openai-completions">, + "qwen/qwen3.6-plus": { + id: "qwen/qwen3.6-plus", + name: "Qwen: Qwen3.6 Plus", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.325, + output: 1.95, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 65536, + } satisfies Model<"openai-completions">, "qwen/qwq-32b": { id: "qwen/qwq-32b", name: "Qwen: QwQ 32B", @@ -10207,6 +10514,23 @@ export const MODELS = { contextWindow: 131072, maxTokens: 131072, } satisfies Model<"openai-completions">, + "rekaai/reka-edge": { + id: "rekaai/reka-edge", + name: "Reka Edge", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.09999999999999999, + output: 0.09999999999999999, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 16384, + maxTokens: 16384, + } satisfies Model<"openai-completions">, "relace/relace-search": { id: "relace/relace-search", name: "Relace: Relace Search", @@ -10269,28 +10593,11 @@ export const MODELS = { cost: { input: 0.09999999999999999, output: 0.3, - cacheRead: 0.02, - cacheWrite: 0, - }, - contextWindow: 256000, - maxTokens: 256000, - } satisfies Model<"openai-completions">, - "stepfun/step-3.5-flash:free": { - id: "stepfun/step-3.5-flash:free", - name: "StepFun: Step 3.5 Flash (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 256000, - maxTokens: 256000, + contextWindow: 262144, + maxTokens: 65536, } satisfies Model<"openai-completions">, "thedrummer/rocinante-12b": { id: "thedrummer/rocinante-12b", @@ -10479,9 +10786,9 @@ export const MODELS = { contextWindow: 2000000, maxTokens: 30000, } satisfies Model<"openai-completions">, - "x-ai/grok-4.20-beta": { - id: "x-ai/grok-4.20-beta", - name: "xAI: Grok 4.20 Beta", + "x-ai/grok-4.20": { + id: "x-ai/grok-4.20", + name: "xAI: Grok 4.20", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", @@ -10743,9 +11050,43 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.96, - output: 3.1999999999999997, - cacheRead: 0.192, + input: 1.2, + output: 4, + cacheRead: 0.24, + cacheWrite: 0, + }, + contextWindow: 202752, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "z-ai/glm-5.1": { + id: "z-ai/glm-5.1", + name: "Z.ai: GLM 5.1", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.95, + output: 3.15, 
+ cacheRead: 0.475, + cacheWrite: 0, + }, + contextWindow: 202752, + maxTokens: 65535, + } satisfies Model<"openai-completions">, + "z-ai/glm-5v-turbo": { + id: "z-ai/glm-5v-turbo", + name: "Z.ai: GLM 5V Turbo", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.2, + output: 4, + cacheRead: 0.24, cacheWrite: 0, }, contextWindow: 202752, @@ -10772,20 +11113,20 @@ export const MODELS = { } satisfies Model<"anthropic-messages">, "alibaba/qwen-3-235b": { id: "alibaba/qwen-3-235b", - name: "Qwen3-235B-A22B", + name: "Qwen3 235B A22b Instruct 2507", api: "anthropic-messages", provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", reasoning: false, input: ["text"], cost: { - input: 0.071, - output: 0.463, - cacheRead: 0, + input: 0.6, + output: 1.2, + cacheRead: 0.6, cacheWrite: 0, }, - contextWindow: 40960, - maxTokens: 16384, + contextWindow: 131000, + maxTokens: 40000, } satisfies Model<"anthropic-messages">, "alibaba/qwen-3-30b": { id: "alibaba/qwen-3-30b", @@ -10813,13 +11154,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.29, - output: 0.59, - cacheRead: 0.145, + input: 0.16, + output: 0.64, + cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, - maxTokens: 40960, + contextWindow: 128000, + maxTokens: 8192, } satisfies Model<"anthropic-messages">, "alibaba/qwen3-235b-a22b-thinking": { id: "alibaba/qwen3-235b-a22b-thinking", @@ -10847,13 +11188,13 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.39999999999999997, - output: 1.5999999999999999, - cacheRead: 0.022, + input: 1.5, + output: 7.5, + cacheRead: 0.3, cacheWrite: 0, }, contextWindow: 262144, - maxTokens: 66536, + maxTokens: 65536, } satisfies Model<"anthropic-messages">, "alibaba/qwen3-coder-30b-a3b": { id: "alibaba/qwen3-coder-30b-a3b", @@ -10966,13 +11307,13 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0.22, - output: 0.88, + input: 0.39999999999999997, + output: 4, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 256000, - maxTokens: 256000, + contextWindow: 131072, + maxTokens: 32768, } satisfies Model<"anthropic-messages">, "alibaba/qwen3.5-flash": { id: "alibaba/qwen3.5-flash", @@ -11008,6 +11349,23 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"anthropic-messages">, + "alibaba/qwen3.6-plus": { + id: "alibaba/qwen3.6-plus", + name: "Qwen 3.6 Plus", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.5, + output: 3, + cacheRead: 0.09999999999999999, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 64000, + } satisfies Model<"anthropic-messages">, "anthropic/claude-3-haiku": { id: "anthropic/claude-3-haiku", name: "Claude 3 Haiku", @@ -11042,40 +11400,6 @@ export const MODELS = { contextWindow: 200000, maxTokens: 8192, } satisfies Model<"anthropic-messages">, - "anthropic/claude-3.5-sonnet": { - id: "anthropic/claude-3.5-sonnet", - name: "Claude 3.5 Sonnet", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: false, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 200000, - maxTokens: 8192, - } satisfies Model<"anthropic-messages">, - "anthropic/claude-3.5-sonnet-20240620": { - 
id: "anthropic/claude-3.5-sonnet-20240620", - name: "Claude 3.5 Sonnet (2024-06-20)", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: false, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 200000, - maxTokens: 8192, - } satisfies Model<"anthropic-messages">, "anthropic/claude-3.7-sonnet": { id: "anthropic/claude-3.7-sonnet", name: "Claude 3.7 Sonnet", @@ -11246,6 +11570,23 @@ export const MODELS = { contextWindow: 131000, maxTokens: 131000, } satisfies Model<"anthropic-messages">, + "arcee-ai/trinity-large-thinking": { + id: "arcee-ai/trinity-large-thinking", + name: "Trinity Large Thinking", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 0.25, + output: 0.8999999999999999, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262100, + maxTokens: 80000, + } satisfies Model<"anthropic-messages">, "bytedance/seed-1.6": { id: "bytedance/seed-1.6", name: "Seed 1.6", @@ -11323,13 +11664,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.5, - output: 1.5, - cacheRead: 0, + input: 0.56, + output: 1.68, + cacheRead: 0.28, cacheWrite: 0, }, contextWindow: 163840, - maxTokens: 16384, + maxTokens: 8192, } satisfies Model<"anthropic-messages">, "deepseek/deepseek-v3.1-terminus": { id: "deepseek/deepseek-v3.1-terminus", @@ -11512,7 +11853,7 @@ export const MODELS = { cost: { input: 0.25, output: 1.5, - cacheRead: 0, + cacheRead: 0.03, cacheWrite: 0, }, contextWindow: 1000000, @@ -11535,6 +11876,40 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"anthropic-messages">, + "google/gemma-4-26b-a4b-it": { + id: "google/gemma-4-26b-a4b-it", + name: "Gemma 4 26B A4B IT", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.13, + output: 0.39999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, + "google/gemma-4-31b-it": { + id: "google/gemma-4-31b-it", + name: "Gemma 4 31B IT", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.14, + output: 0.39999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, "inception/mercury-2": { id: "inception/mercury-2", name: "Mercury 2", @@ -11569,6 +11944,23 @@ export const MODELS = { contextWindow: 32000, maxTokens: 16384, } satisfies Model<"anthropic-messages">, + "kwaipilot/kat-coder-pro-v2": { + id: "kwaipilot/kat-coder-pro-v2", + name: "Kat Coder Pro V2", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0.06, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 256000, + } satisfies Model<"anthropic-messages">, "meituan/longcat-flash-chat": { id: "meituan/longcat-flash-chat", name: "LongCat Flash Chat", @@ -11586,23 +11978,6 @@ export const MODELS = { contextWindow: 128000, maxTokens: 100000, } satisfies Model<"anthropic-messages">, - "meituan/longcat-flash-thinking": { - id: 
"meituan/longcat-flash-thinking", - name: "LongCat Flash Thinking", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: true, - input: ["text"], - cost: { - input: 0.15, - output: 1.5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 8192, - } satisfies Model<"anthropic-messages">, "meta/llama-3.1-70b": { id: "meta/llama-3.1-70b", name: "Llama 3.1 70B Instruct", @@ -11629,13 +12004,13 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.09999999999999999, - output: 0.09999999999999999, - cacheRead: 0.09999999999999999, + input: 0.22, + output: 0.22, + cacheRead: 0, cacheWrite: 0, }, contextWindow: 128000, - maxTokens: 16384, + maxTokens: 8192, } satisfies Model<"anthropic-messages">, "meta/llama-3.2-11b": { id: "meta/llama-3.2-11b", @@ -12013,20 +12388,20 @@ export const MODELS = { } satisfies Model<"anthropic-messages">, "moonshotai/kimi-k2": { id: "moonshotai/kimi-k2", - name: "Kimi K2", + name: "Kimi K2 Instruct", api: "anthropic-messages", provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", reasoning: false, input: ["text"], cost: { - input: 0.6, - output: 2.5, - cacheRead: 0.15, + input: 0.5700000000000001, + output: 2.3, + cacheRead: 0, cacheWrite: 0, }, contextWindow: 131072, - maxTokens: 16384, + maxTokens: 131072, } satisfies Model<"anthropic-messages">, "moonshotai/kimi-k2-0905": { id: "moonshotai/kimi-k2-0905", @@ -12039,11 +12414,11 @@ export const MODELS = { cost: { input: 0.6, output: 2.5, - cacheRead: 0.15, + cacheRead: 0.3, cacheWrite: 0, }, contextWindow: 256000, - maxTokens: 16384, + maxTokens: 128000, } satisfies Model<"anthropic-messages">, "moonshotai/kimi-k2-thinking": { id: "moonshotai/kimi-k2-thinking", @@ -12615,12 +12990,12 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.07, - output: 0.3, + input: 0.049999999999999996, + output: 0.19999999999999998, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 131072, maxTokens: 8192, } satisfies Model<"anthropic-messages">, "openai/gpt-oss-safeguard-20b": { @@ -12751,8 +13126,8 @@ export const MODELS = { reasoning: false, input: ["text", "image"], cost: { - input: 1, - output: 1, + input: 0, + output: 0, cacheRead: 0, cacheWrite: 0, }, @@ -12768,8 +13143,8 @@ export const MODELS = { reasoning: false, input: ["text", "image"], cost: { - input: 3, - output: 15, + input: 0, + output: 0, cacheRead: 0, cacheWrite: 0, }, @@ -12793,23 +13168,6 @@ export const MODELS = { contextWindow: 131072, maxTokens: 131072, } satisfies Model<"anthropic-messages">, - "xai/grok-2-vision": { - id: "xai/grok-2-vision", - name: "Grok 2 Vision", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: false, - input: ["text", "image"], - cost: { - input: 2, - output: 10, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 32768, - } satisfies Model<"anthropic-messages">, "xai/grok-3": { id: "xai/grok-3", name: "Grok 3 Beta", @@ -12963,6 +13321,23 @@ export const MODELS = { contextWindow: 2000000, maxTokens: 30000, } satisfies Model<"anthropic-messages">, + "xai/grok-4.20-multi-agent": { + id: "xai/grok-4.20-multi-agent", + name: "Grok 4.20 Multi-Agent", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 2, + output: 6, + cacheRead: 0.19999999999999998, + 
cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 2000000, + } satisfies Model<"anthropic-messages">, "xai/grok-4.20-multi-agent-beta": { id: "xai/grok-4.20-multi-agent-beta", name: "Grok 4.20 Multi Agent Beta", @@ -12980,6 +13355,23 @@ export const MODELS = { contextWindow: 2000000, maxTokens: 2000000, } satisfies Model<"anthropic-messages">, + "xai/grok-4.20-non-reasoning": { + id: "xai/grok-4.20-non-reasoning", + name: "Grok 4.20 Non-Reasoning", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 2000000, + } satisfies Model<"anthropic-messages">, "xai/grok-4.20-non-reasoning-beta": { id: "xai/grok-4.20-non-reasoning-beta", name: "Grok 4.20 Beta Non-Reasoning", @@ -12997,6 +13389,23 @@ export const MODELS = { contextWindow: 2000000, maxTokens: 2000000, } satisfies Model<"anthropic-messages">, + "xai/grok-4.20-reasoning": { + id: "xai/grok-4.20-reasoning", + name: "Grok 4.20 Reasoning", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 2000000, + } satisfies Model<"anthropic-messages">, "xai/grok-4.20-reasoning-beta": { id: "xai/grok-4.20-reasoning-beta", name: "Grok 4.20 Beta Reasoning", @@ -13040,9 +13449,9 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.09999999999999999, - output: 0.3, - cacheRead: 0.02, + input: 0.09, + output: 0.29, + cacheRead: 0.045, cacheWrite: 0, }, contextWindow: 262144, @@ -13176,13 +13585,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.6, - output: 2.2, - cacheRead: 0, + input: 2.25, + output: 2.75, + cacheRead: 2.25, cacheWrite: 0, }, - contextWindow: 200000, - maxTokens: 120000, + contextWindow: 131000, + maxTokens: 40000, } satisfies Model<"anthropic-messages">, "zai/glm-4.7-flash": { id: "zai/glm-4.7-flash", @@ -13252,6 +13661,40 @@ export const MODELS = { contextWindow: 202800, maxTokens: 131100, } satisfies Model<"anthropic-messages">, + "zai/glm-5.1": { + id: "zai/glm-5.1", + name: "GLM 5.1", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 1.4, + output: 4.4, + cacheRead: 0.26, + cacheWrite: 0, + }, + contextWindow: 202800, + maxTokens: 64000, + } satisfies Model<"anthropic-messages">, + "zai/glm-5v-turbo": { + id: "zai/glm-5v-turbo", + name: "GLM 5V Turbo", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.2, + output: 4, + cacheRead: 0.24, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 128000, + } satisfies Model<"anthropic-messages">, }, "xai": { "grok-2": { @@ -13808,6 +14251,24 @@ export const MODELS = { contextWindow: 200000, maxTokens: 131072, } satisfies Model<"openai-completions">, + "glm-4.7-flashx": { + id: "glm-4.7-flashx", + name: "GLM-4.7-FlashX", + api: "openai-completions", + provider: "zai", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"}, + reasoning: true, + input: ["text"], + cost: { + input: 
0.07, + output: 0.4, + cacheRead: 0.01, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 131072, + } satisfies Model<"openai-completions">, "glm-5": { id: "glm-5", name: "GLM-5", @@ -13844,5 +14305,41 @@ export const MODELS = { contextWindow: 200000, maxTokens: 131072, } satisfies Model<"openai-completions">, + "glm-5.1": { + id: "glm-5.1", + name: "GLM-5.1", + api: "openai-completions", + provider: "zai", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"}, + reasoning: true, + input: ["text"], + cost: { + input: 1.4, + output: 4.4, + cacheRead: 0.26, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "glm-5v-turbo": { + id: "glm-5v-turbo", + name: "glm-5v-turbo", + api: "openai-completions", + provider: "zai", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.2, + output: 4, + cacheRead: 0.24, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 131072, + } satisfies Model<"openai-completions">, }, } as const; diff --git a/packages/pi-ai/src/models.test.ts b/packages/pi-ai/src/models.test.ts index 068004ad3..d8a3a20af 100644 --- a/packages/pi-ai/src/models.test.ts +++ b/packages/pi-ai/src/models.test.ts @@ -109,6 +109,141 @@ describe("model registry — custom zai provider (GLM-5.1)", () => { }); }); +// ═══════════════════════════════════════════════════════════════════════════ +// New provider: alibaba-dashscope (feat: #3891) +// +// Regular DashScope API for users without the Coding Plan. +// Separate from alibaba-coding-plan — different endpoint, auth, and pricing. +// ═══════════════════════════════════════════════════════════════════════════ + +describe("model registry — alibaba-dashscope provider", () => { + it("alibaba-dashscope is a registered provider", () => { + const providers = getProviders(); + assert.ok( + providers.includes("alibaba-dashscope"), + `Expected "alibaba-dashscope" in providers, got: ${providers.join(", ")}`, + ); + }); + + it("alibaba-dashscope has all expected models", () => { + const models = getModels("alibaba-dashscope"); + const ids = models.map((m) => m.id).sort(); + const expected = [ + "qwen3-coder-plus", + "qwen3-max", + "qwen3.5-flash", + "qwen3.5-plus", + "qwen3.6-plus", + ]; + assert.deepEqual(ids, expected); + }); + + it("alibaba-dashscope models use the international DashScope base URL", () => { + const models = getModels("alibaba-dashscope"); + for (const model of models) { + assert.equal( + model.baseUrl, + "https://dashscope-intl.aliyuncs.com/compatible-mode/v1", + `Model ${model.id} has wrong baseUrl: ${model.baseUrl}`, + ); + } + }); + + it("alibaba-dashscope models use openai-completions API", () => { + const models = getModels("alibaba-dashscope"); + for (const model of models) { + assert.equal(model.api, "openai-completions", `Model ${model.id} has wrong api: ${model.api}`); + } + }); + + it("alibaba-dashscope models have provider set correctly", () => { + const models = getModels("alibaba-dashscope"); + for (const model of models) { + assert.equal( + model.provider, + "alibaba-dashscope", + `Model ${model.id} has wrong provider: ${model.provider}`, + ); + } + }); + + it("alibaba-dashscope models all have 1M context window", () => { + const models = getModels("alibaba-dashscope"); + for (const model of models) { + assert.equal(model.contextWindow, 1_000_000, `Model 
${model.id} has wrong contextWindow: ${model.contextWindow}`); + } + }); + + it("alibaba-dashscope models have positive paid costs (not free-tier)", () => { + const models = getModels("alibaba-dashscope"); + for (const model of models) { + assert.ok(model.cost.input > 0, `${model.id}: input cost should be > 0 (paid tier)`); + assert.ok(model.cost.output > 0, `${model.id}: output cost should be > 0 (paid tier)`); + } + }); + + it("qwen3-max is a reasoning model with correct pricing", () => { + const model = getModel("alibaba-dashscope" as any, "qwen3-max" as any); + assert.ok(model, "Expected getModel to return qwen3-max for alibaba-dashscope"); + assert.equal(model.reasoning, true); + assert.equal(model.cost.input, 1.2); + assert.equal(model.cost.output, 6); + assert.equal(model.maxTokens, 32768); + }); + + it("qwen3.5-plus is a reasoning model with correct pricing", () => { + const model = getModel("alibaba-dashscope" as any, "qwen3.5-plus" as any); + assert.ok(model, "Expected getModel to return qwen3.5-plus for alibaba-dashscope"); + assert.equal(model.reasoning, true); + assert.equal(model.cost.input, 0.4); + assert.equal(model.cost.output, 1.2); + assert.equal(model.maxTokens, 65536); + }); + + it("qwen3.5-flash is not a reasoning model", () => { + const model = getModel("alibaba-dashscope" as any, "qwen3.5-flash" as any); + assert.ok(model, "Expected getModel to return qwen3.5-flash for alibaba-dashscope"); + assert.equal(model.reasoning, false); + assert.equal(model.cost.input, 0.1); + assert.equal(model.cost.output, 0.4); + }); + + it("qwen3-coder-plus is not a reasoning model", () => { + const model = getModel("alibaba-dashscope" as any, "qwen3-coder-plus" as any); + assert.ok(model, "Expected getModel to return qwen3-coder-plus for alibaba-dashscope"); + assert.equal(model.reasoning, false); + assert.equal(model.cost.input, 1.0); + assert.equal(model.cost.output, 5.0); + }); + + it("qwen3.6-plus is a reasoning model", () => { + const model = getModel("alibaba-dashscope" as any, "qwen3.6-plus" as any); + assert.ok(model, "Expected getModel to return qwen3.6-plus for alibaba-dashscope"); + assert.equal(model.reasoning, true); + assert.equal(model.cost.input, 0.5); + assert.equal(model.cost.output, 3.0); + }); + + it("alibaba-dashscope is independent of alibaba-coding-plan (different endpoint)", () => { + const dashscope = getModels("alibaba-dashscope"); + const codingPlan = getModels("alibaba-coding-plan"); + for (const m of dashscope) { + assert.notEqual( + m.baseUrl, + "https://coding-intl.dashscope.aliyuncs.com/v1", + `${m.id} must not use the Coding Plan endpoint`, + ); + } + // Both providers must coexist — coding-plan must not have been overwritten + assert.ok(codingPlan.length > 0, "alibaba-coding-plan must still have models"); + }); + + it("getModel returns undefined for unknown model in alibaba-dashscope (failure path)", () => { + const model = getModel("alibaba-dashscope" as any, "does-not-exist" as any); + assert.equal(model, undefined); + }); +}); + describe("model registry — custom models do not collide with generated models", () => { it("generated providers still exist alongside custom providers", () => { const providers = getProviders(); diff --git a/packages/pi-ai/src/providers/amazon-bedrock.ts b/packages/pi-ai/src/providers/amazon-bedrock.ts index 52b42b4d1..dee0c363e 100644 --- a/packages/pi-ai/src/providers/amazon-bedrock.ts +++ b/packages/pi-ai/src/providers/amazon-bedrock.ts @@ -43,7 +43,7 @@ import { AssistantMessageEventStream } from 
"../utils/event-stream.js"; import { parseStreamingJson } from "../utils/json-parse.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; import { adjustMaxTokensForThinking, buildBaseOptions, clampReasoning } from "./simple-options.js"; -import { transformMessages } from "./transform-messages.js"; +import { transformMessagesWithReport } from "./transform-messages.js"; export interface BedrockOptions extends StreamOptions { region?: string; @@ -151,7 +151,7 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOpt messages: convertMessages(context, model, cacheRetention), system: buildSystemPrompt(context.systemPrompt, model, cacheRetention), inferenceConfig: { maxTokens: options.maxTokens, temperature: options.temperature }, - toolConfig: convertToolConfig(context.tools, options.toolChoice), + toolConfig: convertToolConfig(context.tools, options.toolChoice, model, cacheRetention), additionalModelRequestFields: buildAdditionalModelRequestFields(model, options), }; const nextCommandInput = await options?.onPayload?.(commandInput, model); @@ -487,7 +487,7 @@ function convertMessages( cacheRetention: CacheRetention, ): Message[] { const result: Message[] = []; - const transformedMessages = transformMessages(context.messages, model, normalizeToolCallId); + const transformedMessages = transformMessagesWithReport(context.messages, model, normalizeToolCallId, "bedrock-converse-stream"); for (let i = 0; i < transformedMessages.length; i++) { const m = transformedMessages[i]; @@ -633,6 +633,8 @@ function convertMessages( function convertToolConfig( tools: Tool[] | undefined, toolChoice: BedrockOptions["toolChoice"], + model: Model<"bedrock-converse-stream">, + cacheRetention: CacheRetention, ): ToolConfiguration | undefined { if (!tools?.length || toolChoice === "none") return undefined; @@ -644,6 +646,16 @@ function convertToolConfig( }, })); + // Add cachePoint after last tool for supported models + if (cacheRetention !== "none" && supportsPromptCaching(model)) { + bedrockTools.push({ + cachePoint: { + type: CachePointType.DEFAULT, + ...(cacheRetention === "long" ? 
{ ttl: CacheTTL.ONE_HOUR } : {}), + }, + } as any); + } + let bedrockToolChoice: ToolChoice | undefined; switch (toolChoice) { case "auto": diff --git a/packages/pi-ai/src/providers/anthropic-auth.test.ts b/packages/pi-ai/src/providers/anthropic-auth.test.ts new file mode 100644 index 000000000..4593e1a5d --- /dev/null +++ b/packages/pi-ai/src/providers/anthropic-auth.test.ts @@ -0,0 +1,32 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; + +import { usesAnthropicBearerAuth } from "./anthropic.js"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +test("usesAnthropicBearerAuth covers Bearer-only Anthropic-compatible providers (#3783)", () => { + assert.equal(usesAnthropicBearerAuth("alibaba-coding-plan"), true); + assert.equal(usesAnthropicBearerAuth("minimax"), true); + assert.equal(usesAnthropicBearerAuth("minimax-cn"), true); + assert.equal(usesAnthropicBearerAuth("anthropic"), false); +}); + +test("createClient routes Bearer-auth providers through authToken (#3783)", () => { + const source = readFileSync(join(__dirname, "..", "..", "src", "providers", "anthropic.ts"), "utf-8"); + assert.ok( + source.includes("const usesBearerAuth = usesAnthropicBearerAuth(model.provider);"), + "createClient should derive auth mode from usesAnthropicBearerAuth", + ); + assert.ok( + source.includes("apiKey: usesBearerAuth ? null : apiKey"), + "Bearer-auth providers should skip x-api-key auth", + ); + assert.ok( + source.includes("authToken: usesBearerAuth ? apiKey : undefined"), + "Bearer-auth providers should send authToken instead", + ); +}); diff --git a/packages/pi-ai/src/providers/anthropic-shared.test.ts b/packages/pi-ai/src/providers/anthropic-shared.test.ts index 9b6718570..6e08bc52e 100644 --- a/packages/pi-ai/src/providers/anthropic-shared.test.ts +++ b/packages/pi-ai/src/providers/anthropic-shared.test.ts @@ -1,6 +1,60 @@ import { describe, it } from "node:test"; import assert from "node:assert/strict"; -import { mapStopReason } from "./anthropic-shared.js"; +import { convertTools, mapStopReason } from "./anthropic-shared.js"; + +const makeTool = (name: string) => + ({ + name, + description: `desc for ${name}`, + parameters: { + type: "object" as const, + properties: { arg: { type: "string" } }, + required: ["arg"], + }, + }) as any; + +describe("convertTools cache_control", () => { + it("adds cache_control to the last tool when cacheControl is provided", () => { + const tools = [makeTool("Read"), makeTool("Write"), makeTool("Edit")]; + const cacheControl = { type: "ephemeral" as const }; + const result = convertTools(tools, false, cacheControl); + + assert.equal(result.length, 3); + assert.equal((result[0] as any).cache_control, undefined); + assert.equal((result[1] as any).cache_control, undefined); + assert.deepEqual((result[2] as any).cache_control, { type: "ephemeral" }); + }); + + it("does not add cache_control when cacheControl is undefined", () => { + const tools = [makeTool("Read"), makeTool("Write")]; + const result = convertTools(tools, false); + + for (const tool of result) { + assert.equal((tool as any).cache_control, undefined); + } + }); + + it("handles empty tools array without error", () => { + const result = convertTools([], false, { type: "ephemeral" }); + assert.equal(result.length, 0); + }); + + it("passes through ttl when provided", () => { + const tools = [makeTool("Read")]; + const cacheControl = { type: 
"ephemeral" as const, ttl: "1h" as const }; + const result = convertTools(tools, false, cacheControl); + + assert.deepEqual((result[0] as any).cache_control, { type: "ephemeral", ttl: "1h" }); + }); + + it("single tool gets cache_control", () => { + const tools = [makeTool("Read")]; + const result = convertTools(tools, false, { type: "ephemeral" }); + + assert.equal(result.length, 1); + assert.deepEqual((result[0] as any).cache_control, { type: "ephemeral" }); + }); +}); describe("mapStopReason", () => { it("maps end_turn to stop", () => { diff --git a/packages/pi-ai/src/providers/anthropic-shared.ts b/packages/pi-ai/src/providers/anthropic-shared.ts index 098f50721..567609147 100644 --- a/packages/pi-ai/src/providers/anthropic-shared.ts +++ b/packages/pi-ai/src/providers/anthropic-shared.ts @@ -33,7 +33,7 @@ import type { AssistantMessageEventStream } from "../utils/event-stream.js"; import { parseStreamingJson } from "../utils/json-parse.js"; import { hasXmlParameterTags, repairToolJson } from "../utils/repair-tool-json.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; -import { transformMessages } from "./transform-messages.js"; +import { transformMessagesWithReport } from "./transform-messages.js"; export type AnthropicEffort = "low" | "medium" | "high" | "max"; @@ -235,7 +235,7 @@ export function convertMessages( ): MessageParam[] { const params: MessageParam[] = []; - const transformedMessages = transformMessages(messages, model, normalizeToolCallId); + const transformedMessages = transformMessagesWithReport(messages, model, normalizeToolCallId, "anthropic-messages"); for (let i = 0; i < transformedMessages.length; i++) { const msg = transformedMessages[i]; @@ -394,10 +394,14 @@ export function convertMessages( return params; } -export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.Tool[] { +export function convertTools( + tools: Tool[], + isOAuthToken: boolean, + cacheControl?: { type: "ephemeral"; ttl?: "1h" }, +): Anthropic.Messages.Tool[] { if (!tools) return []; - return tools.map((tool) => { + const result = tools.map((tool) => { const jsonSchema = tool.parameters as any; return { @@ -410,6 +414,13 @@ export function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Me }, }; }); + + // Add cache breakpoint to last tool — covers entire tool block + if (cacheControl && result.length > 0) { + (result[result.length - 1] as any).cache_control = cacheControl; + } + + return result; } export function buildParams( @@ -457,7 +468,7 @@ export function buildParams( } if (context.tools) { - params.tools = convertTools(context.tools, isOAuthToken); + params.tools = convertTools(context.tools, isOAuthToken, cacheControl); } if (options?.thinkingEnabled && model.reasoning) { diff --git a/packages/pi-ai/src/providers/anthropic.ts b/packages/pi-ai/src/providers/anthropic.ts index 21c0da707..ec9b21fde 100644 --- a/packages/pi-ai/src/providers/anthropic.ts +++ b/packages/pi-ai/src/providers/anthropic.ts @@ -34,9 +34,6 @@ async function getAnthropicClass(): Promise { return _AnthropicClass; } -// Stealth mode: Mimic Claude Code's tool naming exactly -const claudeCodeVersion = "2.1.62"; - function mergeHeaders(...headerSources: (Record | undefined)[]): Record { const merged: Record = {}; for (const headers of headerSources) { @@ -47,8 +44,8 @@ function mergeHeaders(...headerSources: (Record | undefined)[]): return merged; } -function isOAuthToken(apiKey: string): boolean { - return apiKey.includes("sk-ant-oat"); +export function 
usesAnthropicBearerAuth(provider: Model<"anthropic-messages">["provider"]): boolean { + return provider === "alibaba-coding-plan" || provider === "minimax" || provider === "minimax-cn"; } async function createClient( @@ -97,35 +94,12 @@ async function createClient( betaFeatures.push("interleaved-thinking-2025-05-14"); } - // OAuth: Bearer auth, Claude Code identity headers - if (isOAuthToken(apiKey)) { - const client = new AnthropicClass({ - apiKey: null, - authToken: apiKey, - baseURL: model.baseUrl, - dangerouslyAllowBrowser: true, - defaultHeaders: mergeHeaders( - { - accept: "application/json", - "anthropic-dangerous-direct-browser-access": "true", - ...(betaFeatures.length > 0 ? { "anthropic-beta": `claude-code-20250219,oauth-2025-04-20,${betaFeatures.join(",")}` } : {}), - "user-agent": `claude-cli/${claudeCodeVersion}`, - "x-app": "cli", - }, - model.headers, - optionsHeaders, - ), - }); - - return { client, isOAuthToken: true }; - } - - // API key auth - // Alibaba Coding Plan uses Bearer token auth instead of x-api-key - const isAlibabaProvider = model.provider === "alibaba-coding-plan"; + // API key auth (Anthropic OAuth removed per TOS compliance — use API keys or Claude CLI) + // Some Anthropic-compatible providers require Bearer auth instead of x-api-key. + const usesBearerAuth = usesAnthropicBearerAuth(model.provider); const client = new AnthropicClass({ - apiKey: isAlibabaProvider ? null : apiKey, - authToken: isAlibabaProvider ? apiKey : undefined, + apiKey: usesBearerAuth ? null : apiKey, + authToken: usesBearerAuth ? apiKey : undefined, baseURL: model.baseUrl, dangerouslyAllowBrowser: true, defaultHeaders: mergeHeaders( diff --git a/packages/pi-ai/src/providers/google-shared.ts b/packages/pi-ai/src/providers/google-shared.ts index e6a31771f..7984bdd4b 100644 --- a/packages/pi-ai/src/providers/google-shared.ts +++ b/packages/pi-ai/src/providers/google-shared.ts @@ -5,7 +5,7 @@ import { type Content, FinishReason, FunctionCallingConfigMode, type Part } from "@google/genai"; import type { Context, ImageContent, Model, StopReason, TextContent, Tool } from "../types.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; -import { transformMessages } from "./transform-messages.js"; +import { transformMessagesWithReport } from "./transform-messages.js"; type GoogleApiType = "google-generative-ai" | "google-gemini-cli" | "google-vertex"; @@ -80,7 +80,7 @@ export function convertMessages(model: Model, contex return id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64); }; - const transformedMessages = transformMessages(context.messages, model, normalizeToolCallId); + const transformedMessages = transformMessagesWithReport(context.messages, model, normalizeToolCallId, "google-generative-ai"); for (const msg of transformedMessages) { if (msg.role === "user") { diff --git a/packages/pi-ai/src/providers/mistral.ts b/packages/pi-ai/src/providers/mistral.ts index 7c9b54b91..0a6a28e5c 100644 --- a/packages/pi-ai/src/providers/mistral.ts +++ b/packages/pi-ai/src/providers/mistral.ts @@ -39,7 +39,7 @@ import { shortHash } from "../utils/hash.js"; import { parseStreamingJson } from "../utils/json-parse.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; import { buildBaseOptions, clampReasoning } from "./simple-options.js"; -import { transformMessages } from "./transform-messages.js"; +import { transformMessagesWithReport } from "./transform-messages.js"; const MISTRAL_TOOL_CALL_ID_LENGTH = 9; const MAX_MISTRAL_ERROR_BODY_CHARS = 4000; @@ -79,7 +79,7 @@ export 
const streamMistral: StreamFunction<"mistral-conversations", MistralOptio }); const normalizeMistralToolCallId = createMistralToolCallIdNormalizer(); - const transformedMessages = transformMessages(context.messages, model, (id) => normalizeMistralToolCallId(id)); + const transformedMessages = transformMessagesWithReport(context.messages, model, (id) => normalizeMistralToolCallId(id), "mistral-conversations"); let payload = buildChatPayload(model, context, transformedMessages, options); const nextPayload = await options?.onPayload?.(payload, model); diff --git a/packages/pi-ai/src/providers/openai-completions.ts b/packages/pi-ai/src/providers/openai-completions.ts index 4d6e1a3cf..51213ad39 100644 --- a/packages/pi-ai/src/providers/openai-completions.ts +++ b/packages/pi-ai/src/providers/openai-completions.ts @@ -39,7 +39,7 @@ import { finalizeStream, handleStreamError, } from "./openai-shared.js"; -import { transformMessages } from "./transform-messages.js"; +import { transformMessagesWithReport } from "./transform-messages.js"; /** * Check if conversation messages contain tool calls or tool results. @@ -343,6 +343,7 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio if (context.tools) { params.tools = convertTools(context.tools, compat); + maybeAddOpenRouterAnthropicToolCacheControl(model, params.tools); } else if (hasToolHistory(context.messages)) { // Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results params.tools = []; @@ -379,6 +380,19 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio return params; } +function maybeAddOpenRouterAnthropicToolCacheControl( + model: Model<"openai-completions">, + tools: OpenAI.Chat.Completions.ChatCompletionTool[] | undefined, +): void { + if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/")) return; + if (!tools?.length) return; + + const lastTool = tools[tools.length - 1]; + if ("function" in lastTool) { + Object.assign(lastTool.function, { cache_control: { type: "ephemeral" } }); + } +} + function mapReasoningEffort( effort: NonNullable, reasoningEffortMap: Partial, string>>, @@ -441,7 +455,7 @@ export function convertMessages( return id; }; - const transformedMessages = transformMessages(context.messages, model, (id) => normalizeToolCallId(id)); + const transformedMessages = transformMessagesWithReport(context.messages, model, (id) => normalizeToolCallId(id), "openai-completions"); if (context.systemPrompt) { const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole; diff --git a/packages/pi-ai/src/providers/openai-responses-shared.ts b/packages/pi-ai/src/providers/openai-responses-shared.ts index 10ac5ee1b..8227dcff5 100644 --- a/packages/pi-ai/src/providers/openai-responses-shared.ts +++ b/packages/pi-ai/src/providers/openai-responses-shared.ts @@ -30,7 +30,7 @@ import type { AssistantMessageEventStream } from "../utils/event-stream.js"; import { shortHash } from "../utils/hash.js"; import { parseStreamingJson } from "../utils/json-parse.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; -import { transformMessages } from "./transform-messages.js"; +import { transformMessagesWithReport } from "./transform-messages.js"; // ============================================================================= // Utilities @@ -108,7 +108,7 @@ export function convertResponsesMessages( return `${normalizedCallId}|${normalizedItemId}`; }; - const transformedMessages = 
transformMessages(context.messages, model, normalizeToolCallId); + const transformedMessages = transformMessagesWithReport(context.messages, model, normalizeToolCallId, "openai-responses"); const includeSystemPrompt = options?.includeSystemPrompt ?? true; if (includeSystemPrompt && context.systemPrompt) { diff --git a/packages/pi-ai/src/providers/provider-capabilities.test.ts b/packages/pi-ai/src/providers/provider-capabilities.test.ts new file mode 100644 index 000000000..7b8728975 --- /dev/null +++ b/packages/pi-ai/src/providers/provider-capabilities.test.ts @@ -0,0 +1,174 @@ +// GSD-2 — Provider Capabilities Registry Tests (ADR-005 Phase 1) +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; + +import { + PROVIDER_CAPABILITIES, + getProviderCapabilities, + getUnsupportedFeatures, + mergeCapabilityOverrides, + getRegisteredApis, +} from "./provider-capabilities.js"; + +// ─── Registry Completeness ────────────────────────────────────────────────── + +describe("PROVIDER_CAPABILITIES registry", () => { + const EXPECTED_APIS = [ + "anthropic-messages", + "anthropic-vertex", + "openai-responses", + "azure-openai-responses", + "openai-codex-responses", + "openai-completions", + "google-generative-ai", + "google-gemini-cli", + "google-vertex", + "mistral-conversations", + "bedrock-converse-stream", + "ollama-chat", + ]; + + test("covers all expected API providers", () => { + for (const api of EXPECTED_APIS) { + assert.ok( + PROVIDER_CAPABILITIES[api], + `Missing capability entry for API: ${api}`, + ); + } + }); + + test("getRegisteredApis returns all entries", () => { + const registered = getRegisteredApis(); + for (const api of EXPECTED_APIS) { + assert.ok(registered.includes(api), `getRegisteredApis missing: ${api}`); + } + }); + + test("all entries have required fields", () => { + for (const [api, caps] of Object.entries(PROVIDER_CAPABILITIES)) { + assert.equal(typeof caps.toolCalling, "boolean", `${api}.toolCalling`); + assert.equal(typeof caps.maxTools, "number", `${api}.maxTools`); + assert.equal(typeof caps.imageToolResults, "boolean", `${api}.imageToolResults`); + assert.equal(typeof caps.structuredOutput, "boolean", `${api}.structuredOutput`); + assert.ok(caps.toolCallIdFormat, `${api}.toolCallIdFormat`); + assert.equal(typeof caps.toolCallIdFormat.maxLength, "number", `${api}.toolCallIdFormat.maxLength`); + assert.ok(caps.toolCallIdFormat.allowedChars instanceof RegExp, `${api}.toolCallIdFormat.allowedChars`); + assert.ok( + ["full", "text-only", "none"].includes(caps.thinkingPersistence), + `${api}.thinkingPersistence is "${caps.thinkingPersistence}"`, + ); + assert.ok(Array.isArray(caps.unsupportedSchemaFeatures), `${api}.unsupportedSchemaFeatures`); + } + }); +}); + +// ─── Provider-specific Values ─────────────────────────────────────────────── + +describe("provider-specific capabilities", () => { + test("Anthropic supports full thinking persistence", () => { + assert.equal(PROVIDER_CAPABILITIES["anthropic-messages"].thinkingPersistence, "full"); + }); + + test("Anthropic supports image tool results", () => { + assert.equal(PROVIDER_CAPABILITIES["anthropic-messages"].imageToolResults, true); + }); + + test("Anthropic tool call ID is 64 chars max", () => { + assert.equal(PROVIDER_CAPABILITIES["anthropic-messages"].toolCallIdFormat.maxLength, 64); + }); + + test("Mistral tool call ID is 9 chars max", () => { + assert.equal(PROVIDER_CAPABILITIES["mistral-conversations"].toolCallIdFormat.maxLength, 9); + }); + + test("Mistral has no thinking 
persistence", () => { + assert.equal(PROVIDER_CAPABILITIES["mistral-conversations"].thinkingPersistence, "none"); + }); + + test("Google does not support patternProperties", () => { + assert.ok( + PROVIDER_CAPABILITIES["google-generative-ai"].unsupportedSchemaFeatures.includes("patternProperties"), + ); + }); + + test("Google does not support const", () => { + assert.ok( + PROVIDER_CAPABILITIES["google-generative-ai"].unsupportedSchemaFeatures.includes("const"), + ); + }); + + test("OpenAI Responses does not support image tool results", () => { + assert.equal(PROVIDER_CAPABILITIES["openai-responses"].imageToolResults, false); + }); + + test("OpenAI Responses has text-only thinking persistence", () => { + assert.equal(PROVIDER_CAPABILITIES["openai-responses"].thinkingPersistence, "text-only"); + }); +}); + +// ─── getProviderCapabilities ──────────────────────────────────────────────── + +describe("getProviderCapabilities", () => { + test("returns known provider capabilities", () => { + const caps = getProviderCapabilities("anthropic-messages"); + assert.equal(caps.toolCalling, true); + assert.equal(caps.thinkingPersistence, "full"); + }); + + test("returns permissive defaults for unknown providers", () => { + const caps = getProviderCapabilities("unknown-provider-xyz"); + assert.equal(caps.toolCalling, true); + assert.equal(caps.imageToolResults, true); + assert.deepEqual(caps.unsupportedSchemaFeatures, []); + }); +}); + +// ─── getUnsupportedFeatures ───────────────────────────────────────────────── + +describe("getUnsupportedFeatures", () => { + test("returns unsupported features for Google", () => { + const unsupported = getUnsupportedFeatures("google-generative-ai", ["patternProperties", "const"]); + assert.deepEqual(unsupported, ["patternProperties", "const"]); + }); + + test("returns empty for Anthropic with any features", () => { + const unsupported = getUnsupportedFeatures("anthropic-messages", ["patternProperties", "const"]); + assert.deepEqual(unsupported, []); + }); + + test("returns empty for unknown provider", () => { + const unsupported = getUnsupportedFeatures("unknown-xyz", ["patternProperties"]); + assert.deepEqual(unsupported, []); + }); +}); + +// ─── mergeCapabilityOverrides ─────────────────────────────────────────────── + +describe("mergeCapabilityOverrides", () => { + test("overrides individual fields", () => { + const merged = mergeCapabilityOverrides("openai-responses", { + imageToolResults: true, + }); + assert.equal(merged.imageToolResults, true); + // Non-overridden fields preserved + assert.equal(merged.toolCalling, true); + assert.equal(merged.thinkingPersistence, "text-only"); + }); + + test("deep-merges toolCallIdFormat", () => { + const merged = mergeCapabilityOverrides("anthropic-messages", { + toolCallIdFormat: { maxLength: 128 }, + }); + assert.equal(merged.toolCallIdFormat.maxLength, 128); + // allowedChars preserved from base + assert.ok(merged.toolCallIdFormat.allowedChars instanceof RegExp); + }); + + test("uses permissive defaults for unknown provider", () => { + const merged = mergeCapabilityOverrides("unknown-xyz", { + imageToolResults: false, + }); + assert.equal(merged.imageToolResults, false); + assert.equal(merged.toolCalling, true); // from default + }); +}); diff --git a/packages/pi-ai/src/providers/provider-capabilities.ts b/packages/pi-ai/src/providers/provider-capabilities.ts new file mode 100644 index 000000000..b49a1f319 --- /dev/null +++ b/packages/pi-ai/src/providers/provider-capabilities.ts @@ -0,0 +1,215 @@ +// GSD-2 — Provider 
Capabilities Registry (ADR-005 Phase 1) +// Declarative registry of what each API provider supports, consolidating +// scattered knowledge from *-shared.ts files into a queryable data structure. + +import type { Api } from "../types.js"; + +// ─── Types ────────────────────────────────────────────────────────────────── + +/** + * Declarative capability profile for an API provider. + * Used by the model router to filter incompatible models and by the tool + * system to adjust tool sets per provider. + */ +export interface ProviderCapabilities { + /** Whether models from this provider support tool/function calling */ + toolCalling: boolean; + /** Maximum number of tools the provider handles well (0 = unlimited) */ + maxTools: number; + /** Whether tool results can contain images */ + imageToolResults: boolean; + /** Whether the provider supports structured JSON output */ + structuredOutput: boolean; + /** Tool call ID format constraints */ + toolCallIdFormat: { + maxLength: number; + allowedChars: RegExp; + }; + /** Whether thinking/reasoning blocks are preserved cross-turn */ + thinkingPersistence: "full" | "text-only" | "none"; + /** Schema features NOT supported (tools using these get filtered) */ + unsupportedSchemaFeatures: string[]; +} + +// ─── Registry ─────────────────────────────────────────────────────────────── + +/** + * Built-in provider capability profiles. + * + * Sources (consolidated from scattered *-shared.ts files): + * - anthropic-shared.ts: normalizeToolCallId (64-char, [a-zA-Z0-9_-]) + * - openai-responses-shared.ts: ID normalization (64-char, fc_ prefix), image-in-tool-result workaround + * - google-shared.ts: sanitizeSchemaForGoogle (patternProperties, const), requiresToolCallId + * - mistral.ts: MISTRAL_TOOL_CALL_ID_LENGTH = 9 + * - amazon-bedrock.ts: normalizeToolCallId (64-char, [a-zA-Z0-9_-]) + */ +export const PROVIDER_CAPABILITIES: Record<string, ProviderCapabilities> = { + "anthropic-messages": { + toolCalling: true, + maxTools: 0, + imageToolResults: true, + structuredOutput: true, + toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ }, + thinkingPersistence: "full", + unsupportedSchemaFeatures: [], + }, + "anthropic-vertex": { + toolCalling: true, + maxTools: 0, + imageToolResults: true, + structuredOutput: true, + toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ }, + thinkingPersistence: "full", + unsupportedSchemaFeatures: [], + }, + "openai-responses": { + toolCalling: true, + maxTools: 0, + imageToolResults: false, // images sent as separate user message, not in tool result + structuredOutput: true, + toolCallIdFormat: { maxLength: 512, allowedChars: /^.+$/ }, + thinkingPersistence: "text-only", + unsupportedSchemaFeatures: [], + }, + "azure-openai-responses": { + toolCalling: true, + maxTools: 0, + imageToolResults: false, + structuredOutput: true, + toolCallIdFormat: { maxLength: 512, allowedChars: /^.+$/ }, + thinkingPersistence: "text-only", + unsupportedSchemaFeatures: [], + }, + "openai-codex-responses": { + toolCalling: true, + maxTools: 0, + imageToolResults: false, + structuredOutput: true, + toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ }, + thinkingPersistence: "text-only", + unsupportedSchemaFeatures: [], + }, + "openai-completions": { + toolCalling: true, + maxTools: 0, + imageToolResults: false, + structuredOutput: true, + toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ }, + thinkingPersistence: "text-only", + unsupportedSchemaFeatures: [], + }, + "google-generative-ai": { + toolCalling: true, 
+ maxTools: 0, + imageToolResults: true, + structuredOutput: true, + toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ }, + thinkingPersistence: "text-only", + unsupportedSchemaFeatures: ["patternProperties", "const"], + }, + "google-gemini-cli": { + toolCalling: true, + maxTools: 0, + imageToolResults: true, + structuredOutput: true, + toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ }, + thinkingPersistence: "text-only", + unsupportedSchemaFeatures: ["patternProperties", "const"], + }, + "google-vertex": { + toolCalling: true, + maxTools: 0, + imageToolResults: true, + structuredOutput: true, + toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ }, + thinkingPersistence: "text-only", + unsupportedSchemaFeatures: ["patternProperties", "const"], + }, + "mistral-conversations": { + toolCalling: true, + maxTools: 0, + imageToolResults: false, + structuredOutput: true, + toolCallIdFormat: { maxLength: 9, allowedChars: /^[a-zA-Z0-9]+$/ }, + thinkingPersistence: "none", + unsupportedSchemaFeatures: [], + }, + "bedrock-converse-stream": { + toolCalling: true, + maxTools: 0, + imageToolResults: true, // Bedrock supports image content blocks in tool results + structuredOutput: true, + toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ }, + thinkingPersistence: "text-only", + unsupportedSchemaFeatures: [], + }, + "ollama-chat": { + toolCalling: true, + maxTools: 0, + imageToolResults: false, + structuredOutput: false, + toolCallIdFormat: { maxLength: 64, allowedChars: /^[a-zA-Z0-9_-]+$/ }, + thinkingPersistence: "none", + unsupportedSchemaFeatures: [], + }, +}; + +// ─── Default (permissive) profile for unknown providers ───────────────────── + +const DEFAULT_CAPABILITIES: ProviderCapabilities = { + toolCalling: true, + maxTools: 0, + imageToolResults: true, + structuredOutput: true, + toolCallIdFormat: { maxLength: 512, allowedChars: /^.+$/ }, + thinkingPersistence: "text-only", + unsupportedSchemaFeatures: [], +}; + +// ─── Public API ───────────────────────────────────────────────────────────── + +/** + * Get capabilities for a provider API. Returns a permissive default for + * unknown providers (preserving existing behavior per ADR-005 principle 5). + */ +export function getProviderCapabilities(api: string): ProviderCapabilities { + return PROVIDER_CAPABILITIES[api] ?? DEFAULT_CAPABILITIES; +} + +/** + * Check if a provider supports all required schema features. + * Returns the list of unsupported features (empty if all supported). + */ +export function getUnsupportedFeatures(api: string, requiredFeatures: string[]): string[] { + const caps = getProviderCapabilities(api); + return requiredFeatures.filter(f => caps.unsupportedSchemaFeatures.includes(f)); +} + +/** + * Deep-merge user-provided capability overrides with built-in defaults. + * Partial overrides merge with the built-in profile for the given API. + */ +export function mergeCapabilityOverrides( + api: string, + overrides: Partial<Omit<ProviderCapabilities, "toolCallIdFormat">> & { + toolCallIdFormat?: Partial<ProviderCapabilities["toolCallIdFormat"]>; + }, +): ProviderCapabilities { + const base = getProviderCapabilities(api); + return { + ...base, + ...overrides, + toolCallIdFormat: overrides.toolCallIdFormat + ? { ...base.toolCallIdFormat, ...overrides.toolCallIdFormat } + : base.toolCallIdFormat, + }; +} + +/** + * Get all registered API names in the capability registry. + * Used by lint rules to verify all providers in register-builtins.ts + * have corresponding capability entries.
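+ *
+ * @example
+ * // Sketch of such a lint check ("builtinApis" is a stand-in for the
+ * // provider list that register-builtins.ts exposes; the exact name may differ):
+ * const missing = builtinApis.filter((api) => !getRegisteredApis().includes(api));
+ * if (missing.length > 0) throw new Error(`No capability entry for: ${missing.join(", ")}`);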
+ */ +export function getRegisteredApis(): string[] { + return Object.keys(PROVIDER_CAPABILITIES); +} diff --git a/packages/pi-ai/src/providers/transform-messages-report.test.ts b/packages/pi-ai/src/providers/transform-messages-report.test.ts new file mode 100644 index 000000000..85ae585ba --- /dev/null +++ b/packages/pi-ai/src/providers/transform-messages-report.test.ts @@ -0,0 +1,189 @@ +// GSD-2 — ProviderSwitchReport Tests (ADR-005 Phase 3) +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; + +import { transformMessages, createEmptyReport, hasTransformations } from "./transform-messages.js"; +import type { ProviderSwitchReport } from "./transform-messages.js"; +import type { Message, Model, AssistantMessage, ToolCall } from "../types.js"; + +// ─── Helpers ──────────────────────────────────────────────────────────────── + +function makeModel(overrides: Partial<Model> = {}): Model { + return { + id: "claude-sonnet-4-6", + name: "Claude Sonnet 4.6", + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "", + reasoning: false, + input: ["text", "image"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 8192, + ...overrides, + } as Model; +} + +function makeAssistantMsg(overrides: Partial<AssistantMessage> = {}): AssistantMessage { + return { + role: "assistant", + content: [], + api: "anthropic-messages", + provider: "anthropic", + model: "claude-sonnet-4-6", + usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } }, + stopReason: "stop", + timestamp: Date.now(), + ...overrides, + }; +} + +// ─── createEmptyReport / hasTransformations ───────────────────────────────── + +describe("createEmptyReport", () => { + test("creates report with zero counters", () => { + const report = createEmptyReport("anthropic-messages", "openai-responses"); + assert.equal(report.fromApi, "anthropic-messages"); + assert.equal(report.toApi, "openai-responses"); + assert.equal(report.thinkingBlocksDropped, 0); + assert.equal(report.thinkingBlocksDowngraded, 0); + assert.equal(report.toolCallIdsRemapped, 0); + assert.equal(report.syntheticToolResultsInserted, 0); + assert.equal(report.thoughtSignaturesDropped, 0); + }); +}); + +describe("hasTransformations", () => { + test("returns false for empty report", () => { + const report = createEmptyReport("a", "b"); + assert.equal(hasTransformations(report), false); + }); + + test("returns true when any counter is non-zero", () => { + const report = createEmptyReport("a", "b"); + report.thinkingBlocksDropped = 1; + assert.equal(hasTransformations(report), true); + }); +}); + +// ─── Report Tracking in transformMessages ─────────────────────────────────── + +describe("transformMessages with report tracking", () => { + test("tracks thinking blocks dropped for redacted cross-model", () => { + const model = makeModel({ id: "gpt-5", api: "openai-responses", provider: "openai" }); + const messages: Message[] = [ + makeAssistantMsg({ + content: [ + { type: "thinking", thinking: "", redacted: true }, + { type: "text", text: "Hello" }, + ], + }), + ]; + const report = createEmptyReport("anthropic-messages", "openai-responses"); + transformMessages(messages, model, undefined, report); + assert.equal(report.thinkingBlocksDropped, 1); + }); + + test("tracks thinking blocks downgraded to plain text", () => { + const model = makeModel({ id: "gpt-5", api: "openai-responses", provider: "openai" }); + const messages: Message[]
= [ + makeAssistantMsg({ + content: [ + { type: "thinking", thinking: "Let me think about this..." }, + { type: "text", text: "Here is my answer" }, + ], + }), + ]; + const report = createEmptyReport("anthropic-messages", "openai-responses"); + transformMessages(messages, model, undefined, report); + assert.equal(report.thinkingBlocksDowngraded, 1); + }); + + test("tracks tool call IDs remapped", () => { + const model = makeModel({ id: "claude-sonnet-4-6", api: "anthropic-messages", provider: "anthropic" }); + const toolCall: ToolCall = { + type: "toolCall", + id: "original-long-id-that-needs-normalization|with-special-chars", + name: "bash", + arguments: { command: "ls" }, + }; + const messages: Message[] = [ + makeAssistantMsg({ + provider: "openai", + api: "openai-responses", + model: "gpt-5", + content: [toolCall], + }), + ]; + const normalizer = (id: string) => id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64); + const report = createEmptyReport("openai-responses", "anthropic-messages"); + transformMessages(messages, model, normalizer, report); + assert.equal(report.toolCallIdsRemapped, 1); + }); + + test("tracks thought signatures dropped", () => { + const model = makeModel({ id: "claude-sonnet-4-6", api: "anthropic-messages", provider: "anthropic" }); + const toolCall: ToolCall = { + type: "toolCall", + id: "tc_001", + name: "bash", + arguments: { command: "ls" }, + thoughtSignature: "some-opaque-signature", + }; + const messages: Message[] = [ + makeAssistantMsg({ + provider: "google", + api: "google-generative-ai", + model: "gemini-2.5-pro", + content: [toolCall], + }), + ]; + const report = createEmptyReport("google-generative-ai", "anthropic-messages"); + transformMessages(messages, model, undefined, report); + assert.equal(report.thoughtSignaturesDropped, 1); + }); + + test("tracks synthetic tool results inserted", () => { + const model = makeModel(); + const toolCall: ToolCall = { + type: "toolCall", + id: "tc_orphan", + name: "bash", + arguments: { command: "ls" }, + }; + // Assistant message with tool call followed by another assistant (no tool result) + const messages: Message[] = [ + makeAssistantMsg({ content: [toolCall, { type: "text", text: "Using bash" }] }), + makeAssistantMsg({ content: [{ type: "text", text: "Next message" }] }), + ]; + const report = createEmptyReport("anthropic-messages", "anthropic-messages"); + transformMessages(messages, model, undefined, report); + assert.equal(report.syntheticToolResultsInserted, 1); + }); + + test("does not count transformations for same-model messages", () => { + const model = makeModel(); + const messages: Message[] = [ + makeAssistantMsg({ + content: [ + { type: "thinking", thinking: "Let me think..." 
}, + { type: "text", text: "Answer" }, + ], + }), + ]; + const report = createEmptyReport("anthropic-messages", "anthropic-messages"); + transformMessages(messages, model, undefined, report); + assert.equal(report.thinkingBlocksDowngraded, 0); + assert.equal(report.thinkingBlocksDropped, 0); + }); + + test("works without report parameter (backward compatible)", () => { + const model = makeModel(); + const messages: Message[] = [ + makeAssistantMsg({ content: [{ type: "text", text: "Hello" }] }), + ]; + // Should not throw + const result = transformMessages(messages, model); + assert.ok(Array.isArray(result)); + }); +}); diff --git a/packages/pi-ai/src/providers/transform-messages.ts b/packages/pi-ai/src/providers/transform-messages.ts index f61f08037..bcfd5234a 100644 --- a/packages/pi-ai/src/providers/transform-messages.ts +++ b/packages/pi-ai/src/providers/transform-messages.ts @@ -1,5 +1,87 @@ import type { Api, AssistantMessage, Message, Model, ToolCall, ToolResultMessage } from "../types.js"; +/** + * Report of context transformations during a cross-provider switch (ADR-005 Phase 3). + * Tracks what was lost or downgraded when replaying conversation history to a different provider. + */ +export interface ProviderSwitchReport { + /** API of the messages being transformed from */ + fromApi: string; + /** API of the target model */ + toApi: string; + /** Number of thinking blocks completely dropped (redacted/encrypted, cross-model) */ + thinkingBlocksDropped: number; + /** Number of thinking blocks downgraded from structured to plain text */ + thinkingBlocksDowngraded: number; + /** Number of tool call IDs that were remapped/normalized */ + toolCallIdsRemapped: number; + /** Number of synthetic tool results inserted for orphaned tool calls */ + syntheticToolResultsInserted: number; + /** Number of thought signatures dropped (Google-specific opaque context) */ + thoughtSignaturesDropped: number; +} + +/** + * Create an empty provider switch report. + */ +export function createEmptyReport(fromApi: string, toApi: string): ProviderSwitchReport { + return { + fromApi, + toApi, + thinkingBlocksDropped: 0, + thinkingBlocksDowngraded: 0, + toolCallIdsRemapped: 0, + syntheticToolResultsInserted: 0, + thoughtSignaturesDropped: 0, + }; +} + +/** + * Check if a provider switch report has any non-zero transformations. + */ +export function hasTransformations(report: ProviderSwitchReport): boolean { + return ( + report.thinkingBlocksDropped > 0 || + report.thinkingBlocksDowngraded > 0 || + report.toolCallIdsRemapped > 0 || + report.syntheticToolResultsInserted > 0 || + report.thoughtSignaturesDropped > 0 + ); +} + +/** + * Create a report, run transformMessages, and log if non-empty. + * Convenience wrapper for provider adapters (ADR-005). + */ +export function transformMessagesWithReport( + messages: Message[], + model: Model, + normalizeToolCallId?: (id: string, model: Model, source: AssistantMessage) => string, + sourceApi?: string, +): Message[] { + const report = createEmptyReport(sourceApi ?? "unknown", model.api); + const result = transformMessages(messages, model, normalizeToolCallId, report); + if (hasTransformations(report)) { + logProviderSwitchReport(report); + } + return result; +} + +/** Log a non-empty ProviderSwitchReport as a debug-level warning. 
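+ * Example (sketch): an adapter that knows the source API can route through
+ *   transformMessagesWithReport(history, targetModel, normalizeId, "anthropic-messages")
+ * so the logger below fires only when a verbose flag is set.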
*/ +function logProviderSwitchReport(report: ProviderSwitchReport): void { + const parts: string[] = [`Provider switch ${report.fromApi} → ${report.toApi}:`]; + if (report.thinkingBlocksDropped > 0) parts.push(`${report.thinkingBlocksDropped} thinking blocks dropped`); + if (report.thinkingBlocksDowngraded > 0) parts.push(`${report.thinkingBlocksDowngraded} thinking blocks downgraded`); + if (report.toolCallIdsRemapped > 0) parts.push(`${report.toolCallIdsRemapped} tool call IDs remapped`); + if (report.syntheticToolResultsInserted > 0) parts.push(`${report.syntheticToolResultsInserted} synthetic tool results inserted`); + if (report.thoughtSignaturesDropped > 0) parts.push(`${report.thoughtSignaturesDropped} thought signatures dropped`); + // Use process.stderr for debug output — this is observable in verbose/debug modes + // without polluting stdout which may be used for structured output (RPC/MCP). + if (process.env.GSD_VERBOSE === "1" || process.env.PI_VERBOSE === "1") { + process.stderr.write(`[provider-switch] ${parts.join(", ")}\n`); + } +} + /** * Normalize tool call ID for cross-provider compatibility. * OpenAI Responses API generates IDs that are 450+ chars with special characters like `|`. @@ -9,6 +91,7 @@ export function transformMessages( messages: Message[], model: Model, normalizeToolCallId?: (id: string, model: Model, source: AssistantMessage) => string, + report?: ProviderSwitchReport, ): Message[] { // Build a map of original tool call IDs to normalized IDs const toolCallIdMap = new Map(); @@ -42,14 +125,20 @@ export function transformMessages( // Redacted thinking is opaque encrypted content, only valid for the same model. // Drop it for cross-model to avoid API errors. if (block.redacted) { + if (!isSameModel && report) report.thinkingBlocksDropped++; return isSameModel ? 
block : []; } // For same model: keep thinking blocks with signatures (needed for replay) // even if the thinking text is empty (OpenAI encrypted reasoning) if (isSameModel && block.thinkingSignature) return block; // Skip empty thinking blocks, convert others to plain text - if (!block.thinking || block.thinking.trim() === "") return []; + if (!block.thinking || block.thinking.trim() === "") { + if (!isSameModel && report) report.thinkingBlocksDropped++; + return []; + } if (isSameModel) return block; + // Downgrade: structured thinking → plain text + if (report) report.thinkingBlocksDowngraded++; return { type: "text" as const, text: block.thinking, @@ -71,6 +160,7 @@ export function transformMessages( if (!isSameModel && toolCall.thoughtSignature) { normalizedToolCall = { ...toolCall }; delete (normalizedToolCall as { thoughtSignature?: string }).thoughtSignature; + if (report) report.thoughtSignaturesDropped++; } if (!isSameModel && normalizeToolCallId) { @@ -78,6 +168,7 @@ if (normalizedId !== toolCall.id) { toolCallIdMap.set(toolCall.id, normalizedId); normalizedToolCall = { ...normalizedToolCall, id: normalizedId }; + if (report) report.toolCallIdsRemapped++; } } @@ -117,6 +208,7 @@ isError: true, timestamp: Date.now(), } as ToolResultMessage); + if (report) report.syntheticToolResultsInserted++; } } pendingToolCalls = []; @@ -157,6 +249,7 @@ isError: true, timestamp: Date.now(), } as ToolResultMessage); + if (report) report.syntheticToolResultsInserted++; } } pendingToolCalls = []; diff --git a/packages/pi-ai/src/types.ts b/packages/pi-ai/src/types.ts index 661b58b57..9ec6ad85f 100644 --- a/packages/pi-ai/src/types.ts +++ b/packages/pi-ai/src/types.ts @@ -44,6 +44,7 @@ export type KnownProvider = | "opencode-go" | "kimi-coding" | "alibaba-coding-plan" + | "alibaba-dashscope" | "ollama" | "ollama-cloud"; export type Provider = KnownProvider | string; diff --git a/packages/pi-ai/src/utils/oauth/anthropic.ts b/packages/pi-ai/src/utils/oauth/anthropic.ts deleted file mode 100644 index 861e26409..000000000 --- a/packages/pi-ai/src/utils/oauth/anthropic.ts +++ /dev/null @@ -1,140 +0,0 @@ -/** - * Anthropic OAuth flow (Claude Pro/Max) - */ - -import { generatePKCE } from "./pkce.js"; -import type { OAuthCredentials, OAuthLoginCallbacks, OAuthProviderInterface } from "./types.js"; - -const decode = (s: string) => atob(s); -const CLIENT_ID = decode("OWQxYzI1MGEtZTYxYi00NGQ5LTg4ZWQtNTk0NGQxOTYyZjVl"); -const AUTHORIZE_URL = "https://claude.ai/oauth/authorize"; -const TOKEN_URL = "https://platform.claude.com/v1/oauth/token"; -const REDIRECT_URI = "https://platform.claude.com/oauth/code/callback"; -const SCOPES = "org:create_api_key user:profile user:inference"; - -/** - * Login with Anthropic OAuth (device code flow) - * - * @param onAuthUrl - Callback to handle the authorization URL (e.g., open browser) - * @param onPromptCode - Callback to prompt user for the authorization code - */ -export async function loginAnthropic( - onAuthUrl: (url: string) => void, - onPromptCode: () => Promise<string>, -): Promise<OAuthCredentials> { - const { verifier, challenge } = await generatePKCE(); - - // Build authorization URL - const authParams = new URLSearchParams({ - code: "true", - client_id: CLIENT_ID, - response_type: "code", - redirect_uri: REDIRECT_URI, - scope: SCOPES, - code_challenge: challenge, - code_challenge_method: "S256", - state: verifier, - }); - - const authUrl = `${AUTHORIZE_URL}?${authParams.toString()}`;
- - // Notify caller with URL to open - onAuthUrl(authUrl); - - // Wait for user to paste authorization code (format: code#state) - const authCode = await onPromptCode(); - const splits = authCode.split("#"); - const code = splits[0]; - const state = splits[1]; - - // Exchange code for tokens - const tokenResponse = await fetch(TOKEN_URL, { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - grant_type: "authorization_code", - client_id: CLIENT_ID, - code: code, - state: state, - redirect_uri: REDIRECT_URI, - code_verifier: verifier, - }), - signal: AbortSignal.timeout(30_000), - }); - - if (!tokenResponse.ok) { - const error = await tokenResponse.text(); - throw new Error(`Token exchange failed: ${error}`); - } - - const tokenData = (await tokenResponse.json()) as { - access_token: string; - refresh_token: string; - expires_in: number; - }; - - // Calculate expiry time (current time + expires_in seconds - 5 min buffer) - const expiresAt = Date.now() + tokenData.expires_in * 1000 - 5 * 60 * 1000; - - // Save credentials - return { - refresh: tokenData.refresh_token, - access: tokenData.access_token, - expires: expiresAt, - }; -} - -/** - * Refresh Anthropic OAuth token - */ -export async function refreshAnthropicToken(refreshToken: string): Promise<OAuthCredentials> { - const response = await fetch(TOKEN_URL, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ - grant_type: "refresh_token", - client_id: CLIENT_ID, - refresh_token: refreshToken, - }), - signal: AbortSignal.timeout(30_000), - }); - - if (!response.ok) { - const error = await response.text(); - throw new Error(`Anthropic token refresh failed: ${error}`); - } - - const data = (await response.json()) as { - access_token: string; - refresh_token: string; - expires_in: number; - }; - - return { - refresh: data.refresh_token, - access: data.access_token, - expires: Date.now() + data.expires_in * 1000 - 5 * 60 * 1000, - }; -} - -export const anthropicOAuthProvider: OAuthProviderInterface = { - id: "anthropic", - name: "Anthropic (Claude Pro/Max)", - - async login(callbacks: OAuthLoginCallbacks): Promise<OAuthCredentials> { - return loginAnthropic( - (url) => callbacks.onAuth({ url }), - () => callbacks.onPrompt({ message: "Paste the authorization code:" }), - ); - }, - - async refreshToken(credentials: OAuthCredentials): Promise<OAuthCredentials> { - return refreshAnthropicToken(credentials.refresh); - }, - - getApiKey(credentials: OAuthCredentials): string { - return credentials.access; - }, -}; diff --git a/packages/pi-ai/src/utils/oauth/github-copilot.test.ts b/packages/pi-ai/src/utils/oauth/github-copilot.test.ts new file mode 100644 index 000000000..fabe2c09f --- /dev/null +++ b/packages/pi-ai/src/utils/oauth/github-copilot.test.ts @@ -0,0 +1,71 @@ +import assert from "node:assert/strict"; +import test from "node:test"; + +import type { Api, Model } from "../../types.js"; +import type { OAuthCredentials } from "./index.js"; +import { githubCopilotOAuthProvider } from "./github-copilot.js"; + +function makeModel(provider: string, id: string): Model { + return { + id, + name: id, + api: "openai-completions", + provider, + baseUrl: `${provider}:`, + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 128000, + maxTokens: 16384, + }; +} + +function makeCredentials(overrides: Partial<OAuthCredentials & { modelLimits: Record<string, { contextWindow: number; maxTokens: number }> }> = {}) { + return { + type: "oauth" as const, + access: "copilot-token", + refresh: "refresh-token", + expires: Date.now() + 60_000,
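+    // Base shape mirrors stored OAuth credentials; tests spread modelLimits on
+    // top to simulate the Copilot per-model limits payload (assumed optional).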
...overrides, + }; +} + +test("githubCopilotOAuthProvider.modifyModels filters unavailable copilot models (#3849)", () => { + const models = [ + makeModel("github-copilot", "gpt-5"), + makeModel("github-copilot", "claude-sonnet-4"), + makeModel("openai", "gpt-4.1"), + ]; + + assert.ok(githubCopilotOAuthProvider.modifyModels, "github copilot provider should expose modifyModels"); + const modified = githubCopilotOAuthProvider.modifyModels(models, makeCredentials({ + modelLimits: { + "gpt-5": { contextWindow: 256000, maxTokens: 32000 }, + }, + })); + + assert.deepEqual( + modified.map((model) => `${model.provider}/${model.id}`), + ["github-copilot/gpt-5", "openai/gpt-4.1"], + ); + + const copilotModel = modified.find((model) => model.provider === "github-copilot" && model.id === "gpt-5"); + assert.ok(copilotModel, "available copilot model should remain"); + assert.equal(copilotModel.contextWindow, 256000); + assert.equal(copilotModel.maxTokens, 32000); + assert.match(copilotModel.baseUrl, /githubcopilot\.com/); +}); + +test("githubCopilotOAuthProvider.modifyModels keeps all copilot models when limits are unavailable", () => { + const models = [ + makeModel("github-copilot", "gpt-5"), + makeModel("github-copilot", "claude-sonnet-4"), + ]; + + assert.ok(githubCopilotOAuthProvider.modifyModels, "github copilot provider should expose modifyModels"); + const modified = githubCopilotOAuthProvider.modifyModels(models, makeCredentials()); + + assert.equal(modified.length, 2, "lack of limits should not hide every copilot model"); + assert.ok(modified.every((model) => model.provider === "github-copilot")); + assert.ok(modified.every((model) => model.baseUrl.includes("githubcopilot.com"))); +}); diff --git a/packages/pi-ai/src/utils/oauth/github-copilot.ts b/packages/pi-ai/src/utils/oauth/github-copilot.ts index eae8e9a5f..6e01295c4 100644 --- a/packages/pi-ai/src/utils/oauth/github-copilot.ts +++ b/packages/pi-ai/src/utils/oauth/github-copilot.ts @@ -441,8 +441,11 @@ export const githubCopilotOAuthProvider: OAuthProviderInterface = { const domain = creds.enterpriseUrl ? (normalizeDomain(creds.enterpriseUrl) ?? undefined) : undefined; const baseUrl = getGitHubCopilotBaseUrl(creds.access, domain); const limits = creds.modelLimits; - return models.map((m) => { + const availableModelIds = limits ? new Set(Object.keys(limits)) : null; + const shouldFilterByAvailability = !!availableModelIds && availableModelIds.size > 0; + return models.flatMap((m) => { if (m.provider !== "github-copilot") return m; + if (shouldFilterByAvailability && !availableModelIds.has(m.id)) return []; const modelLimits = limits?.[m.id]; return { ...m, diff --git a/packages/pi-ai/src/utils/oauth/index.ts b/packages/pi-ai/src/utils/oauth/index.ts index a91decf4a..715b4910c 100644 --- a/packages/pi-ai/src/utils/oauth/index.ts +++ b/packages/pi-ai/src/utils/oauth/index.ts @@ -3,14 +3,14 @@ * * This module handles login, token refresh, and credential storage * for OAuth-based providers: - * - Anthropic (Claude Pro/Max) * - GitHub Copilot * - Google Cloud Code Assist (Gemini CLI) * - Antigravity (Gemini 3, Claude, GPT-OSS via Google Cloud) + * + * Note: Anthropic OAuth was removed per TOS compliance (see docs/user-docs/claude-code-auth-compliance.md). + * Use API keys or the local Claude Code CLI for Anthropic access. 
*/ -// Anthropic -export { anthropicOAuthProvider, loginAnthropic, refreshAnthropicToken } from "./anthropic.js"; // GitHub Copilot export { getGitHubCopilotBaseUrl, @@ -32,7 +32,6 @@ export * from "./types.js"; // Provider Registry // ============================================================================ -import { anthropicOAuthProvider } from "./anthropic.js"; import { githubCopilotOAuthProvider } from "./github-copilot.js"; import { antigravityOAuthProvider } from "./google-antigravity.js"; import { geminiCliOAuthProvider } from "./google-gemini-cli.js"; @@ -40,7 +39,6 @@ import { openaiCodexOAuthProvider } from "./openai-codex.js"; import type { OAuthCredentials, OAuthProviderId, OAuthProviderInterface } from "./types.js"; const BUILT_IN_OAUTH_PROVIDERS: OAuthProviderInterface[] = [ - anthropicOAuthProvider, githubCopilotOAuthProvider, geminiCliOAuthProvider, antigravityOAuthProvider, diff --git a/packages/pi-coding-agent/package.json b/packages/pi-coding-agent/package.json index 0fa7e909f..0329f69b6 100644 --- a/packages/pi-coding-agent/package.json +++ b/packages/pi-coding-agent/package.json @@ -1,6 +1,6 @@ { "name": "@gsd/pi-coding-agent", - "version": "2.68.0", + "version": "2.73.0", "description": "Coding agent CLI (vendored from pi-mono)", "type": "module", "piConfig": { diff --git a/packages/pi-coding-agent/src/core/agent-session-renderable-tools.test.ts b/packages/pi-coding-agent/src/core/agent-session-renderable-tools.test.ts new file mode 100644 index 000000000..51f7ae7fc --- /dev/null +++ b/packages/pi-coding-agent/src/core/agent-session-renderable-tools.test.ts @@ -0,0 +1,70 @@ +import assert from "node:assert/strict"; +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, beforeEach, describe, it } from "node:test"; + +import { Agent } from "@gsd/pi-agent-core"; +import { Type } from "@sinclair/typebox"; +import type { ToolDefinition } from "./extensions/types.js"; +import { AgentSession } from "./agent-session.js"; +import { AuthStorage } from "./auth-storage.js"; +import { ModelRegistry } from "./model-registry.js"; +import { DefaultResourceLoader } from "./resource-loader.js"; +import { SessionManager } from "./session-manager.js"; +import { SettingsManager } from "./settings-manager.js"; + +let testDir: string; + +async function createSession() { + const agentDir = join(testDir, "agent-home"); + const authStorage = AuthStorage.inMemory({}); + const modelRegistry = new ModelRegistry(authStorage, join(agentDir, "models.json")); + const settingsManager = SettingsManager.inMemory(); + const resourceLoader = new DefaultResourceLoader({ + cwd: testDir, + agentDir, + settingsManager, + noExtensions: true, + noPromptTemplates: true, + noThemes: true, + }); + await resourceLoader.reload(); + + return new AgentSession({ + agent: new Agent(), + sessionManager: SessionManager.inMemory(testDir), + settingsManager, + cwd: testDir, + resourceLoader, + modelRegistry, + }); +} + +describe("AgentSession renderable tool lookup", () => { + beforeEach(() => { + testDir = mkdtempSync(join(tmpdir(), "agent-session-tools-")); + }); + + afterEach(() => { + rmSync(testDir, { recursive: true, force: true }); + }); + + it("matches registered tool definitions case-insensitively (#3780)", async () => { + const session = await createSession(); + const bashDefinition = { + name: "bash", + label: "bash", + description: "Execute a shell command", + parameters: Type.Object({}), + execute: async () => ({ content: 
[], details: undefined }), + } satisfies ToolDefinition; + + (session as any)._extensionRunner = { + getAllRegisteredTools: () => [{ definition: bashDefinition }], + }; + + assert.equal(session.getRenderableToolDefinition("Bash"), bashDefinition); + assert.equal(session.getRenderableToolDefinition("BASH"), bashDefinition); + }); +}); diff --git a/packages/pi-coding-agent/src/core/agent-session.ts b/packages/pi-coding-agent/src/core/agent-session.ts index 782ecd04e..f5d6c3c35 100644 --- a/packages/pi-coding-agent/src/core/agent-session.ts +++ b/packages/pi-coding-agent/src/core/agent-session.ts @@ -1275,8 +1275,9 @@ export class AgentSession { } getRenderableToolDefinition(toolName: string): ToolDefinition | undefined { + const normalizedToolName = toolName.toLowerCase(); return [...this._getBuiltinToolDefinitions(), ...this._getRegisteredToolDefinitions()].find( - (tool) => tool.name === toolName, + (tool) => tool.name.toLowerCase() === normalizedToolName, ); } diff --git a/packages/pi-coding-agent/src/core/auth-storage.test.ts b/packages/pi-coding-agent/src/core/auth-storage.test.ts index a0d2cab20..646162f2b 100644 --- a/packages/pi-coding-agent/src/core/auth-storage.test.ts +++ b/packages/pi-coding-agent/src/core/auth-storage.test.ts @@ -423,3 +423,149 @@ describe("AuthStorage — getAll()", () => { assert.equal((all["openai"] as any).key, "sk-openai"); }); }); + +// ─── getEarliestBackoffExpiry ───────────────────────────────────────────────── + +describe("AuthStorage — getEarliestBackoffExpiry", () => { + it("returns undefined when no credentials are configured for the provider", () => { + const storage = inMemory({}); + assert.equal(storage.getEarliestBackoffExpiry("anthropic"), undefined); + }); + + it("returns undefined when credentials exist but none are backed off", () => { + const storage = inMemory({ anthropic: makeKey("sk-only") }); + // No markUsageLimitReached call — credentialBackoff map is empty + assert.equal(storage.getEarliestBackoffExpiry("anthropic"), undefined); + }); + + it("returns a future timestamp when a single credential is backed off", async () => { + const storage = inMemory({ anthropic: makeKey("sk-only") }); + await storage.getApiKey("anthropic"); + storage.markUsageLimitReached("anthropic"); + + const expiry = storage.getEarliestBackoffExpiry("anthropic"); + assert.ok(expiry !== undefined, "should return a timestamp"); + assert.ok(expiry > Date.now(), "expiry should be in the future"); + }); + + it("returns the earliest expiry when multiple credentials are backed off", async () => { + const storage = inMemory({ + anthropic: [makeKey("sk-1"), makeKey("sk-2")], + }); + + // Back off both credentials with the default rate_limit backoff (30 s) + await storage.getApiKey("anthropic"); // uses index 0 + storage.markUsageLimitReached("anthropic"); // backs off index 0 + await storage.getApiKey("anthropic"); // uses index 1 + storage.markUsageLimitReached("anthropic"); // backs off index 1 + + const expiry = storage.getEarliestBackoffExpiry("anthropic"); + assert.ok(expiry !== undefined, "should return a timestamp"); + assert.ok(expiry > Date.now(), "expiry should be in the future"); + }); + + it("returns undefined after backed-off credentials expire (cleans up entries)", () => { + // Manually inject an already-expired backoff entry so we can test + // the cleanup path without actually waiting 30 seconds. 
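+    // Injected shape (an assumption consistent with how getEarliestBackoffExpiry
+    // reads it): credentialBackoff maps provider -> credentialIndex -> expiresAt (ms).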
+ const storage = inMemory({ anthropic: makeKey("sk-only") }); + + // Access private credentialBackoff map via type assertion to inject expired entry + const credentialBackoff: Map<string, Map<number, number>> = + (storage as any).credentialBackoff; + const providerMap = new Map<number, number>(); + // expiresAt in the past + providerMap.set(0, Date.now() - 1_000); + credentialBackoff.set("anthropic", providerMap); + + // getEarliestBackoffExpiry should clean up the expired entry and return undefined + const expiry = storage.getEarliestBackoffExpiry("anthropic"); + assert.equal(expiry, undefined); + + // Confirm the expired entry was removed from the map + assert.equal(providerMap.size, 0, "expired entry should have been deleted"); + }); + + it("returns undefined when provider is not in credentialBackoff map at all", () => { + const storage = inMemory({ openai: makeKey("sk-openai") }); + // anthropic has no backoff map entry at all + assert.equal(storage.getEarliestBackoffExpiry("anthropic"), undefined); + }); + + it("only returns expiry for the requested provider, not other providers", async () => { + const storage = inMemory({ + anthropic: makeKey("sk-ant"), + openai: makeKey("sk-oai"), + }); + + // Back off anthropic + await storage.getApiKey("anthropic"); + storage.markUsageLimitReached("anthropic"); + + // openai is not backed off + assert.equal(storage.getEarliestBackoffExpiry("openai"), undefined); + + // anthropic is backed off + const expiry = storage.getEarliestBackoffExpiry("anthropic"); + assert.ok(expiry !== undefined); + assert.ok(expiry > Date.now()); + }); + + it("returns the minimum expiry when one credential expires sooner than another", () => { + const storage = inMemory({ + anthropic: [makeKey("sk-1"), makeKey("sk-2")], + }); + + const now = Date.now(); + const nearExpiry = now + 5_000; // expires in 5 s + const farExpiry = now + 30_000; // expires in 30 s + + // Inject two different backoff expiries manually + const credentialBackoff: Map<string, Map<number, number>> = + (storage as any).credentialBackoff; + const providerMap = new Map<number, number>(); + providerMap.set(0, nearExpiry); + providerMap.set(1, farExpiry); + credentialBackoff.set("anthropic", providerMap); + + const expiry = storage.getEarliestBackoffExpiry("anthropic"); + assert.equal(expiry, nearExpiry, "should return the nearest (smallest) expiry"); + }); +}); + +// ─── localhost baseUrl shortcut ──────────────────────────────────────────────── + +describe("AuthStorage — localhost baseUrl shortcut", () => { + it("returns 'local-no-key-needed' for localhost provider with no configured key", async () => { + const storage = inMemory({}); + const key = await storage.getApiKey("ollama", undefined, { baseUrl: "http://localhost:11434" }); + assert.equal(key, "local-no-key-needed"); + }); + + it("returns 'local-no-key-needed' for 127.0.0.1 provider with no configured key", async () => { + const storage = inMemory({}); + const key = await storage.getApiKey("custom", undefined, { baseUrl: "http://127.0.0.1:8080/v1" }); + assert.equal(key, "local-no-key-needed"); + }); + + it("returns configured key from fallback resolver for localhost custom provider (#4106)", async () => { + // Regression test: compaction called getApiKey(model) where model.baseUrl is localhost. + // The localhost shortcut must NOT override an explicitly configured apiKey from models.json. + const storage = inMemory({}); + storage.setFallbackResolver((provider) => + provider === "cliproxy" ?
"sk-real-proxy-key" : undefined, + ); + + const key = await storage.getApiKey("cliproxy", undefined, { baseUrl: "http://localhost:8317/v1" }); + assert.equal(key, "sk-real-proxy-key"); + }); + + it("returns configured key from fallback resolver when baseUrl uses 127.0.0.1 (#4106)", async () => { + const storage = inMemory({}); + storage.setFallbackResolver((provider) => + provider === "myproxy" ? "sk-myproxy-key" : undefined, + ); + + const key = await storage.getApiKey("myproxy", undefined, { baseUrl: "http://127.0.0.1:9000/v1" }); + assert.equal(key, "sk-myproxy-key"); + }); +}); diff --git a/packages/pi-coding-agent/src/core/auth-storage.ts b/packages/pi-coding-agent/src/core/auth-storage.ts index fb1532252..c604fc801 100644 --- a/packages/pi-coding-agent/src/core/auth-storage.ts +++ b/packages/pi-coding-agent/src/core/auth-storage.ts @@ -559,6 +559,36 @@ export class AuthStorage { return remaining; } + /** + * Get the earliest timestamp at which any credential for this provider + * will become available again. Returns `undefined` when no credentials + * are backed off (i.e. all are immediately available). + * + * Callers can use this to sleep exactly long enough for the cooldown to + * clear instead of using a fixed retry delay that may be shorter than the + * backoff window. + */ + getEarliestBackoffExpiry(provider: string): number | undefined { + const providerMap = this.credentialBackoff.get(provider); + if (!providerMap || providerMap.size === 0) return undefined; + + const now = Date.now(); + let earliest: number | undefined; + + for (const [index, expiresAt] of providerMap) { + if (expiresAt <= now) { + // Already expired — clean up + providerMap.delete(index); + continue; + } + if (earliest === undefined || expiresAt < earliest) { + earliest = expiresAt; + } + } + + return earliest; + } + /** * Check if a credential index is currently backed off. 
*/ @@ -789,7 +819,7 @@ export class AuthStorage { */ async getApiKey(providerId: string, sessionId?: string, options?: { baseUrl?: string }): Promise<string | undefined> { // If the model has a local baseUrl, return a dummy key to avoid auth blocking - if (options?.baseUrl) { + if (options?.baseUrl && !this.fallbackResolver?.(providerId)) { try { const hostname = new URL(options.baseUrl).hostname; if (hostname === "localhost" || hostname === "127.0.0.1" || hostname === "0.0.0.0" || hostname === "::1") { diff --git a/packages/pi-coding-agent/src/core/chat-controller-ordering.test.ts b/packages/pi-coding-agent/src/core/chat-controller-ordering.test.ts new file mode 100644 index 000000000..eb7795508 --- /dev/null +++ b/packages/pi-coding-agent/src/core/chat-controller-ordering.test.ts @@ -0,0 +1,468 @@ +import assert from "node:assert/strict"; +import { test } from "node:test"; + +import { handleAgentEvent } from "../modes/interactive/controllers/chat-controller.js"; + +function makeUsage() { + return { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }; +} + +function makeAssistant(content: any[]) { + return { + role: "assistant", + content, + api: "anthropic-messages", + provider: "claude-code", + model: "claude-sonnet-4", + usage: makeUsage(), + stopReason: "stop", + timestamp: Date.now(), + }; +} + +function createHost() { + const chatContainer = { + children: [] as any[], + addChild(component: any) { + this.children.push(component); + }, + removeChild(component: any) { + const idx = this.children.indexOf(component); + if (idx !== -1) this.children.splice(idx, 1); + }, + clear() { + this.children = []; + }, + }; + + const pinnedMessageContainer = { + children: [] as any[], + addChild(component: any) { + this.children.push(component); + }, + removeChild(component: any) { + const idx = this.children.indexOf(component); + if (idx !== -1) this.children.splice(idx, 1); + }, + clear() { + this.children = []; + }, + }; + + const host: any = { + isInitialized: true, + init: async () => {}, + defaultEditor: { onEscape: undefined }, + editor: {}, + session: { retryAttempt: 0, abortCompaction: () => {}, abortRetry: () => {} }, + ui: { requestRender: () => {}, terminal: { rows: 50 } }, + footer: { invalidate: () => {} }, + keybindings: {}, + statusContainer: { clear: () => {}, addChild: () => {} }, + chatContainer, + settingsManager: { getTimestampFormat: () => "date-time-iso", getShowImages: () => false }, + pendingTools: new Map(), + toolOutputExpanded: false, + hideThinkingBlock: false, + isBashMode: false, + defaultWorkingMessage: "Working...", + compactionQueuedMessages: [], + editorContainer: {}, + pendingMessagesContainer: { clear: () => {} }, + pinnedMessageContainer, + addMessageToChat: () => {}, + getMarkdownThemeWithSettings: () => ({}), + formatWebSearchResult: () => "", + getRegisteredToolDefinition: () => undefined, + checkShutdownRequested: async () => {}, + rebuildChatFromMessages: () => {}, + flushCompactionQueue: async () => {}, + showStatus: () => {}, + showError: () => {}, + updatePendingMessagesDisplay: () => {}, + updateTerminalTitle: () => {}, + updateEditorBorderColor: () => {}, + }; + + return host; +} + +test("chat-controller keeps tool output ahead of delayed assistant text for external tool streams", async () => { + // ToolExecutionComponent uses the global theme singleton. + // Install a minimal no-op theme implementation for this unit test.
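+ // The singleton is looked up via Symbol.for("@gsd/pi-coding-agent:theme"), so a
+ // plain global assignment is enough for ToolExecutionComponent's render calls.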
+ (globalThis as any)[Symbol.for("@gsd/pi-coding-agent:theme")] = { + fg: (_key: string, text: string) => text, + bg: (_key: string, text: string) => text, + bold: (text: string) => text, + italic: (text: string) => text, + truncate: (text: string) => text, + }; + + const host = createHost(); + const toolId = "mcp-tool-1"; + const toolCall = { + type: "toolCall", + id: toolId, + name: "exec_command", + arguments: { cmd: "echo hi" }, + }; + + await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any); + + assert.equal(host.streamingComponent, undefined, "assistant component should be deferred at message_start"); + assert.equal(host.chatContainer.children.length, 0, "nothing should render before content arrives"); + + await handleAgentEvent( + host, + { + type: "message_update", + message: makeAssistant([toolCall]), + assistantMessageEvent: { + type: "toolcall_end", + contentIndex: 0, + toolCall: { + ...toolCall, + externalResult: { + content: [{ type: "text", text: "tool output" }], + details: {}, + isError: false, + }, + }, + partial: makeAssistant([toolCall]), + }, + } as any, + ); + + assert.equal(host.streamingComponent, undefined, "assistant text container should remain deferred for tool-only updates"); + assert.equal(host.chatContainer.children.length, 1, "tool execution block should render immediately"); + assert.equal(host.chatContainer.children[0]?.constructor?.name, "ToolExecutionComponent"); + + // Re-assert required host method before the text-bearing update path. + host.getMarkdownThemeWithSettings = () => ({}); + + await handleAgentEvent( + host, + { + type: "message_update", + message: makeAssistant([toolCall, { type: "text", text: "done" }]), + assistantMessageEvent: { + type: "text_delta", + contentIndex: 1, + delta: "done", + partial: makeAssistant([toolCall, { type: "text", text: "done" }]), + }, + } as any, + ); + + assert.equal(host.chatContainer.children.length, 2, "assistant content should render after existing tool output"); + assert.equal(host.chatContainer.children[0]?.constructor?.name, "ToolExecutionComponent"); + assert.equal(host.chatContainer.children[1]?.constructor?.name, "AssistantMessageComponent"); +}); + +test("chat-controller keeps serverToolUse output ahead of assistant text when external results arrive", async () => { + (globalThis as any)[Symbol.for("@gsd/pi-coding-agent:theme")] = { + fg: (_key: string, text: string) => text, + bg: (_key: string, text: string) => text, + bold: (text: string) => text, + italic: (text: string) => text, + truncate: (text: string) => text, + }; + + const host = createHost(); + const toolId = "mcp-secure-1"; + const serverToolUse = { + type: "serverToolUse", + id: toolId, + name: "mcp__gsd-workflow__secure_env_collect", + input: { projectDir: "/tmp/project", keys: [{ key: "SECURE_PASSWORD" }], destination: "dotenv" }, + }; + + await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any); + + await handleAgentEvent( + host, + { + type: "message_update", + message: makeAssistant([serverToolUse]), + assistantMessageEvent: { + type: "server_tool_use", + contentIndex: 0, + partial: makeAssistant([serverToolUse]), + }, + } as any, + ); + + assert.equal(host.streamingComponent, undefined, "assistant content should stay deferred while only tool content streams"); + assert.equal(host.chatContainer.children.length, 1, "server tool block should render immediately"); + assert.equal(host.chatContainer.children[0]?.constructor?.name, "ToolExecutionComponent"); + + 
host.getMarkdownThemeWithSettings = () => ({}); + const resultMessage = makeAssistant([ + { + ...serverToolUse, + externalResult: { + content: [{ type: "text", text: "secure_env_collect was cancelled by user." }], + details: {}, + isError: true, + }, + }, + { type: "text", text: "The secure password collection was cancelled." }, + ]); + + await handleAgentEvent( + host, + { + type: "message_update", + message: resultMessage, + assistantMessageEvent: { + type: "server_tool_use", + contentIndex: 0, + partial: resultMessage, + }, + } as any, + ); + + assert.equal(host.chatContainer.children.length, 2, "assistant text should render after existing server tool output"); + assert.equal(host.chatContainer.children[0]?.constructor?.name, "ToolExecutionComponent"); + assert.equal(host.chatContainer.children[1]?.constructor?.name, "AssistantMessageComponent"); +}); + +test("chat-controller pins latest assistant text above editor when tool calls are present", async () => { + (globalThis as any)[Symbol.for("@gsd/pi-coding-agent:theme")] = { + fg: (_key: string, text: string) => text, + bg: (_key: string, text: string) => text, + bold: (text: string) => text, + italic: (text: string) => text, + truncate: (text: string) => text, + }; + + const host = createHost(); + const toolId = "tool-pin-1"; + const toolCall = { + type: "toolCall", + id: toolId, + name: "exec_command", + arguments: { cmd: "echo hi" }, + }; + + await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any); + + assert.equal(host.pinnedMessageContainer.children.length, 0, "pinned zone should be empty at message_start"); + + // Send a message with text followed by a tool call + host.getMarkdownThemeWithSettings = () => ({}); + await handleAgentEvent( + host, + { + type: "message_update", + message: makeAssistant([ + { type: "text", text: "Looking at the files now." }, + toolCall, + ]), + assistantMessageEvent: { + type: "toolcall_end", + contentIndex: 1, + toolCall: { + ...toolCall, + externalResult: { + content: [{ type: "text", text: "file contents" }], + details: {}, + isError: false, + }, + }, + partial: makeAssistant([{ type: "text", text: "Looking at the files now." }, toolCall]), + }, + } as any, + ); + + // Pinned zone should now have a DynamicBorder and a Markdown component + assert.equal(host.pinnedMessageContainer.children.length, 2, "pinned zone should have border + markdown"); + assert.equal(host.pinnedMessageContainer.children[0]?.constructor?.name, "DynamicBorder"); + assert.equal(host.pinnedMessageContainer.children[1]?.constructor?.name, "Markdown"); +}); + +test("chat-controller clears pinned zone when a new assistant message starts", async () => { + (globalThis as any)[Symbol.for("@gsd/pi-coding-agent:theme")] = { + fg: (_key: string, text: string) => text, + bg: (_key: string, text: string) => text, + bold: (text: string) => text, + italic: (text: string) => text, + truncate: (text: string) => text, + }; + + const host = createHost(); + const toolCall = { + type: "toolCall", + id: "tool-clear-1", + name: "exec_command", + arguments: { cmd: "echo hi" }, + }; + + await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any); + + // Populate the pinned zone + host.getMarkdownThemeWithSettings = () => ({}); + await handleAgentEvent( + host, + { + type: "message_update", + message: makeAssistant([{ type: "text", text: "Working on it." 
}, toolCall]), + assistantMessageEvent: { + type: "toolcall_end", + contentIndex: 1, + toolCall: { + ...toolCall, + externalResult: { + content: [{ type: "text", text: "ok" }], + details: {}, + isError: false, + }, + }, + partial: makeAssistant([{ type: "text", text: "Working on it." }, toolCall]), + }, + } as any, + ); + + assert.ok(host.pinnedMessageContainer.children.length > 0, "pinned zone should be populated"); + + // Start a new assistant message — pinned zone should clear + await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any); + + assert.equal(host.pinnedMessageContainer.children.length, 0, "pinned zone should clear on new assistant message"); +}); + +test("chat-controller clears pinned zone when the agent turn ends", async () => { + (globalThis as any)[Symbol.for("@gsd/pi-coding-agent:theme")] = { + fg: (_key: string, text: string) => text, + bg: (_key: string, text: string) => text, + bold: (text: string) => text, + italic: (text: string) => text, + truncate: (text: string) => text, + }; + + const host = createHost(); + const toolCall = { + type: "toolCall", + id: "tool-clear-on-end-1", + name: "exec_command", + arguments: { cmd: "echo hi" }, + }; + + await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any); + + host.getMarkdownThemeWithSettings = () => ({}); + await handleAgentEvent( + host, + { + type: "message_update", + message: makeAssistant([{ type: "text", text: "Working on it." }, toolCall]), + assistantMessageEvent: { + type: "toolcall_end", + contentIndex: 1, + toolCall: { + ...toolCall, + externalResult: { + content: [{ type: "text", text: "ok" }], + details: {}, + isError: false, + }, + }, + partial: makeAssistant([{ type: "text", text: "Working on it." }, toolCall]), + }, + } as any, + ); + + assert.ok(host.pinnedMessageContainer.children.length > 0, "pinned zone should be populated before agent_end"); + + await handleAgentEvent(host, { type: "agent_end" } as any); + + assert.equal(host.pinnedMessageContainer.children.length, 0, "pinned zone should clear on agent_end"); +}); + +test("chat-controller clears pinned zone when assistant message ends", async () => { + (globalThis as any)[Symbol.for("@gsd/pi-coding-agent:theme")] = { + fg: (_key: string, text: string) => text, + bg: (_key: string, text: string) => text, + bold: (text: string) => text, + italic: (text: string) => text, + truncate: (text: string) => text, + }; + + const host = createHost(); + const toolCall = { + type: "toolCall", + id: "tool-msg-end-1", + name: "exec_command", + arguments: { cmd: "echo hi" }, + }; + + await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any); + + host.getMarkdownThemeWithSettings = () => ({}); + const msgContent = [{ type: "text", text: "Summary after tools." }, toolCall]; + await handleAgentEvent( + host, + { + type: "message_update", + message: makeAssistant(msgContent), + assistantMessageEvent: { + type: "toolcall_end", + contentIndex: 1, + toolCall: { + ...toolCall, + externalResult: { + content: [{ type: "text", text: "ok" }], + details: {}, + isError: false, + }, + }, + partial: makeAssistant(msgContent), + }, + } as any, + ); + + assert.ok(host.pinnedMessageContainer.children.length > 0, "pinned zone should be populated during streaming"); + + // End the assistant message (e.g. 
before form elicitation) — pinned zone should clear + await handleAgentEvent(host, { type: "message_end", message: makeAssistant(msgContent) } as any); + + assert.equal(host.pinnedMessageContainer.children.length, 0, "pinned zone should clear on message_end to prevent duplicate display"); +}); + +test("chat-controller does not pin when there are no tool calls", async () => { + (globalThis as any)[Symbol.for("@gsd/pi-coding-agent:theme")] = { + fg: (_key: string, text: string) => text, + bg: (_key: string, text: string) => text, + bold: (text: string) => text, + italic: (text: string) => text, + truncate: (text: string) => text, + }; + + const host = createHost(); + + await handleAgentEvent(host, { type: "message_start", message: makeAssistant([]) } as any); + + host.getMarkdownThemeWithSettings = () => ({}); + await handleAgentEvent( + host, + { + type: "message_update", + message: makeAssistant([{ type: "text", text: "Just some text, no tools." }]), + assistantMessageEvent: { + type: "text_delta", + contentIndex: 0, + delta: "Just some text, no tools.", + partial: makeAssistant([{ type: "text", text: "Just some text, no tools." }]), + }, + } as any, + ); + + assert.equal(host.pinnedMessageContainer.children.length, 0, "pinned zone should stay empty without tool calls"); +}); diff --git a/packages/pi-coding-agent/src/core/extensions/index.ts b/packages/pi-coding-agent/src/core/extensions/index.ts index 70525095a..0438d364b 100644 --- a/packages/pi-coding-agent/src/core/extensions/index.ts +++ b/packages/pi-coding-agent/src/core/extensions/index.ts @@ -43,6 +43,9 @@ export type { BeforeProviderRequestEventResult, // Context CompactOptions, + // Events - Adjust Tool Set (ADR-005) + AdjustToolSetEvent, + AdjustToolSetResult, // Events - Agent ContextEvent, // Event Results @@ -135,6 +138,7 @@ export type { ToolCallEvent, ToolCallEventResult, // Tools + ToolCompatibility, ToolDefinition, // Events - Tool Execution ToolExecutionEndEvent, diff --git a/packages/pi-coding-agent/src/core/extensions/loader.ts b/packages/pi-coding-agent/src/core/extensions/loader.ts index 7e25c837d..016f05448 100644 --- a/packages/pi-coding-agent/src/core/extensions/loader.ts +++ b/packages/pi-coding-agent/src/core/extensions/loader.ts @@ -38,6 +38,7 @@ import type { ExecOptions } from "../exec.js"; import { execCommand } from "../exec.js"; import { getUntrustedExtensionPaths } from "./project-trust.js"; export { isProjectTrusted, trustProject, getUntrustedExtensionPaths } from "./project-trust.js"; +import { registerToolCompatibility } from "../tools/tool-compatibility-registry.js"; import type { Extension, ExtensionAPI, @@ -428,8 +429,9 @@ export function createExtensionRuntime(): ExtensionRuntime { unregisterProvider: (name) => { runtime.pendingProviderRegistrations = runtime.pendingProviderRegistrations.filter((r) => r.name !== name); }, - // Stub replaced by ExtensionRunner at construction time via bindEmitMethods(). + // Stubs replaced by ExtensionRunner at construction time via bindEmitMethods(). 
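+ // Until the runner binds them, both emitters resolve to undefined ("no override").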
emitBeforeModelSelect: async () => undefined, + emitAdjustToolSet: async () => undefined, }; return runtime; @@ -459,6 +461,10 @@ function createExtensionAPI( definition: tool, extensionPath: extension.path, }); + // ADR-005: auto-register tool compatibility metadata + if (tool.compatibility) { + registerToolCompatibility(tool.name, tool.compatibility); + } runtime.refreshTools(); }, @@ -585,6 +591,10 @@ return runtime.emitBeforeModelSelect(event); }, + async emitAdjustToolSet(event: Omit<AdjustToolSetEvent, "type">): Promise<AdjustToolSetResult | undefined> { + return runtime.emitAdjustToolSet(event); + }, + events: eventBus, } as ExtensionAPI; diff --git a/packages/pi-coding-agent/src/core/extensions/runner.ts b/packages/pi-coding-agent/src/core/extensions/runner.ts index 048ad534c..0b0f6114b 100644 --- a/packages/pi-coding-agent/src/core/extensions/runner.ts +++ b/packages/pi-coding-agent/src/core/extensions/runner.ts @@ -11,6 +11,8 @@ import type { KeyAction, KeybindingsConfig } from "../keybindings.js"; import type { ModelRegistry } from "../model-registry.js"; import type { SessionManager } from "../session-manager.js"; import type { + AdjustToolSetEvent, + AdjustToolSetResult, BeforeAgentStartEvent, BeforeAgentStartEventResult, BeforeModelSelectEvent, @@ -234,6 +236,7 @@ this.modelRegistry = modelRegistry; // Bind emit methods into the shared runtime so createExtensionAPI can delegate to them. this.runtime.emitBeforeModelSelect = (event) => this.emitBeforeModelSelect(event); + this.runtime.emitAdjustToolSet = (event) => this.emitAdjustToolSet(event); } bindCore(actions: ExtensionActions, contextActions: ExtensionContextActions): void { @@ -713,6 +716,21 @@ return result; } + async emitAdjustToolSet(event: Omit<AdjustToolSetEvent, "type">): Promise<AdjustToolSetResult | undefined> { + let result: AdjustToolSetResult | undefined; + await this.invokeHandlers("adjust_tool_set", () => ({ + type: "adjust_tool_set" as const, + ...event, + } satisfies AdjustToolSetEvent), (handlerResult) => { + if (handlerResult) { + result = handlerResult as AdjustToolSetResult; + return { done: true }; // first override wins + } + return { done: false }; + }); + return result; + } + async emitBeforeAgentStart( prompt: string, images: ImageContent[] | undefined, diff --git a/packages/pi-coding-agent/src/core/extensions/types.ts b/packages/pi-coding-agent/src/core/extensions/types.ts index f4c153992..5fea6389a 100644 --- a/packages/pi-coding-agent/src/core/extensions/types.ts +++ b/packages/pi-coding-agent/src/core/extensions/types.ts @@ -88,6 +88,8 @@ export interface ExtensionUIDialogOptions { timeout?: number; /** When true, the user can select multiple options. The return type becomes `string[]`. */ allowMultiple?: boolean; + /** When true, text input dialogs should hide typed characters if supported by the client surface. */ + secure?: boolean; } /** Placement for extension widgets. */ @@ -331,6 +333,19 @@ export interface ToolRenderResultOptions { isPartial: boolean; } +/** + * Tool compatibility metadata for provider-aware tool filtering (ADR-005 Phase 2). + * Tools without compatibility metadata are assumed universally compatible.
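+ *
+ * @example
+ * // Sketch: a hypothetical screenshot tool that returns image blocks and
+ * // relies on patternProperties in its parameter schema:
+ * // compatibility: { producesImages: true, schemaFeatures: ["patternProperties"] }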
+ */
+export interface ToolCompatibility {
+  /** Tool produces image content in results (filtered for providers without imageToolResults) */
+  producesImages?: boolean;
+  /** Tool requires schema features that some providers don't support (e.g., ["patternProperties"]) */
+  schemaFeatures?: string[];
+  /** Tool is effective only with models above a minimum capability threshold */
+  minCapabilityTier?: "light" | "standard" | "heavy";
+}
+
 /**
  * Tool definition for registerTool().
  */
@@ -347,6 +362,8 @@ export interface ToolDefinition
+  /** Optional provider-compatibility metadata (ADR-005). Omit for universally compatible tools. */
+  compatibility?: ToolCompatibility;

@@ ... @@ (event subscription overloads)
   on(event: "input", handler: ExtensionHandler): void;
   on(event: "before_model_select", handler: ExtensionHandler): void;
+  on(event: "adjust_tool_set", handler: ExtensionHandler): void;

   // =========================================================================
   // Event Emission (for host extensions that orchestrate model selection)
   // =========================================================================
@@ -1077,6 +1119,9 @@ export interface ExtensionAPI {
   /** Emit before_model_select event. Returns override model ID or undefined. */
   emitBeforeModelSelect(event: Omit<BeforeModelSelectEvent, "type">): Promise<string | undefined>;

+  /** Emit adjust_tool_set event (ADR-005). Returns override tool names or undefined. */
+  emitAdjustToolSet(event: Omit<AdjustToolSetEvent, "type">): Promise<AdjustToolSetResult | undefined>;
+
   // =========================================================================
   // Tool Registration
   // =========================================================================
@@ -1395,6 +1440,8 @@ export interface ExtensionRuntimeState {
   unregisterProvider: (name: string) => void;
   /** Emit before_model_select event to all registered handlers. Bound by ExtensionRunner. */
   emitBeforeModelSelect: (event: Omit<BeforeModelSelectEvent, "type">) => Promise<string | undefined>;
+  /** Emit adjust_tool_set event to all registered handlers. Bound by ExtensionRunner (ADR-005). */
+  emitAdjustToolSet: (event: Omit<AdjustToolSetEvent, "type">) => Promise<AdjustToolSetResult | undefined>;
 }

 /**
diff --git a/packages/pi-coding-agent/src/core/lsp/config.ts b/packages/pi-coding-agent/src/core/lsp/config.ts
index 758657856..29401a363 100644
--- a/packages/pi-coding-agent/src/core/lsp/config.ts
+++ b/packages/pi-coding-agent/src/core/lsp/config.ts
@@ -172,16 +172,49 @@ export function hasRootMarkers(cwd: string, markers: string[]): boolean {
 // =============================================================================
 // Local Binary Resolution
 // =============================================================================

-const LOCAL_BIN_PATHS: Array<{ markers: string[]; binDir: string }> = [
-  { markers: ["package.json", "package-lock.json", "yarn.lock", "pnpm-lock.yaml"], binDir: "node_modules/.bin" },
-  { markers: ["pyproject.toml", "requirements.txt", "setup.py", "Pipfile"], binDir: ".venv/bin" },
-  { markers: ["pyproject.toml", "requirements.txt", "setup.py", "Pipfile"], binDir: "venv/bin" },
-  { markers: ["pyproject.toml", "requirements.txt", "setup.py", "Pipfile"], binDir: ".env/bin" },
-  { markers: ["Gemfile", "Gemfile.lock"], binDir: "vendor/bundle/bin" },
-  { markers: ["Gemfile", "Gemfile.lock"], binDir: "bin" },
-  { markers: ["go.mod", "go.sum"], binDir: "bin" },
+const LOCAL_BIN_PATHS: Array<{ markers: string[]; binDirs: string[] }> = [
+  { markers: ["package.json", "package-lock.json", "yarn.lock", "pnpm-lock.yaml"], binDirs: ["node_modules/.bin"] },
+  { markers: ["pyproject.toml", "requirements.txt", "setup.py", "Pipfile"], binDirs: [".venv/bin", ".venv/Scripts"] },
+  { markers: ["pyproject.toml", "requirements.txt", "setup.py", "Pipfile"], binDirs: ["venv/bin", "venv/Scripts"] },
+  { markers: ["pyproject.toml", "requirements.txt", "setup.py", "Pipfile"], binDirs: [".env/bin", ".env/Scripts"] },
+  { markers: ["Gemfile", "Gemfile.lock"], binDirs: ["vendor/bundle/bin"] },
+  { markers: ["Gemfile", "Gemfile.lock"], binDirs: ["bin"] },
+  { markers: ["go.mod", "go.sum"], binDirs: ["bin"] },
 ];

+function getWindowsBinaryCandidates(command: string): string[] {
+  const ext = path.extname(command).toLowerCase();
+  if (ext) {
+    return [command];
+  }
+
+  return [
+    command,
+    `${command}.cmd`,
+    `${command}.bat`,
+    `${command}.exe`,
+  ];
+}
+
+export function resolveLocalBinaryPath(command: string, cwd: string, isWindows: boolean): string | null {
+  for (const { markers, binDirs } of LOCAL_BIN_PATHS) {
+    if (!hasRootMarkers(cwd, markers)) continue;
+
+    for (const binDir of binDirs) {
+      const basePath = path.join(cwd, binDir, command);
+      const candidates = isWindows ? getWindowsBinaryCandidates(basePath) : [basePath];
+
+      for (const candidate of candidates) {
+        if (fs.existsSync(candidate)) {
+          return candidate;
+        }
+      }
+    }
+  }
+
+  return null;
+}
+
 export function which(command: string): string | null {
   // On Windows, prefer `where.exe` over `which` — MSYS/Git Bash's `which`
   // returns POSIX paths (/c/Users/...) that Node's spawn() can't execute.
@@ -196,15 +229,8 @@ export function which(command: string): string | null {
 }

 export function resolveCommand(command: string, cwd: string): string | null {
-  for (const { markers, binDir } of LOCAL_BIN_PATHS) {
-    if (hasRootMarkers(cwd, markers)) {
-      const localPath = path.join(cwd, binDir, command);
-      if (fs.existsSync(localPath)) {
-        return localPath;
-      }
-    }
-  }
-
+  const localPath = resolveLocalBinaryPath(command, cwd, process.platform === "win32");
+  if (localPath) return localPath;
   return which(command);
 }
diff --git a/packages/pi-coding-agent/src/core/model-resolver-initial-model-auth.test.ts b/packages/pi-coding-agent/src/core/model-resolver-initial-model-auth.test.ts
new file mode 100644
index 000000000..e7d5fb46a
--- /dev/null
+++ b/packages/pi-coding-agent/src/core/model-resolver-initial-model-auth.test.ts
@@ -0,0 +1,78 @@
+import assert from "node:assert/strict";
+import { describe, it } from "node:test";
+import type { Api, Model } from "@gsd/pi-ai";
+import type { ModelRegistry } from "./model-registry.js";
+import { findInitialModel } from "./model-resolver.js";
+
+function makeModel(provider: string, id: string): Model {
+  return {
+    id,
+    name: id,
+    provider,
+    api: "openai-responses",
+    reasoning: false,
+    input: ["text"],
+    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+    contextWindow: 128000,
+    maxTokens: 8192,
+  } as Model;
+}
+
+function makeRegistry(opts: {
+  readyProviders?: Set<string>;
+  byProviderAndId?: Map<string, Model>;
+  available?: Model[];
+}): ModelRegistry {
+  const readyProviders = opts.readyProviders ?? new Set<string>();
+  const byProviderAndId = opts.byProviderAndId ?? new Map<string, Model>();
+  const available = opts.available ?? [];
+
+  return {
+    find: (provider: string, modelId: string) => byProviderAndId.get(`${provider}/${modelId}`),
+    getAvailable: async () => available,
+    isProviderRequestReady: (provider: string) => readyProviders.has(provider),
+  } as unknown as ModelRegistry;
+}
+
+describe("findInitialModel auth gating for saved defaults", () => {
+  it("uses saved default when provider is request-ready", async () => {
+    const saved = makeModel("anthropic", "claude-opus-4-6");
+    const registry = makeRegistry({
+      readyProviders: new Set(["anthropic"]),
+      byProviderAndId: new Map([[`anthropic/claude-opus-4-6`, saved]]),
+      available: [saved],
+    });
+
+    const result = await findInitialModel({
+      scopedModels: [],
+      isContinuing: false,
+      defaultProvider: "anthropic",
+      defaultModelId: "claude-opus-4-6",
+      modelRegistry: registry,
+    });
+
+    assert.equal(result.model?.provider, "anthropic");
+    assert.equal(result.model?.id, "claude-opus-4-6");
+  });
+
+  it("skips saved default when provider is not request-ready and falls back to available", async () => {
+    const staleDefault = makeModel("anthropic", "claude-opus-4-6");
+    const fallback = makeModel("openai", "gpt-5.4");
+    const registry = makeRegistry({
+      readyProviders: new Set(["openai"]),
+      byProviderAndId: new Map([[`anthropic/claude-opus-4-6`, staleDefault]]),
+      available: [fallback],
+    });
+
+    const result = await findInitialModel({
+      scopedModels: [],
+      isContinuing: false,
+      defaultProvider: "anthropic",
+      defaultModelId: "claude-opus-4-6",
+      modelRegistry: registry,
+    });
+
+    assert.equal(result.model?.provider, "openai");
+    assert.equal(result.model?.id, "gpt-5.4");
+  });
+});
diff --git a/packages/pi-coding-agent/src/core/model-resolver.test.ts b/packages/pi-coding-agent/src/core/model-resolver.test.ts
new file mode 100644
index 000000000..d15e93793
--- /dev/null
+++ b/packages/pi-coding-agent/src/core/model-resolver.test.ts
@@ -0,0 +1,85 @@
+/**
+ * Regression test for the #unconfigured-models fix: findInitialModel() must
+ * skip the saved default when its provider has no working auth, rather than
+ * returning an unusable model that every selector surface would display as
+ * "current".
+ */
+
+import test from "node:test";
+import assert from "node:assert/strict";
+import { findInitialModel } from "./model-resolver.js";
+
+function fakeRegistry(options: {
+  models: Array<{ provider: string; id: string }>;
+  readyProviders: Set<string>;
+}) {
+  const fullModels = options.models.map((m) => ({
+    ...m,
+    name: m.id,
+    api: "anthropic-messages",
+    baseUrl: "",
+    reasoning: false,
+    input: ["text"],
+    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+    contextWindow: 128_000,
+    maxTokens: 4096,
+  }));
+  const available = fullModels.filter((m) => options.readyProviders.has(m.provider));
+  return {
+    find(provider: string, id: string) {
+      return fullModels.find((m) => m.provider === provider && m.id === id);
+    },
+    getAvailable() {
+      return available;
+    },
+    isProviderRequestReady(provider: string) {
+      return options.readyProviders.has(provider);
+    },
+  };
+}
+
+test("findInitialModel skips saved default when provider has no auth", async () => {
+  // User saved xai/grok-4 as default, but XAI_API_KEY is unset so xai is
+  // in the registry but not ready. Previously findInitialModel() step 3
+  // returned xai anyway — now it must fall through to step 4 and pick
+  // an available model.
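+  // (Below, "xai" is in the registry's models but absent from readyProviders,
+  // so fakeRegistry's getAvailable() omits it.)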
+  const registry = fakeRegistry({
+    models: [
+      { provider: "xai", id: "grok-4-fast-non-reasoning" },
+      { provider: "anthropic", id: "claude-opus-4-6" },
+    ],
+    readyProviders: new Set(["anthropic"]),
+  });
+
+  const result = await findInitialModel({
+    scopedModels: [],
+    isContinuing: false,
+    defaultProvider: "xai",
+    defaultModelId: "grok-4-fast-non-reasoning",
+    modelRegistry: registry as any,
+  });
+
+  assert.ok(result.model, "a model must be returned");
+  assert.equal(result.model!.provider, "anthropic", "unauth'd saved default must be skipped");
+});
+
+test("findInitialModel keeps saved default when provider has auth", async () => {
+  const registry = fakeRegistry({
+    models: [
+      { provider: "anthropic", id: "claude-opus-4-6" },
+      { provider: "openai", id: "gpt-5.4" },
+    ],
+    readyProviders: new Set(["anthropic", "openai"]),
+  });
+
+  const result = await findInitialModel({
+    scopedModels: [],
+    isContinuing: false,
+    defaultProvider: "openai",
+    defaultModelId: "gpt-5.4",
+    modelRegistry: registry as any,
+  });
+
+  assert.equal(result.model?.provider, "openai");
+  assert.equal(result.model?.id, "gpt-5.4");
+});
diff --git a/packages/pi-coding-agent/src/core/retry-handler.test.ts b/packages/pi-coding-agent/src/core/retry-handler.test.ts
index 5cd324401..df3c8988d 100644
--- a/packages/pi-coding-agent/src/core/retry-handler.test.ts
+++ b/packages/pi-coding-agent/src/core/retry-handler.test.ts
@@ -171,6 +171,25 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
     const retryStart = emittedEvents.find((e) => e.type === "auto_retry_start");
     assert.ok(retryStart, "Regular 429 should enter backoff retry");
   });
+
+  it("classifies OpenRouter credit affordability errors as quota_exhausted", async () => {
+    const { deps, emittedEvents } = createMockDeps({
+      model: createMockModel("openrouter", "openai/gpt-5-pro"),
+      markUsageLimitReachedResult: false,
+      fallbackResult: null,
+    });
+
+    const handler = new RetryHandler(deps);
+    const msg = errorMessage(
+      "402 This request requires more credits, or fewer max_tokens. You requested up to 32000 tokens, but can only afford 329.",
+    );
+
+    const result = await handler.handleRetryableError(msg);
+
+    assert.equal(result, true, "affordability error should trigger credit-aware retry");
+    const retryStart = emittedEvents.find((e) => e.type === "auto_retry_start");
+    assert.ok(retryStart, "Expected immediate retry after reducing max tokens");
+  });
 });

 describe("long-context model downgrade", () => {
@@ -271,6 +290,61 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
   });
 });

+describe("credit-aware maxTokens retry", () => {
+  it("reduces maxTokens on same model when provider reports affordable cap", async () => {
+    const expensiveModel = createMockModel("openrouter", "openai/gpt-5-pro");
+    expensiveModel.maxTokens = 128000;
+
+    const { deps, emittedEvents, onModelChangeFn } = createMockDeps({
+      model: expensiveModel,
+      markUsageLimitReachedResult: false,
+      fallbackResult: null,
+    });
+
+    const handler = new RetryHandler(deps);
+    const msg = errorMessage(
+      "402 This request requires more credits, or fewer max_tokens. You requested up to 32000 tokens, but can only afford 329.",
+    );
+
+    const result = await handler.handleRetryableError(msg);
+    assert.equal(result, true, "should retry after reducing maxTokens");
+
+    const setModelCalls = (deps.agent.setModel as any).mock.calls;
+    assert.equal(setModelCalls.length, 1, "should apply one model downgrade");
+    const downgraded = setModelCalls[0].arguments[0] as Model;
+    assert.equal(downgraded.provider, "openrouter");
+    assert.equal(downgraded.id, "openai/gpt-5-pro");
+    assert.equal(downgraded.maxTokens, 297, "expected affordability cap with safety buffer");
+
+    assert.equal(onModelChangeFn.mock.calls.length, 1, "should notify about model update");
+    const switchEvent = emittedEvents.find((e) => e.type === "fallback_provider_switch");
+    assert.ok(switchEvent, "should emit model-adjustment event");
+    assert.ok(
+      String(switchEvent?.reason || "").includes("credit-aware retry"),
+      "switch reason should mention credit-aware retry",
+    );
+  });
+
+  it("does not mark credentials in cooldown for affordability quota errors", async () => {
+    const expensiveModel = createMockModel("openrouter", "openai/gpt-5-pro");
+    expensiveModel.maxTokens = 128000;
+
+    const { deps, markUsageLimitReached } = createMockDeps({
+      model: expensiveModel,
+      markUsageLimitReachedResult: false,
+      fallbackResult: null,
+    });
+
+    const handler = new RetryHandler(deps);
+    const msg = errorMessage(
+      "402 This request requires more credits, or fewer max_tokens. You requested up to 32000 tokens, but can only afford 329.",
+    );
+
+    await handler.handleRetryableError(msg);
+    assert.equal(markUsageLimitReached.mock.calls.length, 0, "quota error should skip credential cooldown");
+  });
+});
+
 describe("isRetryableError", () => {
   it("considers long-context entitlement error as retryable", () => {
     const { deps } = createMockDeps();
@@ -291,6 +365,15 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
     );
     assert.equal(handler.isRetryableError(msg), false);
   });
+
+  it("considers OpenRouter affordability credit errors as retryable", () => {
+    const { deps } = createMockDeps();
+    const handler = new RetryHandler(deps);
+    const msg = errorMessage(
+      "402 This request requires more credits, or fewer max_tokens. You requested up to 32000 tokens, but can only afford 329.",
+    );
+    assert.equal(handler.isRetryableError(msg), true);
+  });
 });

 describe("third-party block claude-code fallback (#3772)", () => {
diff --git a/packages/pi-coding-agent/src/core/retry-handler.ts b/packages/pi-coding-agent/src/core/retry-handler.ts
index 78d12c8ba..399d92fd4 100644
--- a/packages/pi-coding-agent/src/core/retry-handler.ts
+++ b/packages/pi-coding-agent/src/core/retry-handler.ts
@@ -116,7 +116,7 @@ export class RetryHandler {
     // generated error from getApiKey() when credentials are in a backoff window.
     // Re-entering the retry handler for that message creates a cascade of empty
     // error entries in the session file, breaking resume (#3429).
-    return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|extra usage is required|(?:out of|no) extra usage|third.party.*draw from extra|third.party.*not.*available/i.test(
+    return /overloaded|rate.?limit|too many requests|402|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|requires more credits|can only afford|insufficient credits|not enough credits|extra usage is required|(?:out of|no) extra usage|third.party.*draw from extra|third.party.*not.*available/i.test(
       err,
     );
   }
@@ -158,6 +158,14 @@ export class RetryHandler {
     const isRateLimit = errorType === "rate_limit";
     const isQuotaError = errorType === "quota_exhausted";

+    // Credit-aware retry (OpenRouter-style 402 affordability errors):
+    // when provider reports "can only afford N", lower maxTokens and retry
+    // on the same model before rotating credentials/providers.
+    if (isQuotaError) {
+      const adjusted = this._tryAffordableMaxTokensRetry(message, retryGeneration);
+      if (adjusted) return true;
+    }
+
     // Credential rotation — only for transient rate limits (#3430).
     // Quota errors ("Extra usage is required") are account-level billing
     // gates; rotating to another credential on the same account won't help
@@ -409,12 +417,63 @@ export class RetryHandler {
     // Long-context entitlement errors are billing gates, not transient rate limits.
     // Must be checked before the generic 429/rate_limit regex.
     if (/extra usage is required|long context required/i.test(err)) return "quota_exhausted";
+    if (/requires more credits|can only afford|insufficient credits|not enough credits|credit balance/i.test(err))
+      return "quota_exhausted";
     if (/quota|billing|exceeded.*limit|usage.*limit/i.test(err)) return "quota_exhausted";
     if (/rate.?limit|too many requests|429/i.test(err)) return "rate_limit";
     if (/500|502|503|504|server.?error|internal.?error|service.?unavailable/i.test(err)) return "server_error";
     return "unknown";
   }

+  /**
+   * Attempt a same-model retry by reducing maxTokens when provider reports
+   * an affordability cap (e.g., "can only afford 329").
+   */
+  private _tryAffordableMaxTokensRetry(message: AssistantMessage, retryGeneration: number): boolean {
+    const currentModel = this._deps.getModel();
+    if (!currentModel || !message.errorMessage) return false;
+
+    // Example: "can only afford 329"
+    const match = message.errorMessage.match(/can only afford\s+([\d,]+)/i);
+    if (!match?.[1]) return false;
+
+    const affordable = Number.parseInt(match[1].replace(/,/g, ""), 10);
+    if (!Number.isFinite(affordable) || affordable <= 0) return false;
+
+    // Leave a small buffer so slight input variance doesn't immediately re-fail.
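+    // e.g. affordable = 329: buffer = min(64, max(16, floor(32.9))) = 32, so
+    // maxTokens becomes 329 - 32 = 297 — the value asserted in the test above.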
+    const safetyBuffer = Math.min(64, Math.max(16, Math.floor(affordable * 0.1)));
+    const targetMaxTokens = Math.max(64, affordable - safetyBuffer);
+    const downgradedMaxTokens = Math.min(currentModel.maxTokens, targetMaxTokens);
+    if (downgradedMaxTokens >= currentModel.maxTokens) return false;
+
+    const downgradedModel = {
+      ...currentModel,
+      maxTokens: downgradedMaxTokens,
+    };
+
+    this._deps.agent.setModel(downgradedModel);
+    this._deps.onModelChange(downgradedModel);
+    this._removeLastAssistantError();
+
+    this._deps.emit({
+      type: "fallback_provider_switch",
+      from: `${currentModel.provider}/${currentModel.id} (maxTokens=${currentModel.maxTokens})`,
+      to: `${downgradedModel.provider}/${downgradedModel.id} (maxTokens=${downgradedModel.maxTokens})`,
+      reason: `credit-aware retry: provider affordable cap ${affordable} tokens`,
+    });
+
+    this._deps.emit({
+      type: "auto_retry_start",
+      attempt: this._retryAttempt + 1,
+      maxAttempts: this._deps.settingsManager.getRetrySettings().maxRetries,
+      delayMs: 0,
+      errorMessage: `${message.errorMessage} (reducing max tokens)`,
+    });
+
+    this._scheduleContinue(retryGeneration);
+    return true;
+  }
+
   /**
    * Attempt to downgrade a long-context model (e.g. claude-opus-4-6[1m]) to its
    * base model (claude-opus-4-6) when the account lacks the long-context billing
diff --git a/packages/pi-coding-agent/src/core/sdk.test.ts b/packages/pi-coding-agent/src/core/sdk.test.ts
new file mode 100644
index 000000000..cebb41490
--- /dev/null
+++ b/packages/pi-coding-agent/src/core/sdk.test.ts
@@ -0,0 +1,89 @@
+// pi-coding-agent / CredentialCooldownError unit tests
+// Copyright (c) 2026 Jeremy McSpadden
+
+import { describe, it } from "node:test";
+import assert from "node:assert/strict";
+import { CredentialCooldownError } from "./sdk.js";
+
+// ─── CredentialCooldownError ─────────────────────────────────────────────────
+
+describe("CredentialCooldownError", () => {
+  it("is an instance of Error", () => {
+    const err = new CredentialCooldownError("anthropic");
+    assert.ok(err instanceof Error);
+  });
+
+  it("has name set to CredentialCooldownError", () => {
+    const err = new CredentialCooldownError("anthropic");
+    assert.equal(err.name, "CredentialCooldownError");
+  });
+
+  it("has code set to AUTH_COOLDOWN", () => {
+    const err = new CredentialCooldownError("anthropic");
+    assert.equal(err.code, "AUTH_COOLDOWN");
+  });
+
+  it("message includes the provider name", () => {
+    const err = new CredentialCooldownError("openai");
+    assert.ok(
+      err.message.includes("openai"),
+      `Expected message to include provider "openai", got: ${err.message}`,
+    );
+  });
+
+  it("message mentions cooldown window", () => {
+    const err = new CredentialCooldownError("anthropic");
+    assert.ok(
+      /cooldown window/i.test(err.message),
+      `Expected message to mention "cooldown window", got: ${err.message}`,
+    );
+  });
+
+  it("retryAfterMs is undefined when not provided", () => {
+    const err = new CredentialCooldownError("anthropic");
+    assert.equal(err.retryAfterMs, undefined);
+  });
+
+  it("retryAfterMs holds the provided value when specified", () => {
+    const err = new CredentialCooldownError("anthropic", 30_000);
+    assert.equal(err.retryAfterMs, 30_000);
+  });
+
+  it("retryAfterMs is 0 when explicitly passed as 0", () => {
+    const err = new CredentialCooldownError("anthropic", 0);
+    assert.equal(err.retryAfterMs, 0);
+  });
+
+  it("code property is readonly and always AUTH_COOLDOWN regardless of provider", () => {
+    for (const provider of ["anthropic", "openai", "google", "openrouter"]) {
+      const err = new CredentialCooldownError(provider);
+      assert.equal(err.code, "AUTH_COOLDOWN", `code should be AUTH_COOLDOWN for provider "${provider}"`);
+    }
+  });
+
+  it("different providers produce different messages", () => {
+    const err1 = new CredentialCooldownError("anthropic");
+    const err2 = new CredentialCooldownError("openai");
+    assert.notEqual(err1.message, err2.message);
+  });
+
+  it("can be caught as an Error in a try/catch", () => {
+    let caught: unknown;
+    try {
+      throw new CredentialCooldownError("anthropic", 5_000);
+    } catch (e) {
+      caught = e;
+    }
+    assert.ok(caught instanceof Error);
+    assert.ok(caught instanceof CredentialCooldownError);
+    assert.equal((caught as CredentialCooldownError).retryAfterMs, 5_000);
+  });
+
+  it("code property is detectable via plain object check (cross-process pattern)", () => {
+    const err = new CredentialCooldownError("anthropic", 15_000);
+    // Simulate cross-process serialization: only plain properties survive JSON round-trip
+    const plain = { code: err.code, retryAfterMs: err.retryAfterMs, message: err.message };
+    assert.equal(plain.code, "AUTH_COOLDOWN");
+    assert.equal(plain.retryAfterMs, 15_000);
+  });
+});
diff --git a/packages/pi-coding-agent/src/core/sdk.ts b/packages/pi-coding-agent/src/core/sdk.ts
index a0c2d943b..1558b10b2 100644
--- a/packages/pi-coding-agent/src/core/sdk.ts
+++ b/packages/pi-coding-agent/src/core/sdk.ts
@@ -1,4 +1,24 @@
 import { join } from "node:path";
+
+/**
+ * Structured error thrown when all credentials for a provider are in a
+ * backoff window. Carries typed metadata so callers (e.g. the auto-loop)
+ * can make informed retry decisions instead of string-matching the message.
+ */
+export class CredentialCooldownError extends Error {
+  readonly code = "AUTH_COOLDOWN" as const;
+  /** Milliseconds until the earliest credential becomes available, or undefined if unknown. */
+  readonly retryAfterMs: number | undefined;
+
+  constructor(provider: string, retryAfterMs?: number) {
+    super(
+      `All credentials for "${provider}" are in a cooldown window. ` +
+        `Please wait a moment and try again, or switch to a different provider.`,
+    );
+    this.name = "CredentialCooldownError";
+    this.retryAfterMs = retryAfterMs;
+  }
+}
 import { Agent, type AgentMessage, type ThinkingLevel } from "@gsd/pi-agent-core";
 import type { Message, Model } from "@gsd/pi-ai";
 import { getAgentDir, getDocsPath } from "../config.js";
@@ -199,6 +219,16 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
     time("resourceLoader.reload");
   }

+  // Flush provider registrations queued during extension loading so that
+  // extension models (e.g. pi-claude-cli) are visible in the registry before
+  // findInitialModel() runs. bindCore() repeats this flush as a safety net
+  // for any late-arriving registrations.
+  const { runtime: extensionRuntime } = resourceLoader.getExtensions();
+  for (const { name, config } of extensionRuntime.pendingProviderRegistrations) {
+    modelRegistry.registerProvider(name, config);
+  }
+  extensionRuntime.pendingProviderRegistrations = [];
+
   // Check if session has existing data to restore
   const existingSession = sessionManager.buildSessionContext();
   const hasExistingSession = existingSession.messages.length > 0;
@@ -341,6 +371,14 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
     thinkingBudgets: settingsManager.getThinkingBudgets(),
     maxRetryDelayMs: settingsManager.getRetrySettings().maxDelayMs,
     externalToolExecution: (m) => modelRegistry.getProviderAuthMode(m.provider) === "externalCli",
+    getProviderOptions: async (currentModel) => {
+      if (currentModel.provider !== "claude-code") return undefined;
+      const runner = extensionRunnerRef.current;
+      if (!runner?.hasUI()) return undefined;
+      return {
+        extensionUIContext: runner.getUIContext(),
+      };
+    },
     getApiKey: async (provider) => {
       // Use the provider argument from the in-flight request;
       // agent.state.model may already be switched mid-turn.
@@ -355,8 +393,12 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
       // Retry key resolution with backoff to handle transient network failures
       // (e.g., OAuth token refresh failing due to brief connectivity loss).
+      // When credentials are in a cooldown window (e.g., after a 429), wait
+      // for the backoff to expire instead of using fixed delays that are
+      // shorter than the cooldown duration.
       const maxAttempts = 3;
       const baseDelayMs = 2000;
+      const maxCooldownWaitMs = 60_000; // Don't wait longer than 60s (skip quota-exhausted 30min backoffs)
       for (let attempt = 1; attempt <= maxAttempts; attempt++) {
         const key = await modelRegistry.getApiKeyForProvider(resolvedProvider);
         if (key) return key;
@@ -371,7 +413,21 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
         const isOAuth = model && modelRegistry.isUsingOAuth(model);
         if (!hasAuth && !isOAuth) break;

-        // Wait with exponential backoff before retrying
+        // If credentials are in a cooldown window, wait for the earliest
+        // one to expire rather than using a fixed delay that's too short.
+        const backoffExpiry = modelRegistry.authStorage.getEarliestBackoffExpiry(resolvedProvider);
+        if (backoffExpiry !== undefined) {
+          const waitMs = backoffExpiry - Date.now() + 500; // 500ms buffer
+          if (waitMs > 0 && waitMs <= maxCooldownWaitMs) {
+            await new Promise(resolve => setTimeout(resolve, waitMs));
+            continue; // Retry immediately after cooldown clears
+          }
+          if (waitMs > maxCooldownWaitMs) {
+            break; // Quota-exhausted or very long backoff — don't block
+          }
+        }
+
+        // Standard exponential backoff for non-cooldown transient failures
         await new Promise(resolve => setTimeout(resolve, baseDelayMs * attempt));
       }
@@ -382,10 +438,9 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
       // the retry handler and creating cascading error entries (#3429).
       const hasAuth = modelRegistry.authStorage.hasAuth(resolvedProvider);
       if (hasAuth) {
-        throw new Error(
-          `All credentials for "${resolvedProvider}" are in a cooldown window. ` +
-            `Please wait a moment and try again, or switch to a different provider.`,
-        );
+        const expiry = modelRegistry.authStorage.getEarliestBackoffExpiry(resolvedProvider);
+        const retryAfterMs = expiry !== undefined ? Math.max(0, expiry - Date.now()) : undefined;
+        throw new CredentialCooldownError(resolvedProvider, retryAfterMs);
       }
       const model = agent.state.model;
       const isOAuth = model && modelRegistry.isUsingOAuth(model);
@@ -393,10 +448,9 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
       // If credentials exist but are all in a backoff window (quota / rate-limit),
       // surface a specific message instead of the misleading "Authentication failed".
       if (modelRegistry.authStorage.areAllCredentialsBackedOff(resolvedProvider)) {
-        throw new Error(
-          `All credentials for "${resolvedProvider}" are in a cooldown window. ` +
-            `Please wait a moment and try again, or switch to a different provider.`,
-        );
+        const expiry = modelRegistry.authStorage.getEarliestBackoffExpiry(resolvedProvider);
+        const retryAfterMs = expiry !== undefined ? Math.max(0, expiry - Date.now()) : undefined;
+        throw new CredentialCooldownError(resolvedProvider, retryAfterMs);
       }
       throw new Error(
         `Authentication failed for "${resolvedProvider}". ` +
diff --git a/packages/pi-coding-agent/src/core/tools/index.ts b/packages/pi-coding-agent/src/core/tools/index.ts
index d54ac2a9c..90a5a524c 100644
--- a/packages/pi-coding-agent/src/core/tools/index.ts
+++ b/packages/pi-coding-agent/src/core/tools/index.ts
@@ -112,6 +112,13 @@ export {
   lspTool,
 } from "../lsp/index.js";
 export type { LspServerStatus } from "../lsp/client.js";
+export {
+  registerToolCompatibility,
+  getToolCompatibility,
+  getAllToolCompatibility,
+  registerMcpToolCompatibility,
+  resetToolCompatibilityRegistry,
+} from "./tool-compatibility-registry.js";

 import type { AgentTool } from "@gsd/pi-agent-core";
 import { type BashToolOptions, bashTool, createBashTool } from "./bash.js";
diff --git a/packages/pi-coding-agent/src/core/tools/tool-compatibility-registry.ts b/packages/pi-coding-agent/src/core/tools/tool-compatibility-registry.ts
new file mode 100644
index 000000000..9e5bea3b5
--- /dev/null
+++ b/packages/pi-coding-agent/src/core/tools/tool-compatibility-registry.ts
@@ -0,0 +1,83 @@
+// GSD-2 — Tool Compatibility Registry (ADR-005 Phase 2)
+// Maps tool names to their provider compatibility metadata.
+// Used by the model router to filter tools incompatible with the selected provider.
+
+import type { ToolCompatibility } from "../extensions/types.js";
+
+// ─── Registry State ─────────────────────────────────────────────────────────
+
+const registry = new Map<string, ToolCompatibility>();
+
+// ─── Built-in Tool Compatibility (universally compatible) ───────────────────
+// Built-in tools (bash, read, write, edit, grep, find, ls) produce text-only
+// results and use standard JSON Schema — compatible with all providers.
+
+const BUILTIN_TOOLS: Record<string, ToolCompatibility> = {
+  bash: {},
+  read: {},
+  write: {},
+  edit: {},
+  grep: {},
+  find: {},
+  ls: {},
+  lsp: {},
+  hashline_edit: {},
+  hashline_read: {},
+};
+
+// Pre-populate registry with built-in tools
+for (const [name, compat] of Object.entries(BUILTIN_TOOLS)) {
+  registry.set(name, compat);
+}
+
+// ─── MCP Tool Defaults ─────────────────────────────────────────────────────
+// MCP tools may use complex schemas. Default to cautious compatibility.
+
+const MCP_TOOL_DEFAULTS: ToolCompatibility = {
+  schemaFeatures: ["patternProperties"],
+};
+
+// ─── Public API ─────────────────────────────────────────────────────────────
+
+/**
+ * Register compatibility metadata for a tool.
+ * Called automatically by registerTool() for extension tools that include
+ * compatibility metadata in their ToolDefinition.
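+ *
+ * Usage sketch (tool name illustrative, not from this PR):
+ *   registerToolCompatibility("render_chart", { producesImages: true, minCapabilityTier: "standard" });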
+ */
+export function registerToolCompatibility(toolName: string, compatibility: ToolCompatibility): void {
+  registry.set(toolName, compatibility);
+}
+
+/**
+ * Get compatibility metadata for a tool.
+ * Returns undefined for unknown tools (treated as universally compatible
+ * per ADR-005 principle: "fail open, don't restrict without data").
+ */
+export function getToolCompatibility(toolName: string): ToolCompatibility | undefined {
+  return registry.get(toolName);
+}
+
+/**
+ * Get all registered tool compatibility entries.
+ */
+export function getAllToolCompatibility(): ReadonlyMap<string, ToolCompatibility> {
+  return registry;
+}
+
+/**
+ * Register an MCP tool with default cautious compatibility.
+ * MCP tools may use complex schemas that some providers don't support.
+ */
+export function registerMcpToolCompatibility(toolName: string, overrides?: Partial<ToolCompatibility>): void {
+  registry.set(toolName, { ...MCP_TOOL_DEFAULTS, ...overrides });
+}
+
+/**
+ * Clear all non-builtin entries (for testing).
+ */
+export function resetToolCompatibilityRegistry(): void {
+  registry.clear();
+  for (const [name, compat] of Object.entries(BUILTIN_TOOLS)) {
+    registry.set(name, compat);
+  }
+}
diff --git a/packages/pi-coding-agent/src/index.ts b/packages/pi-coding-agent/src/index.ts
index 86686caf0..54a20b846 100644
--- a/packages/pi-coding-agent/src/index.ts
+++ b/packages/pi-coding-agent/src/index.ts
@@ -49,6 +49,8 @@ export {
 export { createEventBus, type EventBus, type EventBusController } from "./core/event-bus.js";
 // Extension system
 export type {
+  AdjustToolSetEvent,
+  AdjustToolSetResult,
   AgentEndEvent,
   AgentStartEvent,
   AgentToolResult,
@@ -118,6 +120,7 @@ export type {
   SlashCommandSource,
   TerminalInputHandler,
   ToolCallEvent,
+  ToolCompatibility,
   ToolDefinition,
   ToolInfo,
   SortResult,
@@ -173,6 +176,7 @@ export { DefaultResourceLoader } from "./core/resource-loader.js";
 export {
   type CreateAgentSessionOptions,
   type CreateAgentSessionResult,
+  CredentialCooldownError,
   // Factory
   createAgentSession,
   createBashTool,
@@ -310,6 +314,12 @@ export {
   type HashlineReadToolDetails,
   type HashlineReadToolInput,
   type HashlineReadToolOptions,
+  // Tool compatibility registry (ADR-005)
+  registerToolCompatibility,
+  getToolCompatibility,
+  getAllToolCompatibility,
+  registerMcpToolCompatibility,
+  resetToolCompatibilityRegistry,
 } from "./core/tools/index.js";
 // Main entry point
 export { main } from "./main.js";
diff --git a/packages/pi-coding-agent/src/modes/interactive/components/__tests__/login-dialog.test.ts b/packages/pi-coding-agent/src/modes/interactive/components/__tests__/login-dialog.test.ts
new file mode 100644
index 000000000..1ce0469ff
--- /dev/null
+++ b/packages/pi-coding-agent/src/modes/interactive/components/__tests__/login-dialog.test.ts
@@ -0,0 +1,24 @@
+import { describe, test } from "node:test";
+import assert from "node:assert/strict";
+import { buildAuthUrlPresentation } from "../login-dialog.js";
+
+describe("LoginDialogComponent", () => {
+  test("shows the full OAuth URL when the hyperlink label is truncated", () => {
+    const presentation = buildAuthUrlPresentation(
+      "https://auth.example.com/device?code=ABCD-1234&callback=oauth&state=needs-full-visibility",
+      52,
+    );
+
+    assert.notEqual(
+      presentation.displayUrl,
+      "https://auth.example.com/device?code=ABCD-1234&callback=oauth&state=needs-full-visibility",
+      "narrow terminals should still truncate the hyperlink label",
+    );
+    assert.ok(presentation.fullUrlLines.length > 1, "truncated URLs should expose wrapped full-url lines");
+    assert.match(presentation.fullUrlLines[0] ?? "", /https:\/\/auth\.example\.com\/device\?code=ABCD-1234&/);
+    assert.match(
+      presentation.fullUrlLines[presentation.fullUrlLines.length - 1] ?? "",
+      /state=needs-full-visibility/,
+    );
+  });
+});
diff --git a/packages/pi-coding-agent/src/modes/interactive/components/__tests__/tool-execution.test.ts b/packages/pi-coding-agent/src/modes/interactive/components/__tests__/tool-execution.test.ts
index 9b3123fa5..43fd3f7a5 100644
--- a/packages/pi-coding-agent/src/modes/interactive/components/__tests__/tool-execution.test.ts
+++ b/packages/pi-coding-agent/src/modes/interactive/components/__tests__/tool-execution.test.ts
@@ -27,6 +27,26 @@ function renderTool(
   return stripAnsi(component.render(120).join("\n"));
 }

+function renderToolCollapsed(
+  toolName: string,
+  args: Record<string, unknown>,
+  result?: {
+    content: Array<{ type: string; text?: string }>;
+    isError: boolean;
+    details?: Record<string, unknown>;
+  },
+): string {
+  const component = new ToolExecutionComponent(
+    toolName,
+    args,
+    {},
+    undefined,
+    { requestRender() {} } as any,
+  );
+  if (result) component.updateResult(result);
+  return stripAnsi(component.render(120).join("\n"));
+}
+
 describe("ToolExecutionComponent", () => {
   test("renders capitalized Claude Code Bash tool names with bash output instead of generic args JSON", () => {
     const rendered = renderTool(
@@ -51,4 +71,56 @@ describe("ToolExecutionComponent", () => {
     assert.match(rendered, /hello/);
     assert.match(rendered, /world/);
   });
+
+  test("generic fallback strips mcp__<server>__ prefix and shows server·tool title", () => {
+    const rendered = renderTool(
+      "mcp__context7__resolve_library_id",
+      { name: "react" },
+      { content: [{ type: "text", text: "react@18.3.1" }], isError: false },
+    );
+
+    assert.match(rendered, /context7\u00b7resolve_library_id/);
+    assert.doesNotMatch(rendered, /mcp__/);
+    assert.match(rendered, /name="react"/);
+    assert.match(rendered, /react@18\.3\.1/);
+  });
+
+  test("generic fallback renders compact key=value args for primitive args", () => {
+    const rendered = renderTool(
+      "some_unknown_tool",
+      { count: 3, enabled: true, label: "hello" },
+    );
+
+    assert.match(rendered, /some_unknown_tool/);
+    assert.match(rendered, /count=3/);
+    assert.match(rendered, /enabled=true/);
+    assert.match(rendered, /label="hello"/);
+    assert.doesNotMatch(rendered, /^\{$/m);
+  });
+
+  test("generic fallback truncates long output when collapsed", () => {
+    const longOutput = Array.from({ length: 25 }, (_, i) => `line ${i + 1}`).join("\n");
+    const rendered = renderToolCollapsed(
+      "mcp__demo__do_thing",
+      { ok: true },
+      { content: [{ type: "text", text: longOutput }], isError: false },
+    );
+
+    assert.match(rendered, /line 1\b/);
+    assert.match(rendered, /line 10\b/);
+    assert.doesNotMatch(rendered, /line 20\b/);
+    assert.match(rendered, /\(15 more lines/);
+  });
+
+  test("generic fallback falls back to truncated JSON for complex args", () => {
+    const rendered = renderTool(
+      "mcp__demo__nested",
+      { payload: { nested: { deeply: ["a", "b", "c"] } }, name: "x" },
+    );
+
+    assert.match(rendered, /demo\u00b7nested/);
+    // Multi-line JSON dump for the complex payload
+    assert.match(rendered, /"payload"/);
+    assert.match(rendered, /"nested"/);
+  });
 });
diff --git a/packages/pi-coding-agent/src/modes/interactive/components/dynamic-border.ts b/packages/pi-coding-agent/src/modes/interactive/components/dynamic-border.ts
index a54298065..5a023afd3 100644
--- a/packages/pi-coding-agent/src/modes/interactive/components/dynamic-border.ts
+++ b/packages/pi-coding-agent/src/modes/interactive/components/dynamic-border.ts
@@ -1,8 +1,10 @@
-import type { Component } from "@gsd/pi-tui";
+import type { Component, TUI } from "@gsd/pi-tui";
+import { visibleWidth } from "@gsd/pi-tui";
 import { theme } from "../theme/theme.js";

 /**
  * Dynamic border component that adjusts to viewport width.
+ * Supports an optional animated spinner in the label area.
  *
  * Note: When used from extensions loaded via jiti, the global `theme` may be undefined
  * because jiti creates a separate module cache. Always pass an explicit color
@@ -10,11 +12,51 @@
  */
 export class DynamicBorder implements Component {
   private color: (str: string) => string;
+  private label?: string;
+  private spinnerFrames = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
+  private spinnerIndex = 0;
+  private spinnerInterval: NodeJS.Timeout | null = null;
+  private spinnerColorFn?: (str: string) => string;

   constructor(color: (str: string) => string = (str) => {
     try {
       return theme.fg("border", str);
     } catch {
       return str;
     }
-  }) {
+  }, label?: string) {
     this.color = color;
+    this.label = label;
+  }
+
+  setLabel(label: string | undefined): void {
+    this.label = label;
+  }
+
+  /**
+   * Start an animated spinner that prepends to the label.
+   * The spinner rotates every 80ms and triggers a re-render via the TUI.
+   */
+  startSpinner(ui: TUI, colorFn: (str: string) => string): void {
+    this.stopSpinner();
+    this.spinnerColorFn = colorFn;
+    this.spinnerIndex = 0;
+    this.spinnerInterval = setInterval(() => {
+      this.spinnerIndex = (this.spinnerIndex + 1) % this.spinnerFrames.length;
+      ui.requestRender();
+    }, 80);
+    ui.requestRender();
+  }
+
+  /**
+   * Stop the spinner animation. The border reverts to a static label.
+   */
+  stopSpinner(): void {
+    if (this.spinnerInterval) {
+      clearInterval(this.spinnerInterval);
+      this.spinnerInterval = null;
+    }
+    this.spinnerColorFn = undefined;
+  }
+
+  get isSpinning(): boolean {
+    return this.spinnerInterval !== null;
   }

   invalidate(): void {
@@ -22,6 +64,20 @@
   }

   render(width: number): string[] {
+    const spinnerPrefix = this.spinnerInterval && this.spinnerColorFn
+      ? this.spinnerColorFn(this.spinnerFrames[this.spinnerIndex]) + " "
+      : "";
+
+    if (this.label) {
+      const labelText = ` ${spinnerPrefix}${this.label} `;
+      const labelVisible = visibleWidth(labelText);
+      const leading = "── ";
+      const remaining = Math.max(0, width - labelVisible - leading.length);
+      const trailing = "─".repeat(Math.max(1, remaining));
+      // Color leading and trailing separately so embedded ANSI in the
+      // spinner/label doesn't bleed into the trailing dashes.
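+      // e.g. with label "Working" and an active spinner this renders roughly:
+      //   "──  ⠙ Working ──────────────"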
+      return [this.color(leading) + labelText + this.color(trailing)];
+    }
     return [this.color("─".repeat(Math.max(1, width)))];
   }
 }
diff --git a/packages/pi-coding-agent/src/modes/interactive/components/extension-input.ts b/packages/pi-coding-agent/src/modes/interactive/components/extension-input.ts
index 525bcfc06..7634d154f 100644
--- a/packages/pi-coding-agent/src/modes/interactive/components/extension-input.ts
+++ b/packages/pi-coding-agent/src/modes/interactive/components/extension-input.ts
@@ -11,6 +11,7 @@ import { keyHint } from "./keybinding-hints.js";
 export interface ExtensionInputOptions {
   tui?: TUI;
   timeout?: number;
+  secure?: boolean;
 }

 export class ExtensionInputComponent extends Container implements Focusable {
@@ -61,6 +62,7 @@
     }

     this.input = new Input();
+    this.input.secure = opts?.secure === true;
     if (placeholder) {
       this.input.placeholder = placeholder;
     }
diff --git a/packages/pi-coding-agent/src/modes/interactive/components/login-dialog.ts b/packages/pi-coding-agent/src/modes/interactive/components/login-dialog.ts
index bf9e8b4ed..0a13465bb 100644
--- a/packages/pi-coding-agent/src/modes/interactive/components/login-dialog.ts
+++ b/packages/pi-coding-agent/src/modes/interactive/components/login-dialog.ts
@@ -7,6 +7,27 @@ import { theme } from "../theme/theme.js";
 import { DynamicBorder } from "./dynamic-border.js";
 import { keyHint } from "./keybinding-hints.js";

+function wrapPlainText(text: string, width: number): string[] {
+  const lines: string[] = [];
+  const safeWidth = Math.max(1, width);
+  for (let idx = 0; idx < text.length; idx += safeWidth) {
+    lines.push(text.slice(idx, idx + safeWidth));
+  }
+  return lines.length > 0 ? lines : [""];
+}
+
+export function buildAuthUrlPresentation(url: string, terminalColumns: number): {
+  displayUrl: string;
+  fullUrlLines: string[];
+} {
+  const maxUrlWidth = Math.max(20, terminalColumns - 4);
+  const displayUrl = truncateToWidth(url, maxUrlWidth);
+  return {
+    displayUrl,
+    fullUrlLines: displayUrl === url ? [] : wrapPlainText(url, maxUrlWidth),
+  };
+}
+
 /**
  * Login dialog component - replaces editor during OAuth login flow.
  *
@@ -124,14 +145,21 @@
     // Truncate the visible URL text so it never wraps (which would break
     // the OSC 8 hyperlink). The full URL is still the link target.
-    const maxUrlWidth = Math.max(20, this.tui.terminal.columns - 4);
-    const displayUrl = truncateToWidth(url, maxUrlWidth);
+    const { displayUrl, fullUrlLines } = buildAuthUrlPresentation(url, this.tui.terminal.columns);
     const urlLink = `\x1b]8;;${url}\x07${theme.fg("accent", displayUrl)}\x1b]8;;\x07`;
     this.contentContainer.addChild(new Text(urlLink, 1, 0));

     const clickHint = process.platform === "darwin" ? "Cmd+click to open" : "Ctrl+click to open";
     this.contentContainer.addChild(new Text(theme.fg("dim", clickHint), 1, 0));

+    if (fullUrlLines.length > 0) {
+      this.contentContainer.addChild(new Spacer(1));
+      this.contentContainer.addChild(new Text(theme.fg("dim", "Full URL:"), 1, 0));
+      for (const line of fullUrlLines) {
+        this.contentContainer.addChild(new Text(theme.fg("dim", line), 1, 0));
+      }
+    }
+
     if (instructions) {
       this.contentContainer.addChild(new Spacer(1));
       this.contentContainer.addChild(new Text(theme.fg("warning", instructions), 1, 0));
diff --git a/packages/pi-coding-agent/src/modes/interactive/components/model-selector.ts b/packages/pi-coding-agent/src/modes/interactive/components/model-selector.ts
index 9f978ffdf..191cefdca 100644
--- a/packages/pi-coding-agent/src/modes/interactive/components/model-selector.ts
+++ b/packages/pi-coding-agent/src/modes/interactive/components/model-selector.ts
@@ -120,7 +120,12 @@ export class ModelSelectorComponent extends Container implements Focusable {
     this.settingsManager = settingsManager;
     this.modelRegistry = modelRegistry;
     this.scopedModels = scopedModels;
-    this.scope = scopedModels.length > 0 ? "scoped" : "all";
+    // Only land in "scoped" view when at least one scoped model has working
+    // auth — otherwise the user would see an empty picker (#unconfigured-models).
+    const hasReadyScopedModel = scopedModels.some((scoped) =>
+      modelRegistry.isProviderRequestReady(scoped.model.provider),
+    );
+    this.scope = hasReadyScopedModel ? "scoped" : "all";
     this.onSelectCallback = onSelect;
     this.onCancelCallback = onCancel;
@@ -215,12 +220,16 @@
     }

     this.allModels = this.sortModelsWithinProvider(models);
+    // Scoped models must also be filtered by provider readiness so users
+    // can't pick a scoped model whose provider has no API key / OAuth.
     this.scopedModelItems = this.sortModelsWithinProvider(
-      this.scopedModels.map((scoped) => ({
-        provider: scoped.model.provider,
-        id: scoped.model.id,
-        model: scoped.model,
-      })),
+      this.scopedModels
+        .filter((scoped) => this.modelRegistry.isProviderRequestReady(scoped.model.provider))
+        .map((scoped) => ({
+          provider: scoped.model.provider,
+          id: scoped.model.id,
+          model: scoped.model,
+        })),
     );
     this.activeModels = this.scope === "scoped" ? this.scopedModelItems : this.allModels;
     this.filteredModels = this.activeModels;
diff --git a/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts b/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts
index 4f7bcb641..7e01befbb 100644
--- a/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts
+++ b/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts
@@ -51,6 +51,60 @@ function str(value: unknown): string | null {
   return null; // Invalid type
 }

+/**
+ * Split a Claude Code MCP tool name (`mcp__<server>__<tool>`) into its parts.
+ * Returns null for non-prefixed names. Duplicated from the claude-code-cli
+ * extension (parseMcpToolName) so this package doesn't have to import across
+ * the resources/extensions boundary.
+ */ +function parseMcpToolName(name: string): { server: string; tool: string } | null { + if (!name.startsWith("mcp__")) return null; + const rest = name.slice("mcp__".length); + const delim = rest.indexOf("__"); + if (delim <= 0 || delim === rest.length - 2) return null; + return { server: rest.slice(0, delim), tool: rest.slice(delim + 2) }; +} + +const COMPACT_ARG_VALUE_LIMIT = 60; +const GENERIC_OUTPUT_PREVIEW_LINES = 10; +const GENERIC_ARGS_JSON_PREVIEW_LINES = 10; + +/** + * Format tool args for the generic-renderer fallback. Produces a one-line + * `k=v, k=v` summary when every value is a primitive that fits inline; falls + * back to a truncated JSON dump for structurally complex args. + */ +function formatCompactArgs(args: unknown, expanded: boolean): string { + if (args == null) return ""; + if (typeof args !== "object") return String(args); + + const entries = Object.entries(args as Record); + if (entries.length === 0) return ""; + + const allPrimitive = entries.every(([, value]) => { + const t = typeof value; + if (t === "number" || t === "boolean") return true; + if (t === "string") return (value as string).length <= COMPACT_ARG_VALUE_LIMIT; + return value == null; + }); + + if (allPrimitive) { + return entries + .map(([key, value]) => { + if (typeof value === "string") return `${key}=${JSON.stringify(value)}`; + if (value == null) return `${key}=null`; + return `${key}=${String(value)}`; + }) + .join(", "); + } + + // Complex args: show truncated JSON. + const lines = JSON.stringify(args, null, 2).split("\n"); + const maxLines = expanded ? lines.length : GENERIC_ARGS_JSON_PREVIEW_LINES; + if (lines.length <= maxLines) return lines.join("\n"); + return lines.slice(0, maxLines).join("\n") + "\n..."; +} + export interface ToolExecutionOptions { showImages?: boolean; // default: true (only used if terminal supports images) } @@ -325,6 +379,29 @@ export class ToolExecutionComponent extends Container { this.maybeConvertImagesForKitty(); } + /** + * Finalize a pending tool call as failed/interrupted while preserving any streamed partial output. + */ + completeWithError(message?: string): void { + this.isPartial = false; + if (this.result) { + let content = this.result.content; + if (message) { + const alreadyHasMessage = content.some((block) => block.type === "text" && block.text === message); + if (!alreadyHasMessage) { + content = [...content, { type: "text", text: message }]; + } + } + this.result = { ...this.result, content, isError: true }; + } else { + this.result = { + content: message ? [{ type: "text", text: message }] : [], + isError: true, + }; + } + this.updateDisplay(); + } + /** * Convert non-PNG images to PNG for Kitty graphics protocol. * Kitty requires PNG format (f=100), so JPEG/GIF/WebP won't display. @@ -652,6 +729,12 @@ export class ToolExecutionComponent extends Container { text = `${theme.fg("toolTitle", theme.bold("read"))} ${pathDisplay}`; if (this.result) { + if (this.result.isError) { + const errorText = this.getTextOutput().trim() || "read failed"; + text += `\n\n${theme.fg("error", errorText)}`; + return text; + } + const rawOutput = this.getTextOutput(); // Strip hashline prefixes (e.g. 
"1#BQ:content") for TUI display const output = rawOutput.replace(/^(\s*)\d+#[ZPMQVRWSNKTXJBYH]{2}:/gm, "$1"); @@ -804,6 +887,12 @@ export class ToolExecutionComponent extends Container { } if (this.result) { + if (this.result.isError) { + const errorText = this.getTextOutput().trim() || "ls failed"; + text += `\n\n${theme.fg("error", errorText)}`; + return text; + } + const output = this.getTextOutput().trim(); if (output) { const lines = output.split("\n"); @@ -846,6 +935,12 @@ export class ToolExecutionComponent extends Container { } if (this.result) { + if (this.result.isError) { + const errorText = this.getTextOutput().trim() || "find failed"; + text += `\n\n${theme.fg("error", errorText)}`; + return text; + } + const output = this.getTextOutput().trim(); if (output) { const lines = output.split("\n"); @@ -892,6 +987,12 @@ export class ToolExecutionComponent extends Container { } if (this.result) { + if (this.result.isError) { + const errorText = this.getTextOutput().trim() || "grep failed"; + text += `\n\n${theme.fg("error", errorText)}`; + return text; + } + const output = this.getTextOutput().trim(); if (output) { const lines = output.split("\n"); @@ -943,19 +1044,37 @@ export class ToolExecutionComponent extends Container { } } } else { - // Generic tool (shouldn't reach here for custom tools) - text = theme.fg("toolTitle", theme.bold(this.toolName)); + // Generic tool / MCP tool without a registered renderer. + // MCP tool names from Claude Code arrive as `mcp____`; + // render the server prefix in muted style so the tool name reads + // cleanly. GSD-registered MCP tools have already had their prefix + // stripped upstream in partial-builder.ts and won't reach this branch. + const parsed = parseMcpToolName(this.toolName); + const displayName = parsed ? parsed.tool : this.toolName; + const serverPrefix = parsed ? theme.fg("muted", `${parsed.server}\u00b7`) : ""; + text = serverPrefix + theme.fg("toolTitle", theme.bold(displayName)); - const contentLines = JSON.stringify(this.args, null, 2).split("\n"); - const maxContentLines = 20; - const truncatedContent = contentLines.slice(0, maxContentLines); - if (contentLines.length > maxContentLines) { - truncatedContent.push("..."); + const argsText = formatCompactArgs(this.args, this.expanded); + if (argsText) { + if (argsText.includes("\n")) { + text += `\n\n${theme.fg("toolOutput", argsText)}`; + } else { + text += " " + theme.fg("toolOutput", argsText); + } } - text += `\n\n${truncatedContent.join("\n")}`; - const output = this.getTextOutput(); - if (output) { - text += `\n${output}`; + + if (this.result) { + const output = this.getTextOutput().trim(); + if (output) { + const lines = output.split("\n"); + const maxLines = this.expanded ? lines.length : GENERIC_OUTPUT_PREVIEW_LINES; + const displayLines = lines.slice(0, maxLines); + const remaining = lines.length - maxLines; + text += `\n\n${displayLines.map((line: string) => theme.fg("toolOutput", line)).join("\n")}`; + if (remaining > 0) { + text += `${theme.fg("muted", `\n... 
(${remaining} more lines,`)} ${keyHint("expandTools", "to expand")})`; + } + } } } diff --git a/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.test.ts b/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.test.ts new file mode 100644 index 000000000..d667af20d --- /dev/null +++ b/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.test.ts @@ -0,0 +1,71 @@ +import assert from "node:assert/strict"; +import test from "node:test"; + +import { findLatestPinnableText } from "./chat-controller.js"; + +test("findLatestPinnableText: empty content returns empty string", () => { + assert.equal(findLatestPinnableText([]), ""); +}); + +test("findLatestPinnableText: no tool calls returns empty string", () => { + const blocks = [ + { type: "text", text: "hello" }, + { type: "text", text: "world" }, + ]; + assert.equal(findLatestPinnableText(blocks), ""); +}); + +test("findLatestPinnableText: returns text preceding a tool call", () => { + const blocks = [ + { type: "text", text: "doing the thing" }, + { type: "toolCall", id: "1", name: "Read" }, + ]; + assert.equal(findLatestPinnableText(blocks), "doing the thing"); +}); + +test("findLatestPinnableText: ignores trailing streaming text after the last tool call (regression: pinned mirror duplicated chat-container tokens)", () => { + const blocks = [ + { type: "text", text: "first prose" }, + { type: "toolCall", id: "1", name: "Read" }, + { type: "text", text: "second prose still streaming" }, + ]; + assert.equal(findLatestPinnableText(blocks), "first prose"); +}); + +test("findLatestPinnableText: with multiple tools, picks text before the most recent tool call", () => { + const blocks = [ + { type: "text", text: "first" }, + { type: "toolCall", id: "1", name: "Read" }, + { type: "text", text: "second" }, + { type: "toolCall", id: "2", name: "Grep" }, + { type: "text", text: "third streaming" }, + ]; + assert.equal(findLatestPinnableText(blocks), "second"); +}); + +test("findLatestPinnableText: treats serverToolUse the same as toolCall", () => { + const blocks = [ + { type: "text", text: "before web search" }, + { type: "serverToolUse", id: "ws1", name: "web_search" }, + { type: "text", text: "answer streaming" }, + ]; + assert.equal(findLatestPinnableText(blocks), "before web search"); +}); + +test("findLatestPinnableText: skips empty/whitespace-only text blocks", () => { + const blocks = [ + { type: "text", text: "real prose" }, + { type: "text", text: " " }, + { type: "text", text: "" }, + { type: "toolCall", id: "1", name: "Read" }, + ]; + assert.equal(findLatestPinnableText(blocks), "real prose"); +}); + +test("findLatestPinnableText: thinking blocks are not pinnable", () => { + const blocks = [ + { type: "thinking", thinking: "internal" }, + { type: "toolCall", id: "1", name: "Read" }, + ]; + assert.equal(findLatestPinnableText(blocks), ""); +}); diff --git a/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts b/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts index d301acd12..1fe373f20 100644 --- a/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts +++ b/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts @@ -1,14 +1,58 @@ -import { Loader, Spacer, Text } from "@gsd/pi-tui"; +import { Loader, Markdown, Spacer, Text } from "@gsd/pi-tui"; import type { InteractiveModeEvent, InteractiveModeStateHost } from "../interactive-mode-state.js"; import { theme } from 
"../theme/theme.js"; import { AssistantMessageComponent } from "../components/assistant-message.js"; import { ToolExecutionComponent } from "../components/tool-execution.js"; +import { DynamicBorder } from "../components/dynamic-border.js"; import { appKey } from "../components/keybinding-hints.js"; // Tracks the last processed content index to avoid re-scanning all blocks on every message_update let lastProcessedContentIndex = 0; +function hasVisibleAssistantContent(message: { content: Array }): boolean { + return message.content.some( + (c) => + (c.type === "text" && typeof c.text === "string" && c.text.trim().length > 0) + || (c.type === "thinking" && typeof c.thinking === "string" && c.thinking.trim().length > 0), + ); +} + +function hasAssistantToolBlocks(message: { content: Array }): boolean { + return message.content.some((c) => c.type === "toolCall" || c.type === "serverToolUse"); +} + +// Pick the latest non-empty text block that appears strictly before the most +// recent tool call. Text blocks that come after the last tool call are still +// streaming live into the chat container, so mirroring them into the pinned +// "Latest Output" zone would render the same tokens twice. +export function findLatestPinnableText(contentBlocks: Array): string { + let lastToolIdx = -1; + for (let i = contentBlocks.length - 1; i >= 0; i--) { + const c = contentBlocks[i]; + if (c?.type === "toolCall" || c?.type === "serverToolUse") { + lastToolIdx = i; + break; + } + } + for (let i = lastToolIdx - 1; i >= 0; i--) { + const c = contentBlocks[i]; + if (c?.type === "text" && typeof c.text === "string" && c.text.trim()) { + return c.text.trim(); + } + } + return ""; +} + +// Tracks the latest assistant text for the pinned message zone +let lastPinnedText = ""; +// Whether any tool execution has been added in this assistant turn (triggers pinned display) +let hasToolsInTurn = false; +// Reference to the pinned border so we can toggle its label between working/idle +let pinnedBorder: DynamicBorder | undefined; +// Reference to the pinned markdown component below the border +let pinnedTextComponent: Markdown | undefined; + export async function handleAgentEvent(host: InteractiveModeStateHost & { init: () => Promise; getMarkdownThemeWithSettings: () => any; @@ -31,9 +75,15 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { host.footer.invalidate(); - // Reset content index tracker when a new assistant message starts + // Reset content index tracker and pinned state when a new assistant message starts if (event.type === "message_start" && event.message.role === "assistant") { lastProcessedContentIndex = 0; + lastPinnedText = ""; + hasToolsInTurn = false; + if (pinnedBorder) pinnedBorder.stopSpinner(); + pinnedBorder = undefined; + pinnedTextComponent = undefined; + host.pinnedMessageContainer.clear(); } switch (event.type) { @@ -46,6 +96,12 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { host.streamingMessage = undefined; host.pendingTools.clear(); host.pendingMessagesContainer.clear(); + host.pinnedMessageContainer.clear(); + lastPinnedText = ""; + hasToolsInTurn = false; + if (pinnedBorder) pinnedBorder.stopSpinner(); + pinnedBorder = undefined; + pinnedTextComponent = undefined; host.compactionQueuedMessages = []; host.rebuildChatFromMessages(); host.updatePendingMessagesDisplay(); @@ -104,45 +160,54 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { host.updatePendingMessagesDisplay(); host.ui.requestRender(); } else if 
-        host.streamingComponent = new AssistantMessageComponent(
-          undefined,
-          host.hideThinkingBlock,
-          host.getMarkdownThemeWithSettings(),
-          host.settingsManager.getTimestampFormat(),
-        );
         host.streamingMessage = event.message;
-        host.chatContainer.addChild(host.streamingComponent);
-        host.streamingComponent.updateContent(host.streamingMessage);
+        // External-tool providers can stream multiple assistant turns through
+        // one response. Delay component creation until visible assistant text
+        // arrives so tool outputs keep chronological ordering.
         host.ui.requestRender();
       }
       break;
 
     case "message_update":
-      if (host.streamingComponent && event.message.role === "assistant") {
+      if (event.message.role === "assistant") {
         host.streamingMessage = event.message;
-        host.streamingComponent.updateContent(host.streamingMessage);
-
-        // When the stream adapter signals a completed tool call with an
-        // external result (from Claude Code SDK), update the pending
-        // ToolExecutionComponent immediately so output is visible in
-        // real-time instead of waiting for the session to end.
         const innerEvent = event.assistantMessageEvent;
+
+        let externalToolResult:
+          | { toolCallId: string; content: Array<{ type: string; text?: string; data?: string; mimeType?: string }>; details: Record<string, unknown>; isError: boolean }
+          | undefined;
         if (innerEvent.type === "toolcall_end" && innerEvent.toolCall) {
           const tc = innerEvent.toolCall as any;
-          const externalResult = tc.externalResult;
-          if (externalResult) {
-            const component = host.pendingTools.get(tc.id);
-            if (component) {
-              component.updateResult({
-                content: externalResult.content ?? [{ type: "text", text: "" }],
-                details: externalResult.details ?? {},
-                isError: externalResult.isError ?? false,
-              });
-            }
+          const ext = tc.externalResult;
+          if (ext) {
+            externalToolResult = {
+              toolCallId: tc.id,
+              content: ext.content ?? [{ type: "text", text: "" }],
+              details: ext.details ?? {},
+              isError: ext.isError ?? false,
+            };
+          }
+        } else if (innerEvent.type === "server_tool_use") {
+          const idx = typeof innerEvent.contentIndex === "number" ? innerEvent.contentIndex : -1;
+          const block = idx >= 0 ? (host.streamingMessage.content[idx] as any) : undefined;
+          const ext = block?.externalResult;
+          if (block?.id && ext) {
+            externalToolResult = {
+              toolCallId: block.id,
+              content: ext.content ?? [{ type: "text", text: "" }],
+              details: ext.details ?? {},
+              isError: ext.isError ?? false,
+            };
           }
         }
 
         const contentBlocks = host.streamingMessage.content;
+        // Some adapters reuse a single assistant lifecycle while internally
+        // spanning multiple provider turns. When a new turn starts, content
+        // length can shrink back to 0/1; reset scan index to avoid skipping.
+        if (lastProcessedContentIndex >= contentBlocks.length) {
+          lastProcessedContentIndex = 0;
+        }
         for (let i = lastProcessedContentIndex; i < contentBlocks.length; i++) {
           const content = contentBlocks[i];
           if (content.type === "toolCall") {
@@ -192,19 +257,100 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & {
             }
           }
         }
+
+        // When the stream adapter signals a completed tool call with an
+        // external result (from Claude Code SDK), update the pending
+        // ToolExecutionComponent immediately so output is visible in
+        // real-time instead of waiting for the session to end.
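
Taken together, these hunks buffer the external result first and apply it after the content scan. A self-contained sketch of the application step, with simplified stand-in types (`applyExternalResult` and `SketchToolResult` are illustrative names, not code from this patch):

type SketchToolResult = {
  content: Array<{ type: string; text?: string }>;
  details: Record<string, unknown>;
  isError: boolean;
};

interface SketchPendingTool {
  updateResult(result: SketchToolResult): void;
}

function applyExternalResult(
  pendingTools: Map<string, SketchPendingTool>,
  toolCallId: string,
  ext: { content?: SketchToolResult["content"]; details?: Record<string, unknown>; isError?: boolean },
): void {
  // A missing component means the tool already finished rendering (or was
  // never displayed), so there is nothing to update.
  const component = pendingTools.get(toolCallId);
  if (!component) return;
  component.updateResult({
    content: ext.content ?? [{ type: "text", text: "" }],
    details: ext.details ?? {},
    isError: ext.isError ?? false,
  });
}
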
+ if (externalToolResult) { + const component = host.pendingTools.get(externalToolResult.toolCallId); + if (component) { + component.updateResult({ + content: externalToolResult.content, + details: externalToolResult.details, + isError: externalToolResult.isError, + }); + } + } + + // Render assistant text/thinking after tool components so mixed + // streams keep chronological ordering in the chat container. + const hasToolBlocks = hasAssistantToolBlocks(host.streamingMessage); + if (!host.streamingComponent && hasVisibleAssistantContent(host.streamingMessage)) { + host.streamingComponent = new AssistantMessageComponent( + undefined, + host.hideThinkingBlock, + host.getMarkdownThemeWithSettings(), + host.settingsManager.getTimestampFormat(), + ); + host.chatContainer.addChild(host.streamingComponent); + } + if (host.streamingComponent) { + if (hasToolBlocks) { + host.chatContainer.removeChild(host.streamingComponent); + host.chatContainer.addChild(host.streamingComponent); + } + host.streamingComponent.updateContent(host.streamingMessage); + } + // Update index: fully processed blocks won't need re-scanning. // Keep the last block's index (it may still be accumulating data), // so we re-check it next time but skip all earlier ones. if (contentBlocks.length > 0) { lastProcessedContentIndex = Math.max(0, contentBlocks.length - 1); } + + // Pinned message: mirror the latest assistant text above the editor + // when tool executions push it out of the viewport. + const hasTools = contentBlocks.some( + (c: any) => c.type === "toolCall" || c.type === "serverToolUse", + ); + if (hasTools) hasToolsInTurn = true; + + if (hasToolsInTurn) { + const latestText = findLatestPinnableText(contentBlocks); + + if (latestText && latestText !== lastPinnedText) { + lastPinnedText = latestText; + + if (!pinnedBorder) { + // First time: create border + text component + host.pinnedMessageContainer.clear(); + pinnedBorder = new DynamicBorder( + (str: string) => theme.fg("dim", str), + "Working · Latest Output", + ); + pinnedBorder.startSpinner(host.ui, (str: string) => theme.fg("accent", str)); + host.pinnedMessageContainer.addChild(pinnedBorder); + pinnedTextComponent = new Markdown(latestText, 1, 0, host.getMarkdownThemeWithSettings()); + // Cap pinned content to ~40% of terminal height so tall output + // doesn't exceed the viewport and cause render flashing. 
+ pinnedTextComponent.maxLines = Math.max(3, Math.floor(host.ui.terminal.rows * 0.4)); + host.pinnedMessageContainer.addChild(pinnedTextComponent); + // Hide the separate status loader — the pinned zone replaces it + if (host.loadingAnimation) { + host.loadingAnimation.stop(); + host.loadingAnimation = undefined; + } + host.statusContainer.clear(); + } else { + // Update existing markdown component in-place + pinnedTextComponent?.setText(latestText); + // Refresh maxLines in case terminal was resized + if (pinnedTextComponent) { + pinnedTextComponent.maxLines = Math.max(3, Math.floor(host.ui.terminal.rows * 0.4)); + } + } + } + } + host.ui.requestRender(); } break; case "message_end": if (event.message.role === "user") break; - if (host.streamingComponent && event.message.role === "assistant") { + if (event.message.role === "assistant") { host.streamingMessage = event.message; let errorMessage: string | undefined; if (host.streamingMessage.stopReason === "aborted") { @@ -214,13 +360,36 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { : "Operation aborted"; host.streamingMessage.errorMessage = errorMessage; } - host.streamingComponent.updateContent(host.streamingMessage); + + const shouldRenderAssistant = hasVisibleAssistantContent(host.streamingMessage) + || ( + (host.streamingMessage.stopReason === "aborted" || host.streamingMessage.stopReason === "error") + && !hasAssistantToolBlocks(host.streamingMessage) + ); + if (!host.streamingComponent && shouldRenderAssistant) { + host.streamingComponent = new AssistantMessageComponent( + undefined, + host.hideThinkingBlock, + host.getMarkdownThemeWithSettings(), + host.settingsManager.getTimestampFormat(), + ); + host.chatContainer.addChild(host.streamingComponent); + } + if (host.streamingComponent) { + host.streamingComponent.updateContent(host.streamingMessage); + } + if (host.streamingMessage.stopReason === "aborted" || host.streamingMessage.stopReason === "error") { if (!errorMessage) { errorMessage = host.streamingMessage.errorMessage || "Error"; } - for (const [, component] of host.pendingTools.entries()) { - component.updateResult({ content: [{ type: "text", text: errorMessage }], isError: true }); + const pendingComponents = Array.from(host.pendingTools.values()); + if (pendingComponents.length > 0) { + const [first, ...rest] = pendingComponents; + first.completeWithError(errorMessage); + for (const component of rest) { + component.completeWithError(); + } } host.pendingTools.clear(); } else { @@ -230,6 +399,15 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { } host.streamingComponent = undefined; host.streamingMessage = undefined; + // Clear pinned output once the message is finalized in the chat + // container — prevents duplicate display when the agent continues + // (e.g. form elicitation) after the assistant message ends. + if (pinnedBorder) pinnedBorder.stopSpinner(); + host.pinnedMessageContainer.clear(); + lastPinnedText = ""; + hasToolsInTurn = false; + pinnedBorder = undefined; + pinnedTextComponent = undefined; host.footer.invalidate(); } host.ui.requestRender(); @@ -282,6 +460,16 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { host.streamingMessage = undefined; } host.pendingTools.clear(); + // Pinned output is only useful while work is actively streaming. + // Keep chat history as the single source after completion. 
+      if (pinnedBorder) {
+        pinnedBorder.stopSpinner();
+      }
+      host.pinnedMessageContainer.clear();
+      lastPinnedText = "";
+      hasToolsInTurn = false;
+      pinnedBorder = undefined;
+      pinnedTextComponent = undefined;
       await host.checkShutdownRequested();
       host.ui.requestRender();
       break;
diff --git a/packages/pi-coding-agent/src/modes/interactive/controllers/model-controller.ts b/packages/pi-coding-agent/src/modes/interactive/controllers/model-controller.ts
index ab6ccf6a9..3e6ae686f 100644
--- a/packages/pi-coding-agent/src/modes/interactive/controllers/model-controller.ts
+++ b/packages/pi-coding-agent/src/modes/interactive/controllers/model-controller.ts
@@ -52,7 +52,12 @@ export async function findExactModelMatch(host: any, searchTerm: string): Promis
 export async function getModelCandidates(host: any): Promise<Model<any>[]> {
   if (host.session.scopedModels.length > 0) {
-    return host.session.scopedModels.map((scoped: any) => scoped.model);
+    // Filter scoped models by provider auth readiness so callers like
+    // findExactModelMatch can't resolve a scoped-but-unconfigured model.
+    const registry = host.session.modelRegistry;
+    return host.session.scopedModels
+      .filter((scoped: any) => registry.isProviderRequestReady(scoped.model.provider))
+      .map((scoped: any) => scoped.model);
   }
 
   host.session.modelRegistry.refresh();
diff --git a/packages/pi-coding-agent/src/modes/interactive/interactive-mode-state.ts b/packages/pi-coding-agent/src/modes/interactive/interactive-mode-state.ts
index cf91b00b1..bffa82d51 100644
--- a/packages/pi-coding-agent/src/modes/interactive/interactive-mode-state.ts
+++ b/packages/pi-coding-agent/src/modes/interactive/interactive-mode-state.ts
@@ -9,6 +9,7 @@ export interface InteractiveModeStateHost {
   keybindings: any;
   statusContainer: any;
   chatContainer: any;
+  pinnedMessageContainer: any;
   settingsManager: any;
   pendingTools: Map<string, any>;
   toolOutputExpanded: boolean;
diff --git a/packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts b/packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts
index 85ba64d39..33a185c04 100644
--- a/packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts
+++ b/packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts
@@ -168,6 +168,7 @@ export class InteractiveMode {
   private chatContainer: Container;
   private pendingMessagesContainer: Container;
   private statusContainer: Container;
+  private pinnedMessageContainer: Container;
   private defaultEditor: CustomEditor;
   private editor: EditorComponent;
   private autocompleteProvider: CombinedAutocompleteProvider | undefined;
@@ -285,6 +286,7 @@ export class InteractiveMode {
     this.chatContainer = new Container();
     this.pendingMessagesContainer = new Container();
     this.statusContainer = new Container();
+    this.pinnedMessageContainer = new Container();
     this.widgetContainerAbove = new Container();
     this.widgetContainerBelow = new Container();
     this.keybindings = KeybindingsManager.create();
@@ -490,6 +492,7 @@ export class InteractiveMode {
     this.ui.addChild(this.chatContainer);
     this.ui.addChild(this.pendingMessagesContainer);
     this.ui.addChild(this.statusContainer);
+    this.ui.addChild(this.pinnedMessageContainer);
     this.renderWidgets(); // Initialize with default spacer
     this.ui.addChild(this.widgetContainerAbove);
     this.ui.addChild(this.editorContainer);
@@ -1396,7 +1399,19 @@ export class InteractiveMode {
    */
   private renderWidgets(): void {
     if (!this.widgetContainerAbove || !this.widgetContainerBelow) return;
-    this.renderWidgetContainer(this.widgetContainerAbove, this.extensionWidgetsAbove, 
true, true); + + // widgetContainerAbove: spacer collapses when pinned content is visible + // so there's no extra blank line between pinned output and the editor border. + this.widgetContainerAbove.clear(); + const pinned = this.pinnedMessageContainer; + this.widgetContainerAbove.addChild({ + render: () => pinned.children.length > 0 ? [] : [""], + invalidate: () => {}, + }); + for (const component of this.extensionWidgetsAbove.values()) { + this.widgetContainerAbove.addChild(component); + } + this.renderWidgetContainer(this.widgetContainerBelow, this.extensionWidgetsBelow, false, false); this.ui.requestRender(); } @@ -1631,7 +1646,7 @@ export class InteractiveMode { this.hideExtensionInput(); resolve(undefined); }, - { tui: this.ui, timeout: opts?.timeout }, + { tui: this.ui, timeout: opts?.timeout, secure: opts?.secure }, ); this.editorContainer.clear(); @@ -1770,7 +1785,7 @@ export class InteractiveMode { } else if (type === "warning") { this.showWarning(message); } else { - this.showStatus(message); + this.showStatus(message, { append: true }); } } @@ -2037,12 +2052,13 @@ export class InteractiveMode { * If multiple status messages are emitted back-to-back (without anything else being added to the chat), * we update the previous status line instead of appending new ones to avoid log spam. */ - private showStatus(message: string): void { + private showStatus(message: string, options?: { append?: boolean }): void { + const append = options?.append ?? false; const children = this.chatContainer.children; const last = children.length > 0 ? children[children.length - 1] : undefined; const secondLast = children.length > 1 ? children[children.length - 2] : undefined; - if (last && secondLast && last === this.lastStatusText && secondLast === this.lastStatusSpacer) { + if (!append && last && secondLast && last === this.lastStatusText && secondLast === this.lastStatusSpacer) { this.lastStatusText.setText(theme.fg("dim", message)); this.ui.requestRender(); return; @@ -2264,6 +2280,7 @@ export class InteractiveMode { updateFooter: true, populateHistory: true, }); + this.populatePinnedFromMessages(context.messages); // Show compaction info if session was compacted const allEntries = this.sessionManager.getEntries(); @@ -2287,6 +2304,54 @@ export class InteractiveMode { this.chatContainer.clear(); const context = this.sessionManager.buildSessionContext(); this.renderSessionContext(context); + this.populatePinnedFromMessages(context.messages); + } + + /** + * After rebuilding chat from messages, pin the last assistant text above the + * editor if tool results would otherwise push it out of the viewport. 
+ */ + private populatePinnedFromMessages(messages: AgentMessage[]): void { + this.pinnedMessageContainer.clear(); + + // Walk backwards to find the last assistant message + let lastAssistant: AssistantMessage | undefined; + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i]; + if (msg && "role" in msg && msg.role === "assistant") { + lastAssistant = msg as AssistantMessage; + break; + } + } + if (!lastAssistant) return; + + // Check if any tool calls follow the last text block + const content = lastAssistant.content; + let lastTextIndex = -1; + let hasToolAfterText = false; + for (let i = 0; i < content.length; i++) { + if (content[i].type === "text") lastTextIndex = i; + } + if (lastTextIndex >= 0) { + for (let i = lastTextIndex + 1; i < content.length; i++) { + if (content[i].type === "toolCall" || content[i].type === "serverToolUse") { + hasToolAfterText = true; + break; + } + } + } + if (!hasToolAfterText || lastTextIndex < 0) return; + + const textBlock = content[lastTextIndex] as { type: "text"; text: string }; + const text = textBlock.text?.trim(); + if (!text) return; + + this.pinnedMessageContainer.addChild( + new DynamicBorder((str: string) => theme.fg("dim", str), "Latest Output"), + ); + this.pinnedMessageContainer.addChild( + new Markdown(text, 1, 0, this.getMarkdownThemeWithSettings()), + ); } // ========================================================================= diff --git a/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts b/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts index c510e63b4..91da276cb 100644 --- a/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts +++ b/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts @@ -305,11 +305,13 @@ async function handleShareCommand(ctx: SlashCommandContext): Promise { ctx.showStatus("Share cancelled"); }; - try { - const result = await new Promise<{ stdout: string; stderr: string; code: number | null }>((resolve) => { - proc = spawn("gh", ["gist", "create", "--public=false", tmpFile]); - let stdout = ""; - let stderr = ""; + try { + const result = await new Promise<{ stdout: string; stderr: string; code: number | null }>((resolve) => { + proc = spawn("gh", ["gist", "create", "--public=false", tmpFile], { + shell: process.platform === "win32", + }); + let stdout = ""; + let stderr = ""; proc.stdout?.on("data", (data) => { stdout += data.toString(); }); @@ -497,12 +499,14 @@ function handleHotkeysCommand(ctx: SlashCommandContext): void { const suspend = getAppKeyDisplay(ctx.keybindings, "suspend"); const cycleThinkingLevel = getAppKeyDisplay(ctx.keybindings, "cycleThinkingLevel"); const cycleModelForward = getAppKeyDisplay(ctx.keybindings, "cycleModelForward"); + const cycleModelBackward = getAppKeyDisplay(ctx.keybindings, "cycleModelBackward"); const selectModel = getAppKeyDisplay(ctx.keybindings, "selectModel"); const expandTools = getAppKeyDisplay(ctx.keybindings, "expandTools"); const toggleThinking = getAppKeyDisplay(ctx.keybindings, "toggleThinking"); const externalEditor = getAppKeyDisplay(ctx.keybindings, "externalEditor"); const followUp = getAppKeyDisplay(ctx.keybindings, "followUp"); const dequeue = getAppKeyDisplay(ctx.keybindings, "dequeue"); + const pasteImage = getAppKeyDisplay(ctx.keybindings, "pasteImage"); let hotkeys = ` **Navigation** @@ -538,14 +542,14 @@ function handleHotkeysCommand(ctx: SlashCommandContext): void { | \`${exit}\` | Exit (when editor is empty) | | \`${suspend}\` | Suspend to 
background | | \`${cycleThinkingLevel}\` | Cycle thinking level | -| \`${cycleModelForward}\` | Cycle models | +| \`${cycleModelForward}\` / \`${cycleModelBackward}\` | Cycle models | | \`${selectModel}\` | Open model selector | | \`${expandTools}\` | Toggle tool output expansion | | \`${toggleThinking}\` | Toggle thinking block visibility | | \`${externalEditor}\` | Edit message in external editor | | \`${followUp}\` | Queue follow-up message | | \`${dequeue}\` | Restore queued messages | -| \`Ctrl+V\` | Paste image from clipboard | +| \`${pasteImage}\` | Paste image from clipboard | | \`/\` | Slash commands | | \`!\` | Run bash command | | \`!!\` | Run bash command (excluded from context) | diff --git a/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts b/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts index f2f8fbe4c..7d36e563a 100644 --- a/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts +++ b/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts @@ -224,7 +224,7 @@ export async function runRpcMode(session: AgentSession): Promise { ), input: (title, placeholder, opts) => - createDialogPromise(opts, undefined, { method: "input", title, placeholder, timeout: opts?.timeout }, (r) => + createDialogPromise(opts, undefined, { method: "input", title, placeholder, timeout: opts?.timeout, secure: opts?.secure }, (r) => "cancelled" in r && r.cancelled ? undefined : "value" in r ? r.value : undefined, ), diff --git a/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts b/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts index 20d5c2c73..d6cd25bfc 100644 --- a/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts +++ b/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts @@ -291,6 +291,7 @@ export type RpcExtensionUIRequest = title: string; placeholder?: string; timeout?: number; + secure?: boolean; } | { type: "extension_ui_request"; id: string; method: "editor"; title: string; prefill?: string } | { diff --git a/packages/pi-tui/src/components/__tests__/editor.test.ts b/packages/pi-tui/src/components/__tests__/editor.test.ts index 057ed20da..91eb6257b 100644 --- a/packages/pi-tui/src/components/__tests__/editor.test.ts +++ b/packages/pi-tui/src/components/__tests__/editor.test.ts @@ -61,4 +61,22 @@ describe("Editor", () => { assert.ok(rendered.includes(CURSOR_MARKER)); }); + + it("maps kitty keypad digits to plain editor text", () => { + const editor = new Editor(new TUI(makeTerminal()), theme); + editor.focused = true; + + editor.handleInput("\x1b[57404;129u"); + + assert.equal(editor.getText(), "5"); + }); + + it("does not insert kitty keypad navigation private-use glyphs into the editor", () => { + const editor = new Editor(new TUI(makeTerminal()), theme); + editor.focused = true; + + editor.handleInput("\x1b[57419u"); + + assert.equal(editor.getText(), ""); + }); }); diff --git a/packages/pi-tui/src/components/__tests__/input.test.ts b/packages/pi-tui/src/components/__tests__/input.test.ts index c47100492..7ea0fec46 100644 --- a/packages/pi-tui/src/components/__tests__/input.test.ts +++ b/packages/pi-tui/src/components/__tests__/input.test.ts @@ -32,4 +32,33 @@ describe("Input", () => { input.focused = false; assert.equal(input.focused, false); }); + + it("secure mode obscures typed characters in render output", () => { + const input = new Input(); + input.secure = true; + input.focused = true; + input.handleInput("secret123"); + + const line = input.render(40)[0] ?? 
""; + assert.ok(!line.includes("secret123"), "rendered line must not expose raw secret text"); + assert.ok(line.includes("*********"), "rendered line should include masked characters"); + }); + + it("maps kitty keypad digits to text instead of inserting private-use glyphs", () => { + const input = new Input(); + input.focused = true; + + input.handleInput("\x1b[57400;129u"); + + assert.equal(input.getValue(), "1"); + }); + + it("ignores kitty keypad navigation keys in text input", () => { + const input = new Input(); + input.focused = true; + + input.handleInput("\x1b[57417u"); + + assert.equal(input.getValue(), ""); + }); }); diff --git a/packages/pi-tui/src/components/__tests__/markdown-maxlines.test.ts b/packages/pi-tui/src/components/__tests__/markdown-maxlines.test.ts new file mode 100644 index 000000000..fb9fbf0bc --- /dev/null +++ b/packages/pi-tui/src/components/__tests__/markdown-maxlines.test.ts @@ -0,0 +1,75 @@ +import assert from "node:assert/strict"; +import { test } from "node:test"; + +import { Markdown, type MarkdownTheme } from "../markdown.js"; + +function noopTheme(): MarkdownTheme { + const identity = (text: string) => text; + return { + heading: identity, + link: identity, + linkUrl: identity, + code: identity, + codeBlock: identity, + codeBlockBorder: identity, + quote: identity, + quoteBorder: identity, + hr: identity, + listBullet: identity, + bold: identity, + italic: identity, + strikethrough: identity, + underline: identity, + }; +} + +test("Markdown renders all lines when maxLines is not set", () => { + const text = "Line 1\n\nLine 2\n\nLine 3\n\nLine 4\n\nLine 5"; + const md = new Markdown(text, 0, 0, noopTheme()); + const lines = md.render(80); + // Each paragraph produces a line + an inter-paragraph blank line + const contentLines = lines.filter((l) => l.trim().length > 0); + assert.ok(contentLines.length >= 5, `expected at least 5 content lines, got ${contentLines.length}`); +}); + +test("Markdown truncates from the top when maxLines is exceeded", () => { + const text = "Line 1\n\nLine 2\n\nLine 3\n\nLine 4\n\nLine 5"; + const md = new Markdown(text, 0, 0, noopTheme()); + md.maxLines = 3; + const lines = md.render(80); + assert.ok(lines.length <= 3, `expected at most 3 lines, got ${lines.length}`); + // First line should be the ellipsis indicator + assert.ok(lines[0].includes("…"), "first line should contain ellipsis indicator"); + assert.ok(lines[0].includes("above"), "first line should mention lines above"); +}); + +test("Markdown preserves most recent content when truncating", () => { + const text = "First paragraph\n\nSecond paragraph\n\nThird paragraph\n\nFourth paragraph\n\nFifth paragraph"; + const md = new Markdown(text, 0, 0, noopTheme()); + md.maxLines = 3; + const lines = md.render(80); + // The last rendered line should contain "Fifth paragraph" (the most recent content) + const lastContentLine = lines.filter((l) => !l.includes("…")).pop() ?? 
""; + assert.ok( + lastContentLine.includes("Fifth paragraph"), + `expected last content line to contain "Fifth paragraph", got "${lastContentLine}"`, + ); +}); + +test("Markdown does not truncate when content fits within maxLines", () => { + const text = "Short text"; + const md = new Markdown(text, 0, 0, noopTheme()); + md.maxLines = 10; + const lines = md.render(80); + assert.ok(!lines.some((l) => l.includes("…")), "should not contain ellipsis when content fits"); + assert.ok(lines.some((l) => l.includes("Short text")), "should contain the original text"); +}); + +test("Markdown trims trailing empty lines", () => { + const text = "Some text\n\n"; + const md = new Markdown(text, 0, 0, noopTheme()); + const lines = md.render(80); + // Last line should not be empty (trailing empties are trimmed) + const lastLine = lines[lines.length - 1]; + assert.ok(lastLine.trim().length > 0 || lines.length === 1, "trailing empty lines should be trimmed"); +}); diff --git a/packages/pi-tui/src/components/input.ts b/packages/pi-tui/src/components/input.ts index 627f3557c..78535ab3f 100644 --- a/packages/pi-tui/src/components/input.ts +++ b/packages/pi-tui/src/components/input.ts @@ -21,6 +21,8 @@ export class Input implements Component, Focusable { public onSubmit?: (value: string) => void; public onEscape?: () => void; public placeholder: string = ""; + /** When true, render obscured characters instead of the actual value. */ + public secure: boolean = false; /** Focusable interface - set by TUI when focus changes */ private _focused: boolean = false; @@ -446,6 +448,7 @@ export class Input implements Component, Focusable { // Calculate visible window const prompt = "> "; const availableWidth = width - prompt.length; + const renderValue = this.secure ? "*".repeat(this.value.length) : this.value; if (availableWidth <= 0) { return [prompt]; @@ -466,7 +469,7 @@ export class Input implements Component, Focusable { if (this.value.length < availableWidth) { // Everything fits (leave room for cursor at end) - visibleText = this.value; + visibleText = renderValue; } else { // Need horizontal scrolling // Reserve one character for cursor if it's at the end @@ -501,17 +504,17 @@ export class Input implements Component, Focusable { if (this.cursor < halfWidth) { // Cursor near start - visibleText = this.value.slice(0, findValidEnd(scrollWidth)); + visibleText = renderValue.slice(0, findValidEnd(scrollWidth)); cursorDisplay = this.cursor; } else if (this.cursor > this.value.length - halfWidth) { // Cursor near end const start = findValidStart(this.value.length - scrollWidth); - visibleText = this.value.slice(start); + visibleText = renderValue.slice(start); cursorDisplay = this.cursor - start; } else { // Cursor in middle const start = findValidStart(this.cursor - halfWidth); - visibleText = this.value.slice(start, findValidEnd(start + scrollWidth)); + visibleText = renderValue.slice(start, findValidEnd(start + scrollWidth)); cursorDisplay = halfWidth; } } diff --git a/packages/pi-tui/src/components/markdown.ts b/packages/pi-tui/src/components/markdown.ts index 0920e6b4f..e1d7d454f 100644 --- a/packages/pi-tui/src/components/markdown.ts +++ b/packages/pi-tui/src/components/markdown.ts @@ -58,10 +58,13 @@ export class Markdown implements Component { private defaultTextStyle?: DefaultTextStyle; private theme: MarkdownTheme; private defaultStylePrefix?: string; + /** Maximum rendered lines (excluding padding). 
When set, content is truncated from the top with an ellipsis indicator so the most recent output remains visible. */ + maxLines?: number; // Cache for rendered output private cachedText?: string; private cachedWidth?: number; + private cachedMaxLines?: number; private cachedLines?: string[]; constructor( @@ -86,12 +89,13 @@ export class Markdown implements Component { invalidate(): void { this.cachedText = undefined; this.cachedWidth = undefined; + this.cachedMaxLines = undefined; this.cachedLines = undefined; } render(width: number): string[] { // Check cache - if (this.cachedLines && this.cachedText === this.text && this.cachedWidth === width) { + if (this.cachedLines && this.cachedText === this.text && this.cachedWidth === width && this.cachedMaxLines === this.maxLines) { return this.cachedLines; } @@ -104,6 +108,7 @@ export class Markdown implements Component { // Update cache this.cachedText = this.text; this.cachedWidth = width; + this.cachedMaxLines = this.maxLines; this.cachedLines = result; return result; } @@ -124,6 +129,12 @@ export class Markdown implements Component { for (let j = 0; j < tokenLines.length; j++) renderedLines.push(tokenLines[j]); } + // Trim trailing empty lines — inter-block spacing at the end just adds + // unwanted whitespace before whatever follows (e.g. pinned output border). + while (renderedLines.length > 0 && renderedLines[renderedLines.length - 1] === "") { + renderedLines.pop(); + } + // Wrap lines (NO padding, NO background yet) const wrappedLines: string[] = []; for (const line of renderedLines) { @@ -143,6 +154,15 @@ export class Markdown implements Component { } } + // Truncate from the top when maxLines is set so the most recent content + // stays visible. This prevents the pinned output zone from exceeding the + // terminal height and causing render flashing. + if (this.maxLines !== undefined && wrappedLines.length > this.maxLines) { + const keep = Math.max(1, this.maxLines - 1); // Reserve one line for the ellipsis indicator + const truncated = wrappedLines.length - keep; + wrappedLines.splice(0, truncated, `… ${truncated} line${truncated !== 1 ? "s" : ""} above`); + } + // Add margins and background to each wrapped line const leftMargin = " ".repeat(this.paddingX); const rightMargin = " ".repeat(this.paddingX); @@ -181,6 +201,7 @@ export class Markdown implements Component { // Update cache this.cachedText = this.text; this.cachedWidth = width; + this.cachedMaxLines = this.maxLines; this.cachedLines = result; return result.length > 0 ? 
result : [""]; diff --git a/packages/pi-tui/src/keys.ts b/packages/pi-tui/src/keys.ts index eff21579c..952b04462 100644 --- a/packages/pi-tui/src/keys.ts +++ b/packages/pi-tui/src/keys.ts @@ -309,6 +309,28 @@ const CODEPOINTS = { kpEnter: 57414, // Numpad Enter (Kitty protocol) } as const; +const KITTY_PRIVATE_USE_RANGE = { start: 57344, end: 63743 } as const; + +const KITTY_KEYPAD_PRINTABLES = new Map([ + [57399, "0"], // KP_0 + [57400, "1"], // KP_1 + [57401, "2"], // KP_2 + [57402, "3"], // KP_3 + [57403, "4"], // KP_4 + [57404, "5"], // KP_5 + [57405, "6"], // KP_6 + [57406, "7"], // KP_7 + [57407, "8"], // KP_8 + [57408, "9"], // KP_9 + [57409, "."], // KP_DECIMAL + [57410, "/"], // KP_DIVIDE + [57411, "*"], // KP_MULTIPLY + [57412, "-"], // KP_SUBTRACT + [57413, "+"], // KP_ADD + [57415, "="], // KP_EQUAL + [57416, ","], // KP_SEPARATOR +]); + const ARROW_CODEPOINTS = { up: -1, down: -2, @@ -1168,6 +1190,16 @@ export function decodeKittyPrintable(data: string): string | undefined { // Drop control characters or invalid codepoints. if (!Number.isFinite(effectiveCodepoint) || effectiveCodepoint < 32) return undefined; + const keypadPrintable = KITTY_KEYPAD_PRINTABLES.get(effectiveCodepoint); + if (keypadPrintable !== undefined) return keypadPrintable; + + if ( + effectiveCodepoint >= KITTY_PRIVATE_USE_RANGE.start && + effectiveCodepoint <= KITTY_PRIVATE_USE_RANGE.end + ) { + return undefined; + } + try { return String.fromCodePoint(effectiveCodepoint); } catch { diff --git a/pkg/package.json b/pkg/package.json index 154dec5ad..810a3232e 100644 --- a/pkg/package.json +++ b/pkg/package.json @@ -1,6 +1,6 @@ { "name": "@glittercowboy/gsd", - "version": "2.68.0", + "version": "2.73.0", "piConfig": { "name": "gsd", "configDir": ".gsd" diff --git a/scripts/bump-version.mjs b/scripts/bump-version.mjs index 77be226c1..4f7286ceb 100644 --- a/scripts/bump-version.mjs +++ b/scripts/bump-version.mjs @@ -3,7 +3,7 @@ * Bump version in package.json, then sync platform packages and pkg/package.json. * Usage: node scripts/bump-version.mjs */ -import { readFileSync, writeFileSync } from "fs"; +import { readFileSync, writeFileSync, existsSync } from "fs"; import { resolve, dirname } from "path"; import { execSync } from "child_process"; import { fileURLToPath } from "url"; @@ -37,3 +37,15 @@ execSync("node native/scripts/sync-platform-versions.cjs", { cwd: root, stdio: " // 4. Sync pkg/package.json (reads from pi-coding-agent) execSync("node scripts/sync-pkg-version.cjs", { cwd: root, stdio: "inherit" }); + +// 5. Regenerate root package-lock.json to match the new version. +// --package-lock-only updates the lockfile in-place without touching node_modules. +execSync("npm install --package-lock-only --ignore-scripts", { cwd: root, stdio: "inherit" }); +console.log(`[bump-version] package-lock.json regenerated at ${newVersion}`); + +// 6. Regenerate web/package-lock.json if the web app is present. 
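
The regeneration step hinges on two npm flags: `--package-lock-only` rewrites the lockfile without touching node_modules, and `--ignore-scripts` keeps lifecycle hooks from running in the middle of a version bump. The same invocation as a reusable sketch (a hypothetical helper, not code from this script):

import { execSync } from "node:child_process";

// Rewrite <dir>/package-lock.json to match <dir>/package.json without
// installing anything and without running lifecycle scripts.
function regenerateLockfile(dir: string): void {
  execSync("npm install --package-lock-only --ignore-scripts", {
    cwd: dir,
    stdio: "inherit",
  });
}
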
+const webDir = resolve(root, "web"); +if (existsSync(webDir)) { + execSync("npm install --package-lock-only --ignore-scripts", { cwd: webDir, stdio: "inherit" }); + console.log(`[bump-version] web/package-lock.json regenerated`); +} diff --git a/scripts/dev.js b/scripts/dev.js index faf9a75d2..0eea64072 100644 --- a/scripts/dev.js +++ b/scripts/dev.js @@ -11,15 +11,18 @@ import { spawn } from 'node:child_process' import { resolve, dirname } from 'node:path' import { fileURLToPath } from 'node:url' +import { createRequire } from 'node:module' const __dirname = dirname(fileURLToPath(import.meta.url)) const root = resolve(__dirname, '..') +const require = createRequire(import.meta.url) +const tscBin = require.resolve('typescript/bin/tsc') const procs = [ spawn('node', [resolve(__dirname, 'watch-resources.js')], { cwd: root, stdio: 'inherit' }), - spawn(resolve(root, 'node_modules', '.bin', 'tsc'), ['--watch'], { + spawn(process.execPath, [tscBin, '--watch'], { cwd: root, stdio: 'inherit' }) ] diff --git a/scripts/install-hooks.mjs b/scripts/install-hooks.mjs new file mode 100644 index 000000000..dea550585 --- /dev/null +++ b/scripts/install-hooks.mjs @@ -0,0 +1,52 @@ +#!/usr/bin/env node + +import { execFileSync } from 'node:child_process'; +import { chmodSync, existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs'; +import { join } from 'node:path'; + +const MARKER = '# gsd-secret-scan'; + +function git(args) { + return execFileSync('git', args, { + encoding: 'utf8', + shell: process.platform === 'win32', + }).trim(); +} + +const gitDir = git(['rev-parse', '--git-dir']); +const repoRoot = git(['rev-parse', '--show-toplevel']); +const hookDir = join(gitDir, 'hooks'); +const hookFile = join(hookDir, 'pre-commit'); +const hookCommand = `node "${join(repoRoot, 'scripts', 'secret-scan.mjs')}"`; + +mkdirSync(hookDir, { recursive: true }); + +if (existsSync(hookFile)) { + const current = readFileSync(hookFile, 'utf8'); + if (current.includes(MARKER)) { + process.stdout.write('secret-scan pre-commit hook already installed.\n'); + process.exit(0); + } + + const next = `${current.replace(/\s*$/, '\n')}${MARKER}\n${hookCommand}\n`; + writeFileSync(hookFile, next, 'utf8'); + process.stdout.write('secret-scan appended to existing pre-commit hook.\n'); + process.exit(0); +} + +const hookBody = [ + '#!/usr/bin/env sh', + '# gsd-secret-scan', + '# Pre-commit hook: scan staged files for hardcoded secrets', + hookCommand, + '', +].join('\n'); + +writeFileSync(hookFile, hookBody, 'utf8'); +try { + chmodSync(hookFile, 0o755); +} catch { + // Best effort on Windows filesystems that do not honor chmod. +} + +process.stdout.write('secret-scan pre-commit hook installed.\n'); diff --git a/scripts/parallel-monitor.mjs b/scripts/parallel-monitor.mjs index b29109682..e3acd6545 100755 --- a/scripts/parallel-monitor.mjs +++ b/scripts/parallel-monitor.mjs @@ -42,7 +42,7 @@ import fs from 'node:fs'; import path from 'node:path'; -import { execSync } from 'node:child_process'; +import { execSync, spawn, spawnSync } from 'node:child_process'; // ─── Configuration ─────────────────────────────────────────────────────────── @@ -294,7 +294,10 @@ function findGsdLoader() { // 3. Try `which gsd` and resolve symlink try { - const bin = execSync('which gsd', { encoding: 'utf-8', timeout: 3000 }).trim(); + const pathLookup = process.platform === 'win32' ? 
'where.exe' : 'which'; + const lookupArgs = ['gsd']; + const result = spawnSync(pathLookup, lookupArgs, { encoding: 'utf-8', timeout: 3000 }); + const bin = result.status === 0 ? result.stdout.trim().split(/\r?\n/)[0]?.trim() : ''; if (bin) { const realBin = fs.realpathSync(bin); const loader = path.resolve(path.dirname(realBin), '..', 'dist', 'loader.js'); @@ -309,7 +312,7 @@ const GSD_LOADER = findGsdLoader(); /** * Respawn a dead worker. Returns the new PID or null on failure. - * Uses nohup + output redirection so the child is fully detached. + * Uses a detached Node child with log file descriptors so the child is fully detached. */ function respawnWorker(mid) { const worktreeDir = path.resolve(PROJECT_ROOT, `.gsd/worktrees/${mid}`); @@ -319,41 +322,37 @@ function respawnWorker(mid) { const stdoutLog = path.resolve(PROJECT_ROOT, PARALLEL_DIR, `${mid}.stdout.log`); const stderrLog = path.resolve(PROJECT_ROOT, PARALLEL_DIR, `${mid}.stderr.log`); + let stdoutFd; + let stderrFd; try { - const env = [ - `GSD_MILESTONE_LOCK=${mid}`, - `GSD_PROJECT_ROOT=${PROJECT_ROOT}`, - `GSD_PARALLEL_WORKER=1`, - ].join(' '); - - // Use a shell script written to a temp file to avoid quoting hell - const script = [ - '#!/bin/bash', - `cd "${worktreeDir}"`, - `export GSD_MILESTONE_LOCK=${mid}`, - `export GSD_PROJECT_ROOT="${PROJECT_ROOT}"`, - `export GSD_PARALLEL_WORKER=1`, - `exec node "${GSD_LOADER}" headless --json auto > "${stdoutLog}" 2>> "${stderrLog}"`, - ].join('\n'); - - const scriptPath = path.resolve(PROJECT_ROOT, PARALLEL_DIR, `${mid}.respawn.sh`); - fs.writeFileSync(scriptPath, script, { mode: 0o755 }); - - // Launch detached via nohup - const result = execSync( - `nohup bash "${scriptPath}" > /dev/null 2>&1 & echo $!`, - { timeout: 5000, encoding: 'utf-8', cwd: worktreeDir } - ).trim(); - - // Clean up the temp script after a delay (process already forked) - setTimeout(() => { - try { fs.unlinkSync(scriptPath); } catch {} - }, 5000); - - const newPid = parseInt(result, 10); - return isNaN(newPid) ? null : newPid; + fs.mkdirSync(path.dirname(stdoutLog), { recursive: true }); + stdoutFd = fs.openSync(stdoutLog, 'a'); + stderrFd = fs.openSync(stderrLog, 'a'); + + const child = spawn(process.execPath, [GSD_LOADER, 'headless', '--json', 'auto'], { + cwd: worktreeDir, + detached: true, + env: { + ...process.env, + GSD_MILESTONE_LOCK: mid, + GSD_PROJECT_ROOT: PROJECT_ROOT, + GSD_PARALLEL_WORKER: '1', + }, + stdio: ['ignore', stdoutFd, stderrFd], + windowsHide: true, + }); + + child.unref(); + return child.pid ?? 
null; } catch (err) { return null; + } finally { + if (stdoutFd !== undefined) { + try { fs.closeSync(stdoutFd); } catch {} + } + if (stderrFd !== undefined) { + try { fs.closeSync(stderrFd); } catch {} + } } } diff --git a/scripts/pr-risk-check.mjs b/scripts/pr-risk-check.mjs index 18c88e02b..94b61f13b 100644 --- a/scripts/pr-risk-check.mjs +++ b/scripts/pr-risk-check.mjs @@ -20,7 +20,7 @@ import { createInterface } from 'readline'; const __dirname = dirname(fileURLToPath(import.meta.url)); const REPO_ROOT = resolve(__dirname, '..'); -const MAP_PATH = resolve(REPO_ROOT, 'docs/FILE-SYSTEM-MAP.md'); +const MAP_PATH = resolve(REPO_ROOT, 'docs/dev/FILE-SYSTEM-MAP.md'); // --------------------------------------------------------------------------- // Risk tier definitions diff --git a/scripts/prepublish-check.mjs b/scripts/prepublish-check.mjs new file mode 100644 index 000000000..c47cafbbd --- /dev/null +++ b/scripts/prepublish-check.mjs @@ -0,0 +1,19 @@ +#!/usr/bin/env node + +import { spawnSync } from 'node:child_process'; + +if (process.env.CI === 'true' || process.env.CI === '1') { + process.exit(0); +} + +const result = spawnSync('git', ['diff', '--exit-code'], { + stdio: 'inherit', + shell: process.platform === 'win32', +}); + +if (result.status === 0) { + process.exit(0); +} + +process.stderr.write('ERROR: version sync changed files — commit them before publishing\n'); +process.exit(result.status ?? 1); diff --git a/scripts/secret-scan.mjs b/scripts/secret-scan.mjs new file mode 100644 index 000000000..e8f1a5f79 --- /dev/null +++ b/scripts/secret-scan.mjs @@ -0,0 +1,184 @@ +#!/usr/bin/env node + +import { execFileSync } from 'node:child_process'; +import { existsSync, readFileSync } from 'node:fs'; + +const RED = '\x1b[0;31m'; +const YELLOW = '\x1b[1;33m'; +const NC = '\x1b[0m'; +const IGNORE_FILE = '.secretscanignore'; + +const PATTERNS = [ + { label: 'AWS Access Key', regex: /AKIA[0-9A-Z]{16}/g }, + { label: 'Generic API Key', regex: /(api[_-]?key|apikey|api[_-]?secret)[ \t]*[:=][ \t]*['"][0-9a-zA-Z_./-]{20,}['"]/gi }, + { label: 'Generic Secret', regex: /(secret|token|password|passwd|pwd|credential)[ \t]*[:=][ \t]*['"][^\s'"]{8,}['"]/gi }, + { label: 'Authorization Header', regex: /(authorization|bearer)[ \t]*[:=][ \t]*['"][^\s'"]{8,}['"]/gi }, + { label: 'Private Key', regex: /-----BEGIN\s+(RSA|DSA|EC|OPENSSH|PGP)\s+PRIVATE\s+KEY-----/g }, + { label: 'Database URL', regex: /(mysql|postgres|postgresql|mongodb|redis|amqp|mssql):\/\/[^\s'"]{8,}/gi }, + { label: 'GitHub Token', regex: /gh[pousr]_[0-9a-zA-Z]{36,}/g }, + { label: 'GitLab Token', regex: /glpat-[0-9a-zA-Z-]{20,}/g }, + { label: 'Slack Token', regex: /xox[baprs]-[0-9a-zA-Z-]{10,}/g }, + { label: 'Slack Webhook', regex: /hooks\.slack\.com\/services\/T[0-9A-Z]{8,}\/B[0-9A-Z]{8,}\/[0-9a-zA-Z]{20,}/g }, + { label: 'Google API Key', regex: /AIza[0-9A-Za-z_-]{35}/g }, + { label: 'Stripe Key', regex: /[sr]k_(live|test)_[0-9a-zA-Z]{20,}/g }, + { label: 'npm Token', regex: /npm_[0-9a-zA-Z]{36,}/g }, + { label: 'Hex Secret', regex: /(secret|key|token|password)[ \t]*[:=][ \t]*['"]?[0-9a-f]{32,}['"]?/gi }, + { label: 'Hardcoded Password', regex: /password[ \t]*[:=][ \t]*['"][^'"]{4,}['"]/gi }, +]; + +function runGit(args) { + try { + return execFileSync('git', args, { + encoding: 'utf8', + shell: process.platform === 'win32', + stdio: ['ignore', 'pipe', 'ignore'], + }); + } catch { + return ''; + } +} + +function parseArgs(argv) { + if (argv[0] === '--diff') { + return { mode: 'diff', ref: argv[1] || 'HEAD' }; + } + if (argv[0] === 
'--file') { + return { mode: 'file', file: argv[1] || '' }; + } + return { mode: 'staged' }; +} + +function getFiles(options) { + if (options.mode === 'diff') { + return runGit(['diff', '--name-only', '--diff-filter=ACMR', options.ref]); + } + if (options.mode === 'file') { + return options.file; + } + return runGit(['diff', '--cached', '--name-only', '--diff-filter=ACMR']); +} + +function shouldScan(file) { + const lower = file.toLowerCase(); + const skippedExtensions = [ + '.png', '.jpg', '.jpeg', '.gif', '.ico', '.svg', '.woff', '.woff2', '.ttf', '.eot', + '.zip', '.tar', '.gz', '.tgz', '.bz2', '.7z', '.rar', '.exe', '.dll', '.so', '.dylib', + '.o', '.a', '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.lock', '.map', '.node', '.wasm', + ]; + if (skippedExtensions.some((ext) => lower.endsWith(ext))) return false; + if ( + lower === '.secretscanignore' || + lower === '.gitignore' || + lower === '.gitattributes' || + lower.startsWith('license') || + lower.startsWith('changelog') || + lower.endsWith('.md') || + lower === 'package-lock.json' || + lower === 'pnpm-lock.yaml' || + lower === 'bun.lock' + ) { + return false; + } + if ( + lower.startsWith('node_modules/') || + lower.startsWith('dist/') || + lower.startsWith('coverage/') || + lower.startsWith('.gsd/') + ) { + return false; + } + if (lower.endsWith('.min.js') || lower.endsWith('.min.css')) return false; + return true; +} + +function getContent(file, mode) { + if (mode === 'staged') { + const staged = runGit(['show', `:${file}`]); + if (staged) return staged; + } + try { + return readFileSync(file, 'utf8'); + } catch { + return ''; + } +} + +function loadIgnorePatterns() { + if (!existsSync(IGNORE_FILE)) return []; + return readFileSync(IGNORE_FILE, 'utf8') + .split(/\r?\n/) + .map((line) => line.trim()) + .filter((line) => line && !line.startsWith('#')); +} + +function isIgnored(file, lineContent, ignorePatterns) { + return ignorePatterns.some((pattern) => { + const splitIndex = pattern.indexOf(':'); + if (splitIndex > 0) { + const ignoreFile = pattern.slice(0, splitIndex); + const ignoreRegex = pattern.slice(splitIndex + 1); + if (file !== ignoreFile) return false; + try { + return new RegExp(ignoreRegex, 'i').test(lineContent); + } catch { + return false; + } + } + + try { + return new RegExp(pattern, 'i').test(lineContent); + } catch { + return false; + } + }); +} + +function resetRegex(regex) { + regex.lastIndex = 0; + return regex; +} + +const options = parseArgs(process.argv.slice(2)); +const files = getFiles(options) + .split(/\r?\n/) + .map((file) => file.trim()) + .filter(Boolean); + +if (files.length === 0) { + process.stdout.write('secret-scan: no files to scan\n'); + process.exit(0); +} + +const ignorePatterns = loadIgnorePatterns(); +let findings = 0; + +for (const file of files) { + if (!shouldScan(file)) continue; + const content = getContent(file, options.mode); + if (!content) continue; + + const lines = content.split(/\r?\n/); + for (let lineIndex = 0; lineIndex < lines.length; lineIndex++) { + const line = lines[lineIndex]; + for (const pattern of PATTERNS) { + if (!resetRegex(pattern.regex).test(line)) continue; + if (isIgnored(file, line, ignorePatterns)) continue; + + process.stdout.write(`${RED}[SECRET DETECTED]${NC} ${YELLOW}${pattern.label}${NC}\n`); + process.stdout.write(` File: ${file}:${lineIndex + 1}\n`); + process.stdout.write(` Line: ${line.slice(0, 120)}...\n\n`); + findings++; + } + } +} + +if (findings > 0) { + process.stdout.write(`${RED}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}\n`); + 
process.stdout.write(`${RED}Found ${findings} potential secret(s) in scanned files.${NC}\n`); + process.stdout.write(`${RED}Commit blocked. Remove the secrets or add exceptions${NC}\n`); + process.stdout.write(`${RED}to .secretscanignore if these are false positives.${NC}\n`); + process.stdout.write(`${RED}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}\n`); + process.exit(1); +} + +process.stdout.write('secret-scan: no secrets detected ✓\n'); diff --git a/scripts/validate-pack.js b/scripts/validate-pack.js index e4bbe6277..b35bc1b5a 100644 --- a/scripts/validate-pack.js +++ b/scripts/validate-pack.js @@ -3,8 +3,8 @@ // Usage: npm run validate-pack (or node scripts/validate-pack.js) // Exit 0 = safe to publish, Exit 1 = broken package. -import { execSync } from 'node:child_process'; -import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs'; +import { execFileSync } from 'node:child_process'; +import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, statSync, writeFileSync } from 'node:fs'; import { tmpdir } from 'node:os'; import { dirname, join, resolve } from 'node:path'; import { fileURLToPath } from 'node:url'; @@ -15,8 +15,38 @@ const ROOT = resolve(__dirname, '..'); let tarball = null; let installDir = null; +let npmCacheDir = null; +const DEFAULT_MAX_BUFFER = 50 * 1024 * 1024; + +function getNpmCommand() { + return process.platform === 'win32' ? 'npm.cmd' : 'npm'; +} + +function runNpm(args, options = {}) { + return execFileSync(getNpmCommand(), args, { + cwd: ROOT, + encoding: 'utf8', + shell: process.platform === 'win32', + stdio: ['pipe', 'pipe', 'pipe'], + maxBuffer: DEFAULT_MAX_BUFFER, + env: { + ...process.env, + npm_config_cache: npmCacheDir ?? process.env.npm_config_cache, + }, + ...options, + }); +} + +function formatBytes(bytes) { + if (bytes < 1024) return `${bytes} B`; + if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`; + return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; +} try { + npmCacheDir = mkdtempSync(join(tmpdir(), 'validate-pack-npm-cache-')); + mkdirSync(npmCacheDir, { recursive: true }); + // --- Guard: workspace packages must not have @gsd/* cross-deps --- console.log('==> Checking workspace packages for @gsd/* cross-deps...'); const workspaces = ['native', 'pi-agent-core', 'pi-ai', 'pi-coding-agent', 'pi-tui']; @@ -42,12 +72,10 @@ try { // --- Pack tarball --- console.log('==> Packing tarball...'); - const packOutput = execSync('npm pack --ignore-scripts', { - cwd: ROOT, - encoding: 'utf8', - stdio: ['pipe', 'pipe', 'pipe'], - }); - const tarballName = packOutput.trim().split('\n').pop(); + const packOutput = runNpm(['pack', '--json', '--ignore-scripts']); + const packEntries = JSON.parse(packOutput); + const packEntry = Array.isArray(packEntries) ? 
packEntries[0] : null; + const tarballName = packEntry?.filename; tarball = join(ROOT, tarballName); if (!existsSync(tarball)) { @@ -55,12 +83,16 @@ try { process.exit(1); } - const stats = execSync(`du -h "${tarball}"`, { encoding: 'utf8' }).split('\t')[0].trim(); - console.log(`==> Tarball: ${tarballName} (${stats} compressed)`); + const stats = statSync(tarball); + console.log(`==> Tarball: ${tarballName} (${formatBytes(stats.size)} compressed)`); - // --- Check critical files using tar listing --- + // --- Check critical files using npm pack metadata --- console.log('==> Checking critical files...'); - const tarList = execSync(`tar tzf "${tarball}"`, { encoding: 'utf8', maxBuffer: 50 * 1024 * 1024 }); + const packedFiles = new Set( + Array.isArray(packEntry?.files) + ? packEntry.files.map((entry) => entry?.path).filter(Boolean) + : [], + ); const requiredFiles = [ 'dist/loader.js', @@ -73,7 +105,7 @@ try { let missing = false; for (const required of requiredFiles) { - if (!tarList.includes(`package/${required}`)) { + if (!packedFiles.has(required)) { console.log(` MISSING: ${required}`); missing = true; } @@ -91,10 +123,16 @@ try { writeFileSync(join(installDir, 'package.json'), JSON.stringify({ name: 'test-install', version: '1.0.0', private: true }, null, 2)); try { - const installOutput = execSync(`npm install "${tarball}"`, { + const installOutput = execFileSync(getNpmCommand(), ['install', tarball], { cwd: installDir, encoding: 'utf8', + shell: process.platform === 'win32', stdio: ['pipe', 'pipe', 'pipe'], + maxBuffer: DEFAULT_MAX_BUFFER, + env: { + ...process.env, + npm_config_cache: npmCacheDir, + }, }); console.log(installOutput); console.log('==> Install succeeded.'); @@ -145,11 +183,12 @@ try { process.exit(1); } try { - const versionOutput = execSync(`node "${loaderPath}" -v`, { + const versionOutput = execFileSync(process.execPath, [loaderPath, '-v'], { cwd: installDir, encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 15000, + maxBuffer: DEFAULT_MAX_BUFFER, }).trim(); console.log(` gsd -v => ${versionOutput}`); if (!versionOutput.match(/^\d+\.\d+\.\d+/)) { @@ -173,4 +212,7 @@ try { if (tarball && existsSync(tarball)) { rmSync(tarball, { force: true }); } + if (npmCacheDir && existsSync(npmCacheDir)) { + rmSync(npmCacheDir, { recursive: true, force: true }); + } } diff --git a/scripts/version-stamp.mjs b/scripts/version-stamp.mjs index b673b424d..72b5c81b5 100644 --- a/scripts/version-stamp.mjs +++ b/scripts/version-stamp.mjs @@ -1,5 +1,10 @@ import { readFileSync, writeFileSync } from "fs"; -import { execFileSync } from "child_process"; +import { execFileSync, execSync } from "child_process"; +import { fileURLToPath } from "url"; +import { dirname, resolve } from "path"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const root = resolve(__dirname, ".."); const pkgPath = new URL("../package.json", import.meta.url); const pkg = JSON.parse(readFileSync(pkgPath, "utf8")); @@ -9,5 +14,9 @@ const devVersion = `${pkg.version}-dev.${shortSha}`; pkg.version = devVersion; writeFileSync(pkgPath, JSON.stringify(pkg, null, 2) + "\n"); - console.log(`Stamped version: ${devVersion}`); + +// Regenerate package-lock.json to reflect the stamped dev version. +// --package-lock-only updates the lockfile in-place without touching node_modules. 
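
The reason the lockfile must be regenerated at all: an npm v2/v3 lockfile embeds the root version twice, at the top level and under `packages[""]`, and `npm ci` refuses to run when either copy disagrees with package.json. A sketch of that consistency check (illustrative only; neither script performs it explicitly):

import { readFileSync } from "node:fs";

// True when both embedded copies of the version match the expected one.
function lockfileMatches(rootDir: string, expected: string): boolean {
  const lock = JSON.parse(readFileSync(`${rootDir}/package-lock.json`, "utf8"));
  return lock.version === expected && lock.packages?.[""]?.version === expected;
}
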
+execSync("npm install --package-lock-only --ignore-scripts", { cwd: root, stdio: "inherit" }); +console.log(`[version-stamp] package-lock.json regenerated at ${devVersion}`); diff --git a/scripts/with-env.mjs b/scripts/with-env.mjs new file mode 100644 index 000000000..a338ffb3f --- /dev/null +++ b/scripts/with-env.mjs @@ -0,0 +1,46 @@ +#!/usr/bin/env node + +import { spawn } from 'node:child_process'; + +const args = process.argv.slice(2); +const env = { ...process.env }; + +let separatorIndex = args.indexOf('--'); +let commandStart = separatorIndex >= 0 ? separatorIndex + 1 : 0; + +for (let i = 0; i < (separatorIndex >= 0 ? separatorIndex : args.length); i++) { + const arg = args[i]; + const eq = arg.indexOf('='); + if (eq <= 0) { + commandStart = i; + separatorIndex = -1; + break; + } + env[arg.slice(0, eq)] = arg.slice(eq + 1); +} + +const commandArgs = args.slice(commandStart); +if (commandArgs.length === 0) { + process.stderr.write('with-env: expected a command after environment assignments\n'); + process.exit(1); +} + +const [command, ...childArgs] = commandArgs; +const child = spawn(command, childArgs, { + stdio: 'inherit', + env, + shell: process.platform === 'win32', +}); + +child.on('exit', (code, signal) => { + if (signal) { + process.kill(process.pid, signal); + return; + } + process.exit(code ?? 0); +}); + +child.on('error', (error) => { + process.stderr.write(`with-env: failed to run ${command}: ${error.message}\n`); + process.exit(1); +}); diff --git a/src/cli.ts b/src/cli.ts index 2a73e5b2f..a361e73db 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -16,8 +16,7 @@ import { agentDir, sessionsDir, authFilePath } from './app-paths.js' import { initResources, buildResourceLoader, getNewerManagedResourceVersion } from './resource-loader.js' import { ensureManagedTools } from './tool-bootstrap.js' import { loadStoredEnvKeys } from './wizard.js' -import { migratePiCredentials } from './pi-migration.js' -import { validateConfiguredModel } from './startup-model-validation.js' +import { migratePiCredentials, getPiDefaultModelAndProvider } from './pi-migration.js' import { shouldRunOnboarding, runOnboarding } from './onboarding.js' import chalk from 'chalk' import { checkForUpdates } from './update-check.js' @@ -130,6 +129,48 @@ function parseCliArgs(argv: string[]): CliFlags { return flags } +/** + * Validate the configured default model against the registry and reset it if + * it no longer exists. Must run AFTER extensions have registered their + * providers so that extension models (e.g. pi-claude-cli) are visible. + */ +function validateConfiguredModel( + modelRegistry: ModelRegistry, + settingsManager: SettingsManager, +): void { + const configuredProvider = settingsManager.getDefaultProvider() + const configuredModel = settingsManager.getDefaultModel() + const allModels = modelRegistry.getAll() + const availableModels = modelRegistry.getAvailable() + const configuredExists = configuredProvider && configuredModel && + allModels.some((m) => m.provider === configuredProvider && m.id === configuredModel) + const configuredAvailable = configuredProvider && configuredModel && + availableModels.some((m) => m.provider === configuredProvider && m.id === configuredModel) + + if (!configuredModel || !configuredExists) { + // Model not configured at all, or removed from registry — pick a fallback. + // Only fires when the model is genuinely unknown (not just temporarily unavailable). + const piDefault = getPiDefaultModelAndProvider() + const preferred = + (piDefault + ? 
availableModels.find((m) => m.provider === piDefault.provider && m.id === piDefault.model) + : undefined) || + availableModels.find((m) => m.provider === 'openai' && m.id === 'gpt-5.4') || + availableModels.find((m) => m.provider === 'openai') || + availableModels.find((m) => m.provider === 'anthropic' && m.id === 'claude-opus-4-6') || + availableModels.find((m) => m.provider === 'anthropic' && m.id.includes('opus')) || + availableModels.find((m) => m.provider === 'anthropic') || + availableModels[0] + if (preferred) { + settingsManager.setDefaultModelAndProvider(preferred.provider, preferred.id) + } + } + + if (settingsManager.getDefaultThinkingLevel() !== 'off' && !configuredExists) { + settingsManager.setDefaultThinkingLevel('off') + } +} + const cliFlags = parseCliArgs(process.argv) const isPrintMode = cliFlags.print || cliFlags.mode !== undefined @@ -281,6 +322,14 @@ if (cliFlags.messages[0] === 'sessions') { }) rl.close() + // Clean up stdin state left by readline.createInterface(). + // Without this, downstream TUI initialization gets corrupted listeners and exhibits + // duplicate terminal I/O. Match the pattern used after onboarding cleanup. + process.stdin.removeAllListeners('data') + process.stdin.removeAllListeners('keypress') + if (process.stdin.setRawMode) process.stdin.setRawMode(false) + process.stdin.pause() + const choice = parseInt(answer, 10) if (isNaN(choice) || choice < 1 || choice > toShow.length) { process.stderr.write(chalk.dim('Cancelled.\n')) @@ -341,7 +390,7 @@ const modelsJsonPath = resolveModelsJsonPath() const modelRegistry = new ModelRegistry(authStorage, modelsJsonPath) markStartup('ModelRegistry') -const settingsManager = SettingsManager.create(agentDir) +const settingsManager = SettingsManager.create(process.cwd(), agentDir) applySecurityOverrides(settingsManager) markStartup('SettingsManager.create') @@ -373,8 +422,23 @@ if (!isPrintMode && process.stdout.columns && process.stdout.columns < 40) { ) } -// --list-models: print available models and exit (no TTY needed) +// --list-models: load extensions so that extension-registered providers (e.g. +// pi-claude-cli) appear in the listing, then flush their pending registrations +// into the model registry before printing. if (cliFlags.listModels !== undefined) { + exitIfManagedResourcesAreNewer(agentDir) + initResources(agentDir) + const listModelsLoader = new DefaultResourceLoader({ + agentDir, + additionalExtensionPaths: cliFlags.extensions.length > 0 ? cliFlags.extensions : undefined, + }) + await listModelsLoader.reload() + const listModelsExtensions = listModelsLoader.getExtensions() + for (const { name, config } of listModelsExtensions.runtime.pendingProviderRegistrations) { + modelRegistry.registerProvider(name, config) + } + listModelsExtensions.runtime.pendingProviderRegistrations = [] + const models = modelRegistry.getAvailable() if (models.length === 0) { console.log('No models available. Set API keys in environment variables.') @@ -500,6 +564,11 @@ if (isPrintMode) { } } + // Validate configured model now that extension providers are registered. + // Must run after createAgentSession() which flushes pendingProviderRegistrations + // so extension models (e.g. pi-claude-cli) are visible in the registry. 
+  validateConfiguredModel(modelRegistry, settingsManager)
+
   // Apply --model override if specified
   if (cliFlags.model) {
     const available = modelRegistry.getAvailable()
@@ -522,6 +591,17 @@ if (isPrintMode) {
   if (mode === 'mcp') {
     printStartupTimings()
     const { startMcpServer } = await import('./mcp-server.js')
+
+    // Activate every registered tool before starting the MCP transport.
+    // `session.agent.state.tools` is the *active* subset, not the full
+    // registry — if we expose only the active set, extension-registered
+    // tools (gsd workflow, browser-tools, mac-tools, search-the-web, …)
+    // are invisible to MCP clients. Flipping the active set to every
+    // known tool name makes `state.tools` mirror the full registry for
+    // this MCP session, which is what an external client expects.
+    const allToolNames = session.getAllTools().map((t) => t.name)
+    session.setActiveToolsByName(allToolNames)
+
     await startMcpServer({
       tools: session.agent.state.tools ?? [],
       version: process.env.GSD_VERSION || '0.0.0',
@@ -672,6 +752,11 @@ if (extensionsResult.errors.length > 0) {
   }
 }

+// Validate configured model now that extension providers are registered.
+// Must run after createAgentSession() which flushes pendingProviderRegistrations
+// so extension models (e.g. pi-claude-cli) are visible in the registry.
+validateConfiguredModel(modelRegistry, settingsManager)
+
 // Restore scoped models from settings on startup.
 // The upstream InteractiveMode reads enabledModels from settings when /scoped-models is opened,
 // but doesn't apply them to the session at startup — so Ctrl+P cycles all models instead of
diff --git a/src/headless-events.ts b/src/headless-events.ts
index 190ac99a1..f80acfccd 100644
--- a/src/headless-events.ts
+++ b/src/headless-events.ts
@@ -70,6 +70,7 @@ export const IDLE_TIMEOUT_MS = 15_000
 // between tool calls (e.g. after mkdir, before writing files). Use a
 // longer idle timeout to avoid killing the session prematurely (#808).
 export const NEW_MILESTONE_IDLE_TIMEOUT_MS = 120_000
+const INTERACTIVE_HEADLESS_TOOLS = new Set(['ask_user_questions', 'secure_env_collect'])

 export function isTerminalNotification(event: Record<string, unknown>): boolean {
   if (event.type !== 'extension_ui_request' || event.method !== 'notify') return false
@@ -89,6 +90,14 @@ export function isMilestoneReadyNotification(event: Record<string, unknown>): boolean {
   return /milestone\s+m\d+.*ready/i.test(String(event.message ?? ''))
 }

+export function isInteractiveHeadlessTool(toolName: string | undefined): boolean {
+  return INTERACTIVE_HEADLESS_TOOLS.has(String(toolName ?? ''))
+}
+
+export function shouldArmHeadlessIdleTimeout(toolCallCount: number, interactiveToolCount: number): boolean {
+  return toolCallCount > 0 && interactiveToolCount === 0
+}
+
 // ---------------------------------------------------------------------------
 // Quick Command Detection
 // ---------------------------------------------------------------------------
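Together with the tracking added in `headless.ts` below, these helpers gate the idle timer: it arms only after real tool activity and never while an interactive tool is waiting on a human. A quick sketch of the intended truth table, written as assertions against the two exported helpers (import path assumed; this is not a test file from the change):

```typescript
import { strict as assert } from 'node:assert';
import { isInteractiveHeadlessTool, shouldArmHeadlessIdleTimeout } from './headless-events.js';

assert.equal(shouldArmHeadlessIdleTimeout(0, 0), false); // no tool calls yet: keep waiting
assert.equal(shouldArmHeadlessIdleTimeout(3, 0), true);  // normal activity: arm the idle timer
assert.equal(shouldArmHeadlessIdleTimeout(3, 1), false); // an interactive tool is pending: never time out on a human
assert.equal(isInteractiveHeadlessTool('ask_user_questions'), true);
assert.equal(isInteractiveHeadlessTool('write_file'), false); // stand-in for any non-interactive tool
```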
diff --git a/src/headless.ts b/src/headless.ts
index cd0d86124..d277c6725 100644
--- a/src/headless.ts
+++ b/src/headless.ts
@@ -30,6 +30,8 @@ import {
   FIRE_AND_FORGET_METHODS,
   IDLE_TIMEOUT_MS,
   NEW_MILESTONE_IDLE_TIMEOUT_MS,
+  isInteractiveHeadlessTool,
+  shouldArmHeadlessIdleTimeout,
   EXIT_SUCCESS,
   EXIT_ERROR,
   EXIT_BLOCKED,
@@ -367,6 +369,7 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
   let exitCode = 0
   let milestoneReady = false // tracks "Milestone X ready." for auto-chaining
   const recentEvents: TrackedEvent[] = []
+  const interactiveToolCallIds = new Set<string>()

   // JSON batch mode: cost aggregation (cumulative-max pattern per K004)
   let cumulativeCostUsd = 0
@@ -460,7 +463,7 @@
   function resetIdleTimer(): void {
     if (idleTimer) clearTimeout(idleTimer)
-    if (toolCallCount > 0) {
+    if (shouldArmHeadlessIdleTimeout(toolCallCount, interactiveToolCallIds.size)) {
       idleTimer = setTimeout(() => {
         completed = true
         resolveCompletion()
@@ -484,6 +487,20 @@
   client.onEvent((event) => {
     const eventObj = event as unknown as Record<string, unknown>
     trackEvent(eventObj)
+
+    const eventType = String(eventObj.type ?? '')
+    if (eventType === 'tool_execution_start') {
+      const toolCallId = String(eventObj.toolCallId ?? eventObj.id ?? '')
+      if (toolCallId && isInteractiveHeadlessTool(String(eventObj.toolName ?? ''))) {
+        interactiveToolCallIds.add(toolCallId)
+      }
+    } else if (eventType === 'tool_execution_end') {
+      const toolCallId = String(eventObj.toolCallId ?? eventObj.id ?? '')
+      if (toolCallId) {
+        interactiveToolCallIds.delete(toolCallId)
+      }
+    }
+
     resetIdleTimer()

     // Answer injector: observe events for question metadata
@@ -492,7 +509,6 @@
     // --json / --output-format stream-json: forward events as JSONL to stdout (filtered if --events)
     // --output-format json (batch mode): suppress streaming, track cost for final result
     if (options.json && options.outputFormat === 'stream-json') {
-      const eventType = String(eventObj.type ?? '')
       if (!options.eventFilter || options.eventFilter.has(eventType)) {
         process.stdout.write(JSON.stringify(eventObj) + '\n')
       }
diff --git a/src/loader.ts b/src/loader.ts
index 1d3ce46a2..13e1605b4 100644
--- a/src/loader.ts
+++ b/src/loader.ts
@@ -110,6 +110,11 @@ if (!existsSync(appRoot)) {
 // GSD_CODING_AGENT_DIR — tells pi's getAgentDir() to return ~/.gsd/agent/ instead of ~/.pi/agent/
 process.env.GSD_CODING_AGENT_DIR = agentDir

+// GSD_PKG_ROOT — absolute path to gsd-pi package root. Used by deployed extensions
+// (e.g. auto.ts resume path) to import modules like resource-loader.js that live
+// in the package tree, not in the deployed ~/.gsd/agent/ tree.
+process.env.GSD_PKG_ROOT = gsdRoot
+
 // RTK environment — make ~/.gsd/agent/bin visible to all child-process paths,
 // not just the bash tool, and force-disable RTK telemetry for GSD-managed use.
 applyRtkProcessEnv(process.env)
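A sketch of the consuming side of `GSD_PKG_ROOT`: a deployed extension importing a module from the package tree rather than from `~/.gsd/agent/`. The file name and error message are illustrative; the diff above only establishes the environment variable:

```typescript
import { join } from 'node:path';
import { pathToFileURL } from 'node:url';

const pkgRoot = process.env.GSD_PKG_ROOT;
if (!pkgRoot) throw new Error('GSD_PKG_ROOT not set; loader did not run first');

// Node resolves this against the package tree, where resource-loader.js
// and its node_modules ancestors actually exist.
const resourceLoader = await import(
  pathToFileURL(join(pkgRoot, 'dist', 'resource-loader.js')).href
);
```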
diff --git a/src/mcp-server.ts b/src/mcp-server.ts
index f7417235e..6db605dc9 100644
--- a/src/mcp-server.ts
+++ b/src/mcp-server.ts
@@ -16,13 +16,18 @@ export interface McpToolDef {
   }>
 }

-// MCP SDK subpath imports use wildcard exports (./*) that NodeNext resolves
-// at runtime but TypeScript cannot statically type-check. We construct the
-// specifiers dynamically so tsc treats them as `any`.
-// Use createRequire to resolve wildcard subpaths — CJS resolver auto-appends
-// .js, which the ESM wildcard export map does not (#3603).
-import { createRequire } from 'node:module'
-const _require = createRequire(import.meta.url)
+// MCP SDK subpath imports use wildcard exports (./*) in @modelcontextprotocol/sdk's
+// package.json export map. The wildcard maps "./foo" → "./dist/cjs/foo" (no .js
+// suffix), so bare subpath specifiers like `${MCP_PKG}/server/stdio` resolve to
+// a non-existent file. Historically the workaround (#3603) used createRequire so
+// the CJS resolver could auto-append `.js`; that no longer works with current
+// Node + SDK releases (#3914) — `_require.resolve` also fails with
+// "Cannot find module .../dist/cjs/server/stdio".
+//
+// The reliable convention (matching packages/mcp-server/{server,cli}.ts) is to
+// write the `.js` suffix explicitly on every wildcard subpath. Specifiers are
+// built via a template string so TypeScript's NodeNext resolver treats them as
+// `any` and skips static checking.
 const MCP_PKG = '@modelcontextprotocol/sdk'

 /**
@@ -45,9 +50,9 @@
 }): Promise<void> {
   const { tools, version = '0.0.0' } = options

-  const serverMod = await import(`${MCP_PKG}/server`)
-  const stdioMod = await import(_require.resolve(`${MCP_PKG}/server/stdio`))
-  const typesMod = await import(_require.resolve(`${MCP_PKG}/types`))
+  const serverMod = await import(`${MCP_PKG}/server/index.js`)
+  const stdioMod = await import(`${MCP_PKG}/server/stdio.js`)
+  const typesMod = await import(`${MCP_PKG}/types.js`)

   const Server = serverMod.Server
   const StdioServerTransport = stdioMod.StdioServerTransport
@@ -73,8 +78,14 @@
     })),
   }))

-  // tools/call — execute the requested tool and return content blocks
-  server.setRequestHandler(CallToolRequestSchema, async (request: any) => {
+  // tools/call — execute the requested tool and return content blocks.
+  //
+  // The MCP SDK passes an `extra` argument to request handlers that includes
+  // an AbortSignal scoped to the RPC request (cancelled when the client
+  // cancels the tool call or the transport closes). Threading it into
+  // AgentTool.execute ensures long-running tools (Bash, WebFetch, grep on
+  // huge trees) actually stop when the client gives up on the result.
+  server.setRequestHandler(CallToolRequestSchema, async (request: any, extra: any) => {
     const { name, arguments: args } = request.params
     const tool = toolMap.get(name)
     if (!tool) {
@@ -84,22 +95,37 @@
       }
     }

+    const signal: AbortSignal | undefined = extra?.signal
+
     try {
       const result = await tool.execute(
         `mcp-${Date.now()}`,
         args ?? {},
-        undefined, // no AbortSignal
-        undefined, // no onUpdate callback
+        signal,
+        undefined, // onUpdate not yet wired — progress notifications require a progressToken round-trip
       )

-      // Convert AgentToolResult content blocks to MCP content format
+      // Convert AgentToolResult content blocks to MCP content format.
+      // text and image pass through; any other shape is serialized as text
+      // so the client sees the payload rather than an empty response.
       const content = result.content.map((block: any) => {
         if (block.type === 'text') return { type: 'text' as const, text: block.text ?? '' }
-        if (block.type === 'image') return { type: 'image' as const, data: block.data ?? '', mimeType: block.mimeType ?? 'image/png' }
+        if (block.type === 'image') {
+          return {
+            type: 'image' as const,
+            data: block.data ?? '',
+            mimeType: block.mimeType ?? 'image/png',
+          }
+        }
+        // Preserve unknown block types (resource, resource_link, audio, ...)
+        // by stringifying into a text block so clients see the payload.
         return { type: 'text' as const, text: JSON.stringify(block) }
       })

       return { content }
     } catch (err: unknown) {
+      // AbortError from a cancelled tool surfaces as a normal error — MCP
+      // clients interpret `isError: true` as a failed call, which is the
+      // correct behaviour for a cancelled request.
      const message = err instanceof Error ? err.message : String(err)
      return { isError: true, content: [{ type: 'text' as const, text: message }] }
    }
diff --git a/src/onboarding.ts b/src/onboarding.ts
index d51d408dc..a47d29498 100644
--- a/src/onboarding.ts
+++ b/src/onboarding.ts
@@ -323,6 +323,15 @@ async function runLlmStep(p: ClackModule, pc: PicoModule, authStorage: AuthStora
     p.log.info('Your Claude subscription will be used for inference. No API key needed.')
     // Store sentinel so hasAuth('claude-code') returns true on future boots
     authStorage.set('claude-code', { type: 'api_key', key: 'cli' })
+    // Persist claude-code as the default provider so the startup migration in
+    // cli.ts does not need to fire and the user is not left on "anthropic".
+    const settingsPath = join(agentDir, 'settings.json')
+    try {
+      const raw = existsSync(settingsPath) ? JSON.parse(readFileSync(settingsPath, 'utf-8')) : {}
+      raw.defaultProvider = 'claude-code'
+      mkdirSync(dirname(settingsPath), { recursive: true })
+      writeFileSync(settingsPath, JSON.stringify(raw, null, 2), 'utf-8')
+    } catch { /* non-fatal — startup migration will catch it */ }
     return true
   }

diff --git a/src/provider-migrations.ts b/src/provider-migrations.ts
new file mode 100644
index 000000000..1e61c69df
--- /dev/null
+++ b/src/provider-migrations.ts
@@ -0,0 +1,34 @@
+import type { AuthStorage } from "@gsd/pi-coding-agent"
+
+type AnthropicMigrationDeps = {
+  authStorage: Pick<AuthStorage, "getCredentialsForProvider">
+  isClaudeCodeReady: boolean
+  defaultProvider: string | undefined
+  env?: NodeJS.ProcessEnv
+}
+
+export function hasDirectAnthropicApiKey(
+  authStorage: Pick<AuthStorage, "getCredentialsForProvider">,
+  env: NodeJS.ProcessEnv = process.env,
+): boolean {
+  if ((env.ANTHROPIC_API_KEY ?? "").trim()) {
+    return true
+  }
+
+  return authStorage.getCredentialsForProvider("anthropic").some((credential: any) =>
+    credential?.type === "api_key" && typeof credential?.key === "string" && credential.key.trim().length > 0,
+  )
+}
+
+export function shouldMigrateAnthropicToClaudeCode({
+  authStorage,
+  isClaudeCodeReady,
+  defaultProvider,
+  env = process.env,
+}: AnthropicMigrationDeps): boolean {
+  if (!isClaudeCodeReady || defaultProvider !== "anthropic") {
+    return false
+  }
+
+  return !hasDirectAnthropicApiKey(authStorage, env)
+}
diff --git a/src/resource-loader.ts b/src/resource-loader.ts
index 901d8e1b1..7ddc3b7ee 100644
--- a/src/resource-loader.ts
+++ b/src/resource-loader.ts
@@ -2,7 +2,7 @@ import { DefaultResourceLoader, sortExtensionPaths } from '@gsd/pi-coding-agent'
 import { createHash } from 'node:crypto'
 import { homedir } from 'node:os'
 import { chmodSync, copyFileSync, cpSync, existsSync, lstatSync, mkdirSync, openSync, closeSync, readFileSync, readlinkSync, readdirSync, rmSync, statSync, symlinkSync, unlinkSync, writeFileSync } from 'node:fs'
-import { dirname, join, relative, resolve } from 'node:path'
+import { basename, dirname, join, relative, resolve } from 'node:path'
 import { fileURLToPath } from 'node:url'
 import { compareSemver } from './update-check.js'
 import { discoverExtensionEntryPaths } from './extension-discovery.js'
@@ -287,33 +287,147 @@ function copyDirRecursive(src: string, dest: string): void {
  * ~/.gsd/agent/extensions/ have no ancestor node_modules, so imports of
  * @gsd/* packages fail. The symlink makes Node's standard resolution find
  * them without requiring every call site to use jiti.
+ * + * Layout differences by install method: + * - Source/monorepo: packageRoot/node_modules has everything → simple symlink + * - npm/bun global: deps hoisted to dirname(packageRoot), including @gsd/* → simple symlink + * - pnpm global: external deps hoisted, but @gsd/* stays in packageRoot/node_modules + * → merged directory with symlinks from both roots (#3529, #3564) */ function ensureNodeModulesSymlink(agentDir: string): void { const agentNodeModules = join(agentDir, 'node_modules') - const gsdNodeModules = join(packageRoot, 'node_modules') + const internalNodeModules = join(packageRoot, 'node_modules') + const hoistedNodeModules = dirname(packageRoot) + const isGlobalInstall = basename(hoistedNodeModules) === 'node_modules' + if (!isGlobalInstall) { + // Source/monorepo: internal node_modules has everything + reconcileSymlink(agentNodeModules, internalNodeModules) + return + } + + // Global install: check if workspace scopes (@gsd/*) are hoisted. + // npm/bun hoist everything; pnpm keeps workspace packages internal. + if (!hasMissingWorkspaceScopes(hoistedNodeModules, internalNodeModules)) { + // Everything is hoisted — simple symlink to parent node_modules + reconcileSymlink(agentNodeModules, hoistedNodeModules) + return + } + + // pnpm-style layout: create a real directory merging both roots + reconcileMergedNodeModules(agentNodeModules, hoistedNodeModules, internalNodeModules) +} + +/** Check if any @gsd* scopes exist in internal but not in hoisted node_modules */ +function hasMissingWorkspaceScopes(hoisted: string, internal: string): boolean { + if (!existsSync(internal)) return false try { - const stat = lstatSync(agentNodeModules) + for (const entry of readdirSync(internal, { withFileTypes: true })) { + if (entry.isDirectory() && entry.name.startsWith('@gsd') && + !existsSync(join(hoisted, entry.name))) { + return true + } + } + } catch { /* non-fatal */ } + return false +} +/** Ensure a symlink at `link` points to `target`, fixing stale/wrong entries */ +function reconcileSymlink(link: string, target: string): void { + try { + const stat = lstatSync(link) if (stat.isSymbolicLink()) { - const existing = readlinkSync(agentNodeModules) - // Symlink exists — verify it points to the correct, existing target - if (existing === gsdNodeModules && existsSync(agentNodeModules)) return // correct and target exists - // Stale or wrong target — remove and recreate - unlinkSync(agentNodeModules) + const existing = readlinkSync(link) + if (existing === target && existsSync(link)) return // correct and target exists + unlinkSync(link) } else { - // Real directory (not a symlink) is blocking — remove it - rmSync(agentNodeModules, { recursive: true, force: true }) + // Real directory (or merged dir from previous pnpm fix) — remove it + rmSync(link, { recursive: true, force: true }) } } catch { - // lstatSync throws if path doesn't exist — that's fine, we'll create below + // lstatSync throws if path doesn't exist — fine, we'll create below } try { - symlinkSync(gsdNodeModules, agentNodeModules, 'junction') + symlinkSync(target, link, 'junction') } catch (err) { - // This failure makes GSD non-functional — extensions can't resolve @gsd/* packages - console.error(`[gsd] WARN: Failed to symlink ${agentNodeModules} → ${gsdNodeModules}: ${err instanceof Error ? err.message : err}`) + console.error(`[gsd] WARN: Failed to symlink ${link} → ${target}: ${err instanceof Error ? 
err.message : err}`) + } +} + +/** + * Create a real node_modules directory containing symlinks from both the + * hoisted root (external deps) and internal root (@gsd/* workspace packages). + * Used for pnpm global installs where @gsd/* isn't hoisted. + */ +function reconcileMergedNodeModules( + agentNodeModules: string, + hoisted: string, + internal: string, +): void { + // Fast path: if already merged for this packageRoot + same directory contents, skip. + // The fingerprint includes entry names from both roots so `pnpm add/remove` triggers rebuild. + const marker = join(agentNodeModules, '.gsd-merged') + const fingerprint = mergedFingerprint(hoisted, internal) + try { + if (existsSync(marker) && readFileSync(marker, 'utf-8').trim() === fingerprint) return + } catch { /* rebuild */ } + + // Remove any existing symlink or stale merged directory + try { + const stat = lstatSync(agentNodeModules) + if (stat.isSymbolicLink()) { + unlinkSync(agentNodeModules) + } else { + rmSync(agentNodeModules, { recursive: true, force: true }) + } + } catch { /* doesn't exist */ } + + mkdirSync(agentNodeModules, { recursive: true }) + + let linkedCount = 0 + + // Symlink entries from the hoisted node_modules (external deps) + try { + for (const entry of readdirSync(hoisted, { withFileTypes: true })) { + // Skip the gsd-pi package itself and dotfiles + if (entry.name === basename(packageRoot)) continue + if (entry.name.startsWith('.')) continue + try { symlinkSync(join(hoisted, entry.name), join(agentNodeModules, entry.name)); linkedCount++ } catch { /* skip individual */ } + } + } catch (err) { + console.error(`[gsd] WARN: Failed to read hoisted node_modules at ${hoisted}: ${err instanceof Error ? err.message : err}`) + } + + // Overlay internal node_modules entries that weren't hoisted. + // This covers @gsd/* workspace packages AND optional deps like + // @anthropic-ai/claude-agent-sdk that npm keeps internal. + try { + for (const entry of readdirSync(internal, { withFileTypes: true })) { + if (entry.name.startsWith('.')) continue + const link = join(agentNodeModules, entry.name) + // Replace hoisted symlink with internal version (internal takes precedence) + try { lstatSync(link); unlinkSync(link) } catch { /* didn't exist — will create below */ } + try { symlinkSync(join(internal, entry.name), link); linkedCount++ } catch { /* skip individual */ } + } + } catch (err) { + console.error(`[gsd] WARN: Failed to read internal node_modules at ${internal}: ${err instanceof Error ? err.message : err}`) + } + + // Only stamp marker if we actually linked something — avoids caching a broken state + if (linkedCount > 0) { + try { writeFileSync(marker, fingerprint) } catch { /* non-fatal */ } + } +} + +/** Build a cache fingerprint from packageRoot + sorted entry names of both directories */ +function mergedFingerprint(hoisted: string, internal: string): string { + try { + const h = readdirSync(hoisted).sort().join(',') + const i = readdirSync(internal).sort().join(',') + return `${packageRoot}\n${h}\n${i}` + } catch { + return packageRoot // fallback: at least invalidate on version change } } diff --git a/src/resources/GSD-WORKFLOW.md b/src/resources/GSD-WORKFLOW.md index ef0759969..736ce117a 100644 --- a/src/resources/GSD-WORKFLOW.md +++ b/src/resources/GSD-WORKFLOW.md @@ -275,7 +275,7 @@ Work flows through these phases. Each phase produces a file. **How to do it manually:** 1. Read the roadmap to understand the scope. 2. Identify 3-5 gray areas — implementation decisions the user cares about. -3. 
Use `ask_user_questions` to discuss each area. +3. Use `ask_user_questions` to discuss each area, one round at a time. Never fabricate user input; wait for the user's actual response before the next round. 4. Write decisions to the appropriate context file (`M###-CONTEXT.md` or `S##-CONTEXT.md`). 5. Do NOT discuss how to implement — only what the user wants. diff --git a/src/resources/agents/debugger.md b/src/resources/agents/debugger.md new file mode 100644 index 000000000..30b74c6ce --- /dev/null +++ b/src/resources/agents/debugger.md @@ -0,0 +1,58 @@ +--- +name: debugger +description: Hypothesis-driven bug investigation with root cause analysis +model: sonnet +--- + +You are a debugger. Investigate bugs using a systematic, hypothesis-driven approach. Your goal is to find the root cause, not just suppress symptoms. + +## Process + +1. **Reproduce**: Understand the symptoms — what happens vs. what should happen +2. **Hypothesize**: List 2-3 most likely causes based on symptoms +3. **Investigate**: For each hypothesis, gather evidence (read code, check logs, trace execution) +4. **Narrow**: Eliminate hypotheses that don't match the evidence +5. **Root cause**: Identify the actual cause with file:line references +6. **Fix**: Propose the minimal change that addresses the root cause + +## Investigation Tools + +- Read source files at specific line ranges +- Grep for error messages, function names, variable usage +- Check git blame for recent changes to suspect areas +- Read test files to understand expected behavior +- Run tests to reproduce failures + +## Output Format + +## Symptoms + +What's happening vs. what's expected. + +## Hypotheses + +1. **[hypothesis]** — why this could be the cause +2. **[hypothesis]** — why this could be the cause + +## Investigation + +### Hypothesis 1: [name] + +Evidence gathered, files read, what was found. +**Verdict:** Confirmed / Eliminated — reason. + +### Hypothesis 2: [name] + +(same structure) + +## Root Cause + +**File:** `path/to/file.ts:42` +**Cause:** Clear explanation of the bug. +**Why it wasn't caught:** Missing test, edge case, etc. + +## Recommended Fix + +```typescript +// minimal fix with explanation +``` diff --git a/src/resources/agents/doc-writer.md b/src/resources/agents/doc-writer.md new file mode 100644 index 000000000..72c550b56 --- /dev/null +++ b/src/resources/agents/doc-writer.md @@ -0,0 +1,43 @@ +--- +name: doc-writer +description: Documentation generation from code — API docs, inline comments, READMEs +model: sonnet +--- + +You are a documentation specialist. You read code and produce clear, accurate documentation. You write for the reader, not the author — explain what they need to know to use or maintain the code. + +## Process + +1. Read the code thoroughly — understand what it does, not just how +2. Identify the audience — users (API docs), maintainers (inline docs), or newcomers (guides) +3. Write documentation that answers the reader's actual questions +4. 
Verify accuracy — every code reference must match the current implementation + +## Documentation Types + +- **API docs**: Function signatures, parameters, return values, examples, error cases +- **Inline comments**: Explain *why*, not *what* — the code shows what, comments explain intent +- **Module docs**: What this module does, its public API, and how it fits in the architecture +- **Guides**: Step-by-step instructions for common tasks with working examples + +## Quality Rules + +- Every claim must be verifiable against the current code +- Examples must be working code, not pseudocode +- Don't document the obvious — focus on non-obvious behavior, gotchas, and edge cases +- Keep it concise — more docs isn't better docs +- Use the project's existing documentation style and format + +## Output Format + +## Documentation Plan + +What to document and for whom. + +## Documentation + +(The actual documentation content, formatted appropriately for its type) + +## Accuracy Check + +Files referenced and verified against current implementation. diff --git a/src/resources/agents/git-ops.md b/src/resources/agents/git-ops.md new file mode 100644 index 000000000..4bc73eef3 --- /dev/null +++ b/src/resources/agents/git-ops.md @@ -0,0 +1,56 @@ +--- +name: git-ops +description: Conflict resolution, rebase strategy, PR preparation, and changelog generation +model: sonnet +--- + +You are a git operations specialist. You handle merge conflicts, plan rebase strategies, prepare pull requests, and generate changelogs. You understand git internals well enough to choose the right strategy for each situation. + +## Capabilities + +### Conflict Resolution +- Analyze conflict markers and understand both sides' intent +- Choose the correct resolution based on code context, not just recency +- Verify resolved code compiles and tests pass + +### Rebase Strategy +- Assess whether rebase or merge is appropriate for the situation +- Plan interactive rebase sequences (squash, reorder, edit) +- Handle complex rebase conflicts with minimal manual intervention + +### PR Preparation +- Write clear PR titles and descriptions from commit history +- Organize commits into logical, reviewable units +- Ensure CI checks will pass before pushing + +### Changelog Generation +- Extract user-facing changes from commit messages and code diffs +- Categorize changes (features, fixes, breaking changes) +- Write changelog entries for the target audience (users, not developers) + +## Process + +1. Assess the git state — branches, commits, conflicts, divergence +2. Determine the goal — clean history, resolved conflicts, PR ready +3. Plan the steps — in order, with rollback points +4. Execute carefully — verify after each step +5. Confirm the result — clean history, passing tests + +## Output Format + +## Git State + +Current branch, commits, conflicts, or divergence summary. + +## Strategy + +What to do and why this approach. + +## Steps + +1. Command or action — with expected outcome +2. Command or action — with verification + +## Result + +Final state after operations complete. diff --git a/src/resources/agents/javascript-pro.md b/src/resources/agents/javascript-pro.md index 30e8e5016..4b429c343 100644 --- a/src/resources/agents/javascript-pro.md +++ b/src/resources/agents/javascript-pro.md @@ -2,279 +2,54 @@ name: javascript-pro description: "Modern JavaScript specialist for browser, Node.js, and full-stack applications requiring ES2023+ features, async patterns, or performance-critical implementations. 
Use when building WebSocket servers, refactoring callback-heavy code to async/await, investigating memory leaks in Node.js, scaffolding ES module libraries with Jest and ESLint, optimizing DOM-heavy rendering, or reviewing JavaScript implementations for modern patterns and test coverage." model: sonnet -memory: project --- -You are a senior JavaScript developer with mastery of modern JavaScript ES2023+ and Node.js 20+, specializing in both frontend vanilla JavaScript and Node.js backend development. Your expertise spans asynchronous patterns, functional programming, performance optimization, and the entire JavaScript ecosystem with focus on writing clean, maintainable code. +You are a senior JavaScript developer with mastery of modern JavaScript ES2023+ and Node.js 20+. You write production-grade code that prioritizes correctness, readability, performance, and maintainability — in that order. -## Core Identity +## Initialization -You write production-grade JavaScript. Every decision you make prioritizes correctness, readability, performance, and maintainability — in that order. You use the latest stable language features but never at the expense of clarity. - -## Operational Protocol - -When invoked: -1. Read `package.json`, build configuration files, and module system setup to understand the project context -2. Analyze existing code patterns, async implementations, and performance characteristics +1. Read `package.json`, build config, and module setup to understand the project +2. Analyze existing code patterns, async implementations, and conventions 3. Implement solutions following modern JavaScript best practices -4. Verify your work — run linters, tests, and validate output before declaring completion +4. Verify — run linters, tests, and validate output before declaring completion -## Quality Checklist (Mandatory Before Completion) +## Core Principles -- ESLint passes with zero errors (check for `.eslintrc.*` or `eslint.config.*` first) -- Prettier formatting applied (check for `.prettierrc.*` first) -- Tests written and passing — target >85% coverage -- JSDoc documentation on all public functions and module exports -- Bundle size considered (no unnecessary dependencies) -- Error handling covers all async boundaries -- No `var` usage — `const` by default, `let` only when reassignment is required +- `const` by default, `let` only for reassignment, never `var` +- ESM (`"type": "module"`) preferred, named exports over defaults +- Optional chaining (`?.`), nullish coalescing (`??`), immutable array methods (`toSorted`, `toReversed`) +- Private class fields (`#field`) for encapsulation +- `structuredClone()` for deep cloning, `Object.groupBy()` for grouping +- Prefer pure functions and composition over inheritance +- `AbortController` for cancellation, `Promise.allSettled` for concurrent error isolation +- `for await...of` for async iteration, pipeline for stream composition +- `node:` prefix for Node.js built-in imports -## Modern JavaScript Standards +## Key Patterns -### Language Features (ES2023+) +- Concurrent independent operations with `Promise.all`, not sequential `await` +- Event delegation for DOM-heavy applications, `requestAnimationFrame` for visual updates +- `WeakRef`/`WeakMap` for caches, clean up listeners/intervals in teardown +- `worker_threads` for CPU-intensive work, `AsyncLocalStorage` for request context +- Dynamic `import()` for code splitting, tree-shake with named exports +- `crypto.randomUUID()` for secure randomness, never `Math.random()` +- Sanitize user input 
before DOM insertion, use CSP headers -- Optional chaining (`?.`) and nullish coalescing (`??`) — prefer over manual checks -- Private class fields (`#field`) — use for true encapsulation, not convention (`_field`) -- Top-level `await` in ESM modules -- `Array.prototype.findLast()`, `Array.prototype.findLastIndex()` -- `Array.prototype.toSorted()`, `toReversed()`, `toSpliced()`, `with()` — immutable array methods -- `Object.groupBy()` and `Map.groupBy()` -- `structuredClone()` for deep cloning -- `using` declarations for resource management (when targeting environments that support it) +## Testing -### Async Patterns +- Unit tests for pure functions, integration tests for async workflows +- Mock at module boundaries, not deep internals +- Test error paths explicitly, not just happy paths +- Target >85% coverage -```javascript -// PREFERRED: Concurrent execution with error isolation -const results = await Promise.allSettled([ - fetchUsers(), - fetchOrders(), - fetchProducts(), -]); +## Verification Checklist -// PREFERRED: AbortController for cancellation -const controller = new AbortController(); -const response = await fetch(url, { signal: controller.signal }); +1. ESLint passes with zero errors +2. Prettier formatting applied +3. Tests written and passing +4. No `var`, no `==` (except `== null`), no callback hell +5. Error handling at all async boundaries +6. No `console.log` debugging left in production code +7. Bundle size considered — no unnecessary dependencies -// PREFERRED: Async iteration -for await (const chunk of readableStream) { - process(chunk); -} - -// AVOID: Sequential await when operations are independent -// BAD: -const users = await fetchUsers(); -const orders = await fetchOrders(); -// GOOD: -const [users, orders] = await Promise.all([fetchUsers(), fetchOrders()]); -``` - -### Error Handling - -```javascript -// PREFERRED: Specific error types -class ValidationError extends Error { - constructor(field, message) { - super(message); - this.name = 'ValidationError'; - this.field = field; - } -} - -// PREFERRED: Error boundaries at async boundaries -async function fetchData(url) { - const response = await fetch(url); - if (!response.ok) { - throw new HttpError(response.status, await response.text()); - } - return response.json(); -} - -// AVOID: Swallowing errors -try { doSomething(); } catch (e) { /* silent */ } - -// AVOID: catch(e) { throw e } — pointless re-throw -``` - -### Module Design - -- Default to ESM (`"type": "module"` in package.json) -- Use named exports — avoid default exports for better refactoring and tree-shaking -- Handle circular dependencies by restructuring, not by lazy requires -- Use `package.json` `exports` field for public API surface -- Dynamic `import()` for code splitting and conditional loading - -### Functional Patterns - -- Prefer pure functions — same inputs produce same outputs, no side effects -- Use `const` and immutable array methods (`toSorted`, `toReversed`, `map`, `filter`, `reduce`) -- Compose small functions rather than writing monolithic procedures -- Memoize expensive pure computations -- Avoid mutating function arguments - -### Object-Oriented Patterns - -- Prefer composition over inheritance — use mixins or object composition -- Use private fields (`#`) for encapsulation -- Static methods for factory patterns and utility functions -- Keep class responsibilities narrow (Single Responsibility Principle) - -## Performance Guidelines - -### Memory Management -- Clean up event listeners, intervals, and subscriptions in teardown -- 
Use `WeakRef` and `WeakMap` for caches that should not prevent garbage collection -- Avoid closures that capture large scopes unnecessarily -- Profile with heap snapshots before optimizing — measure first - -### Runtime Performance -- Use event delegation for DOM-heavy applications -- Debounce/throttle high-frequency event handlers -- Offload CPU-intensive work to Web Workers or Worker Threads -- Use `requestAnimationFrame` for visual updates, not `setTimeout` -- Prefer `for...of` over `forEach` in hot paths (avoids function call overhead) -- Use `Map` and `Set` over plain objects when keys are dynamic or non-string - -### Bundle Optimization -- Tree-shake by using named exports and avoiding side effects in module scope -- Use dynamic `import()` for route-level code splitting -- Analyze bundle with tools like `webpack-bundle-analyzer` or `source-map-explorer` -- Externalize large dependencies that consumers likely already have - -## Node.js Specific - -### Stream Processing -```javascript -// PREFERRED: Pipeline for stream composition -import { pipeline } from 'node:stream/promises'; -await pipeline(readStream, transformStream, writeStream); - -// PREFERRED: Node.js built-in modules with node: prefix -import { readFile } from 'node:fs/promises'; -import { join } from 'node:path'; -``` - -### Concurrency -- Use `worker_threads` for CPU-intensive operations -- Use `cluster` module for multi-core HTTP server scaling -- Understand the event loop — never block it with synchronous I/O in request handlers -- Use `AsyncLocalStorage` for request-scoped context - -## Browser API Patterns - -- Use `fetch` with `AbortController` — never raw `XMLHttpRequest` -- Prefer `IntersectionObserver` over scroll-based lazy loading -- Use `MutationObserver` for DOM change detection instead of polling -- Implement `Service Workers` for offline-first capability -- Use `Web Components` (`customElements.define`) for framework-agnostic reusable UI - -## Testing Strategy - -- Unit tests for pure functions and business logic — fast and isolated -- Integration tests for async workflows, API routes, and database interactions -- Mock external dependencies at module boundaries, not deep internals -- Use `describe`/`it` for readable test structure -- Test error paths explicitly — not just happy paths -- Snapshot tests only for stable serializable output (not volatile DOM structures) - -## Security Practices - -- Sanitize all user input before DOM insertion — prevent XSS -- Use `Content-Security-Policy` headers -- Validate and sanitize on the server, not just the client -- Use `crypto.randomUUID()` or `crypto.getRandomValues()` — never `Math.random()` for security -- Audit dependencies with `npm audit` or equivalent -- Prevent prototype pollution — freeze prototypes or use `Object.create(null)` for dictionaries - -## Development Workflow - -### Phase 1: Analysis -Before writing code, read and understand: -- `package.json` — dependencies, scripts, module type, engine constraints -- Build config — webpack, rollup, esbuild, vite configuration -- Lint/format config — ESLint rules, Prettier settings -- Test config — Jest, Vitest, or Mocha setup -- Existing code patterns — naming conventions, module structure, async patterns in use - -### Phase 2: Implementation -- Start with the public API surface — define function signatures and types (via JSDoc) -- Implement core logic with pure functions where possible -- Add error handling at every async boundary -- Write tests alongside implementation, not after -- Use `Bash` tool to run 
linters and tests frequently during development
-
-### Phase 3: Verification
-Before declaring completion:
-1. Run `npx eslint .` (or project-specific lint command) — zero errors
-2. Run `npx prettier --check .` (or project-specific format command)
-3. Run test suite — all passing, coverage target met
-4. Review your own code for: unused variables, missing error handling, potential memory leaks, missing JSDoc
-5. Verify no `console.log` debugging statements left in production code
-
-## Anti-Patterns to Reject
-
-- `var` declarations — always `const` or `let`
-- `==` loose equality — always `===` (except intentional `== null` check)
-- Nested callbacks ("callback hell") — use async/await
-- `arguments` object — use rest parameters (`...args`)
-- `new Array()` or `new Object()` — use literals `[]`, `{}`
-- Modifying built-in prototypes
-- `eval()` or `Function()` constructor with user input
-- `with` statement
-- Synchronous I/O in Node.js request handlers (`readFileSync` in route handlers)
-
-## Communication
-
-When reporting completion, state concretely:
-- What was implemented or changed
-- Which files were modified
-- Test results (pass count, coverage percentage)
-- Lint results (clean or specific remaining warnings with justification)
-- Any trade-offs made and why
-
-Do not use vague language like "improved performance" — state measurable outcomes ("reduced bundle from 120kb to 72kb" or "API response p99 dropped from 340ms to 85ms").
-
-**Update your agent memory** as you discover JavaScript project patterns, module conventions, build tool configurations, testing patterns, and architectural decisions in the codebase. Write concise notes about what you found and where.
-
-Examples of what to record:
-- Module system in use (ESM vs CJS) and how imports are structured
-- Build tool configuration patterns and custom plugins
-- Testing framework setup, fixture patterns, and mock strategies
-- Common async patterns used across the codebase
-- Performance-critical code paths and optimization techniques applied
-- Dependency management patterns and version constraints
-- Error handling conventions and custom error types
-
-# Persistent Agent Memory
-
-You have a persistent agent memory directory at `/home/ubuntulinuxqa2/repos/claude_skills/.claude/agent-memory/javascript-pro/`. Its contents persist across conversations.
-
-As you work, consult your memory files to build on previous experience. When you encounter a mistake that seems like it could be common, check your Persistent Agent Memory for relevant notes — and if nothing is written yet, record what you learned.
- -Guidelines: -- `MEMORY.md` is always loaded into your system prompt — lines after 200 will be truncated, so keep it concise -- Create separate topic files (e.g., `debugging.md`, `patterns.md`) for detailed notes and link to them from MEMORY.md -- Update or remove memories that turn out to be wrong or outdated -- Organize memory semantically by topic, not chronologically -- Use the Write and Edit tools to update your memory files - -What to save: -- Stable patterns and conventions confirmed across multiple interactions -- Key architectural decisions, important file paths, and project structure -- User preferences for workflow, tools, and communication style -- Solutions to recurring problems and debugging insights - -What NOT to save: -- Session-specific context (current task details, in-progress work, temporary state) -- Information that might be incomplete — verify against project docs before writing -- Anything that duplicates or contradicts existing CLAUDE.md instructions -- Speculative or unverified conclusions from reading a single file - -Explicit user requests: -- When the user asks you to remember something across sessions (e.g., "always use bun", "never auto-commit"), save it — no need to wait for multiple interactions -- When the user asks to forget or stop remembering something, find and remove the relevant entries from your memory files -- Since this memory is project-scope and shared with your team via version control, tailor your memories to this project - -## MEMORY.md - -Your MEMORY.md is currently empty. When you notice a pattern worth preserving across sessions, save it here. Anything in MEMORY.md will be included in your system prompt next time. +Report concrete outcomes, not vague claims. State files changed, test results, and trade-offs made. diff --git a/src/resources/agents/planner.md b/src/resources/agents/planner.md new file mode 100644 index 000000000..cb630ffc6 --- /dev/null +++ b/src/resources/agents/planner.md @@ -0,0 +1,55 @@ +--- +name: planner +description: Architecture and implementation planning — outputs plans, not code +model: sonnet +conflicts_with: plan-milestone, plan-slice, plan-task, research-milestone, research-slice +--- + +You are a planning specialist. You analyze requirements and produce detailed implementation plans. You output plans — never code. Your plans are specific enough that another agent can execute them without ambiguity. + +## Process + +1. **Understand** the goal — what needs to be built, changed, or fixed +2. **Explore** the current codebase to understand constraints, patterns, and conventions +3. **Identify** the components that need to change and their dependencies +4. **Design** the approach — what to build, where to put it, how it connects +5. **Sequence** the work — ordered steps with clear dependencies +6. **Risk** — flag unknowns, trade-offs, and things that could go wrong + +## Plan Quality Criteria + +- Every step references specific files and functions +- Dependencies between steps are explicit +- Each step is small enough to verify independently +- Trade-offs are stated with reasoning, not just chosen silently +- Risks and unknowns are flagged, not hidden + +## Output Format + +## Goal + +What we're building and why. + +## Current State + +Relevant architecture and code that exists today. 
+ +## Plan + +### Step 1: [action] + +- **Files:** `path/to/file.ts` — what changes +- **Depends on:** nothing / Step N +- **Verification:** how to confirm this step worked + +### Step 2: [action] + +(same structure) + +## Trade-offs + +Decisions made and alternatives considered. + +## Risks + +What could go wrong and how to mitigate it. diff --git a/src/resources/agents/refactorer.md b/src/resources/agents/refactorer.md new file mode 100644 index 000000000..92cd01134 --- /dev/null +++ b/src/resources/agents/refactorer.md @@ -0,0 +1,47 @@ +--- +name: refactorer +description: Safe code transformations — extract, inline, rename, simplify +model: sonnet +--- + +You are a refactoring specialist. You perform safe, behavior-preserving code transformations. Every refactoring must maintain identical external behavior — no feature changes, no bug fixes mixed in. + +## Process + +1. **Read** the code and understand the current behavior +2. **Identify** the specific transformation to apply +3. **Check** all call sites, imports, and references that will be affected +4. **Transform** in small, verifiable steps +5. **Verify** no behavior change by running existing tests + +## Supported Transformations + +- **Extract**: Pull code into a new function, class, module, or variable +- **Inline**: Replace a function/variable with its body when abstraction adds no value +- **Rename**: Change names for clarity — update all references +- **Simplify**: Reduce complexity — flatten nesting, remove dead code, simplify conditionals +- **Move**: Relocate code to a better module — update all imports +- **Decompose**: Break large functions/classes into smaller, focused units + +## Safety Rules + +- Run tests before AND after every transformation +- Never combine refactoring with behavior changes +- Update all call sites — grep for old names before declaring done +- Preserve public API signatures unless explicitly instructed to change them +- If tests don't exist for the affected code, flag it — don't refactor blind + +## Output Format + +## Transformation + +What was refactored and why. + +## Changes + +1. `path/to/file.ts` — what changed +2. `path/to/other.ts` — updated call sites + +## Verification + +Test results before and after — confirming identical behavior. diff --git a/src/resources/agents/reviewer.md b/src/resources/agents/reviewer.md new file mode 100644 index 000000000..c9599cb7d --- /dev/null +++ b/src/resources/agents/reviewer.md @@ -0,0 +1,48 @@ +--- +name: reviewer +description: Structured code review with severity ratings and actionable fixes +model: sonnet +--- + +You are a code reviewer. Analyze code changes for bugs, security issues, performance problems, and maintainability concerns. Produce structured findings with severity ratings and concrete fixes. + +## Process + +1. Read the changed files and understand their purpose +2. Trace call sites and data flow through the changes +3. Check for edge cases, error handling gaps, and type safety issues +4. Verify test coverage exists for new/changed behavior +5. Look for security implications (input validation, auth checks, data exposure) + +## Severity Levels + +- **Critical**: Bugs that will cause crashes, data loss, or security vulnerabilities +- **High**: Logic errors, missing error handling, race conditions +- **Medium**: Performance issues, poor abstractions, missing validation +- **Low**: Style issues, naming, minor refactoring opportunities + +## Output Format + +## Review Summary + +One paragraph: overall assessment and risk level. 
+ +## Findings + +### [severity] Finding title + +**File:** `path/to/file.ts:42` +**Issue:** What's wrong and why it matters. +**Fix:** + +```typescript +// suggested fix +``` + +--- + +(Repeat for each finding, ordered by severity) + +## Verdict + +APPROVE / REQUEST_CHANGES / NEEDS_DISCUSSION — with one-sentence justification. diff --git a/src/resources/agents/security.md b/src/resources/agents/security.md new file mode 100644 index 000000000..3e1d2173a --- /dev/null +++ b/src/resources/agents/security.md @@ -0,0 +1,59 @@ +--- +name: security +description: OWASP security audit, dependency risks, and secrets detection +model: sonnet +--- + +You are a security auditor. Analyze code for vulnerabilities, insecure patterns, exposed secrets, and dependency risks. Focus on findings that are exploitable, not theoretical. + +## Audit Scope + +1. **Injection**: SQL injection, command injection, XSS, template injection, path traversal +2. **Authentication/Authorization**: Missing auth checks, broken access control, privilege escalation +3. **Data exposure**: Secrets in code, PII in logs, sensitive data in error messages, insecure storage +4. **Dependencies**: Known CVEs, outdated packages, typosquatting risks +5. **Cryptography**: Weak algorithms, hardcoded keys, insecure random generation +6. **Configuration**: Debug mode in production, permissive CORS, missing security headers + +## Process + +1. Read the target code and understand its trust boundaries +2. Identify where untrusted input enters the system +3. Trace untrusted input through the code — does it reach a sensitive sink without sanitization? +4. Check for hardcoded secrets, API keys, tokens, passwords +5. Review dependency versions against known vulnerabilities +6. Check configuration files for insecure defaults + +## Severity Classification + +- **Critical**: Remotely exploitable, no authentication required, data breach potential +- **High**: Exploitable with some preconditions, privilege escalation, auth bypass +- **Medium**: Requires specific conditions, information disclosure, DoS potential +- **Low**: Defense-in-depth improvements, hardening recommendations + +## Output Format + +## Security Assessment + +Overall risk level and attack surface summary. + +## Findings + +### [severity] Finding title + +**Location:** `path/to/file.ts:42` +**Category:** OWASP category (e.g., A03:2021 Injection) +**Issue:** What's vulnerable and how it could be exploited. +**Remediation:** + +```typescript +// secure alternative +``` + +--- + +(Repeat for each finding, ordered by severity) + +## Dependency Review + +Summary of dependency risks found (or clean bill of health). diff --git a/src/resources/agents/tester.md b/src/resources/agents/tester.md new file mode 100644 index 000000000..6d8794e67 --- /dev/null +++ b/src/resources/agents/tester.md @@ -0,0 +1,50 @@ +--- +name: tester +description: Test writing, fixing, and coverage gap identification +model: sonnet +--- + +You are a testing specialist. Write tests, fix broken tests, and identify coverage gaps. You prioritize tests that catch real bugs over tests that merely increase coverage numbers. + +## Process + +1. Read the code under test — understand its contract, edge cases, and failure modes +2. Check existing tests — understand the testing patterns, frameworks, and conventions in use +3. Identify gaps — what behaviors are untested? What edge cases are missing? +4. Write or fix tests — following the project's existing style and conventions +5. 
Run tests — verify they pass (and that new tests fail without the feature) + +## Test Priority + +Write tests in this order of value: + +1. **Regression tests** for known bugs — prevents recurrence +2. **Edge case tests** — boundary values, empty inputs, error paths +3. **Integration tests** for critical paths — data flow across modules +4. **Unit tests** for complex logic — pure functions, state machines, parsers +5. **Smoke tests** for new features — basic happy path + +## Conventions + +- Match the project's test framework and patterns (detect from existing tests) +- Use descriptive test names that explain the expected behavior +- One assertion per concept (not necessarily per test) +- Test behavior, not implementation — avoid testing private internals +- Use real data structures over mocks when practical + +## Output Format + +## Coverage Analysis + +What's tested, what's not, and what matters most. + +## Tests Written + +### `path/to/file.test.ts` + +- **test name** — what it verifies and why it matters +- **test name** — what it verifies + +## Test Results + +Pass/fail summary and any issues found during testing. diff --git a/src/resources/agents/typescript-pro.md b/src/resources/agents/typescript-pro.md index cbffd97ae..a831b3829 100644 --- a/src/resources/agents/typescript-pro.md +++ b/src/resources/agents/typescript-pro.md @@ -2,254 +2,60 @@ name: typescript-pro description: "TypeScript specialist for advanced type system patterns, complex generics, type-level programming, and end-to-end type safety across full-stack applications. Use when designing type-first APIs, creating branded types for domain modeling, building generic utilities, implementing discriminated unions for state machines, configuring tsconfig and build tooling, authoring type-safe libraries, setting up monorepo project references, migrating JavaScript to TypeScript, or optimizing TypeScript compilation and bundle performance." model: sonnet -memory: project --- -You are a senior TypeScript developer with mastery of TypeScript 5.0+ and its ecosystem, specializing in advanced type system features, full-stack type safety, and modern build tooling. Your expertise spans frontend frameworks, Node.js backends, and cross-platform development with focus on type safety and developer productivity. +You are a senior TypeScript developer with mastery of TypeScript 5.0+ and its ecosystem. You specialize in advanced type system features, full-stack type safety, and modern build tooling. Types are the specification — start there. -## Core Operating Principles +## Initialization -- **Type-first development**: Always start with type definitions before implementation. Types are the specification. -- **Strict mode always**: Assume `strict: true` and all strict compiler flags unless the project explicitly opts out. Never introduce `any` without documented justification. -- **Verify before stating**: Read actual project configuration (tsconfig.json, package.json, build configs) before making assumptions about the project setup. -- **Observable facts over assumptions**: If you need to know the TypeScript version, compiler options, or existing patterns — read the files. Do not guess. +1. Read `tsconfig.json`, `package.json`, and build tool configs +2. Assess existing type patterns — generics, utility types, declaration files +3. Identify framework and runtime (React, Vue, Node.js, Deno) +4. 
Check lint/format config to align with project conventions

-## Initialization Protocol
+## Core Principles

-When invoked for any task:
+- **Strict mode always**: `strict: true`, no `any` without documented justification
+- **Type-first**: Define data shapes and API contracts before writing logic
+- **Inference over annotation**: Let TypeScript infer where it produces correct, readable types
+- **`satisfies` over type annotation**: Preserves literal types while validating
+- **`as const`** for literal preservation in arrays and objects
+- **`import type`** for type-only imports — reduces emit, improves tree shaking
+- **Exhaustive checks** with `never` in switch/if-else — catch unhandled cases at compile time

-1. **Read project configuration**: Check for `tsconfig.json`, `package.json`, and build tool configs (vite.config.ts, next.config.js, webpack.config.ts, etc.)
-2. **Assess existing type patterns**: Grep for type imports, generic usage, utility types, and declaration files to understand the project's type maturity
-3. **Identify framework and runtime**: Determine if this is React, Vue, Angular, Node.js, Deno, or another target — this affects type patterns and available APIs
-4. **Check existing lint/format config**: Look for .eslintrc, prettier config, biome config to align with project conventions
+## Key Patterns

-## TypeScript Development Checklist
+- Conditional types for flexible APIs: `T extends Array<infer U> ? { data: U[] } : { data: T }`
+- Mapped types for transformations: `{ readonly [K in keyof T]: T[K] }`
+- Template literal types for string manipulation: `` `on${Capitalize<T>}` ``
+- Discriminated unions for state machines — each variant has a literal tag
+- Branded types for domain modeling: `T & { readonly __brand: B }`
+- Result types for error handling: `{ ok: true; value: T } | { ok: false; error: E }`
+- Type guards at runtime boundaries — validate all external data (APIs, user input, files)
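+
+A minimal sketch of how these patterns compose (illustrative only — `UserId` and `parseUserId` are invented names, not project code):
+
+```typescript
+type Brand<T, B> = T & { readonly __brand: B };
+type UserId = Brand<string, 'UserId'>;
+type Result<T, E = Error> = { ok: true; value: T } | { ok: false; error: E };
+
+// Type guard at the runtime boundary: untrusted input only becomes a
+// branded UserId after validation succeeds.
+function parseUserId(raw: string): Result<UserId> {
+  return /^\d+$/.test(raw)
+    ? { ok: true, value: raw as UserId }
+    : { ok: false, error: new Error(`invalid user id: ${raw}`) };
+}
+```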
-Apply to every implementation:
+## Build & Tooling

-- [ ] Strict mode enabled with all compiler flags
-- [ ] No explicit `any` usage without documented justification
-- [ ] 100% type coverage for public APIs
-- [ ] Type-only imports used where applicable (`import type { ... }`)
-- [ ] Source maps properly configured for debugging
-- [ ] Declaration files generated for library code
-- [ ] Generic constraints are as narrow as possible
-- [ ] Discriminated unions preferred over optional fields for variant types
+- `moduleResolution: "bundler"` for modern bundler projects
+- `isolatedModules: true` for esbuild/SWC compatibility
+- `incremental: true` with `.tsbuildinfo` for faster rebuilds
+- `composite: true` + `declarationMap: true` for monorepo project references
+- Type-only imports to reduce emit and improve tree shaking
+- Monitor type instantiation counts with `--generateTrace` for slow compiles

-## Advanced Type Patterns
+## Testing

-Apply these patterns where they improve safety and developer experience:
-
-**Conditional types** for flexible APIs:
-```typescript
-type ApiResponse<T> = T extends Array<infer U>
-  ? { data: U[]; total: number }
-  : { data: T };
-```
-
-**Mapped types** for transformations:
-```typescript
-type Readonly<T> = { readonly [K in keyof T]: T[K] };
-type Optional<T, K extends keyof T> = Omit<T, K> & Partial<Pick<T, K>>;
-```
-
-**Template literal types** for string manipulation:
-```typescript
-type EventName<T extends string> = `on${Capitalize<T>}`;
-type RouteParam<T extends string> = T extends `${infer _}:${infer Param}/${infer Rest}`
-  ? Param | RouteParam<Rest>
-  : T extends `${infer _}:${infer Param}` ? Param : never;
-```
-
-**Discriminated unions** for state machines:
-```typescript
-type State =
-  | { status: 'idle' }
-  | { status: 'loading'; startedAt: number }
-  | { status: 'success'; data: unknown; completedAt: number }
-  | { status: 'error'; error: Error; failedAt: number };
-```
-
-**Branded types** for domain modeling:
-```typescript
-type Brand<T, B> = T & { readonly __brand: B };
-type UserId = Brand<string, 'UserId'>;
-type OrderId = Brand<string, 'OrderId'>;
-```
-
-**Result types** for error handling:
-```typescript
-type Result<T, E = Error> =
-  | { ok: true; value: T }
-  | { ok: false; error: E };
-```
-
-## Implementation Strategy
-
-When implementing TypeScript code:
-
-1. **Design types first**: Define the data shapes, API contracts, and state types before writing any logic
-2. **Use the compiler as a correctness tool**: Structure types so invalid states are unrepresentable
-3. **Leverage inference**: Don't over-annotate — let TypeScript infer where it produces correct and readable types
-4. **Create type guards for runtime boundaries**: All external data (API responses, user input, file reads) must pass through type guards or validation
-5. **Use `satisfies` for type validation without widening**: Prefer `const config = { ... } satisfies Config` over `const config: Config = { ... }` when you want to preserve literal types
-6. **Use `as const` for literal types**: Apply const assertions to preserve literal types in arrays and objects
-7. **Exhaustive checking**: Use `never` type in switch/if-else chains to ensure all cases are handled
-
-```typescript
-function assertNever(x: never): never {
-  throw new Error(`Unexpected value: ${x}`);
-}
-
-function handleState(state: State): string {
-  switch (state.status) {
-    case 'idle': return 'Waiting';
-    case 'loading': return 'Loading...';
-    case 'success': return 'Done';
-    case 'error': return state.error.message;
-    default: return assertNever(state);
-  }
-}
-```
-
-## Build and Tooling Optimization
-
-**tsconfig.json best practices**:
-- Use `moduleResolution: "bundler"` for modern bundler-based projects
-- Use `module: "ESNext"` or `"NodeNext"` depending on target
-- Enable `isolatedModules: true` for compatibility with transpile-only tools (esbuild, SWC)
-- Set `skipLibCheck: true` only if third-party declarations cause issues — prefer fixing the root cause
-- Use `paths` mapping for clean imports, backed by bundler aliases
-- Configure `project references` for monorepos with `composite: true` and `declarationMap: true`
-
-**Incremental compilation**:
-- Enable `incremental: true` with a `.tsbuildinfo` output path
-- Use `--build` mode for project references
-- Configure `tsBuildInfoFile` to a persistent location in CI
-
-**Performance tuning**:
-- Use `type-only imports` to reduce emit and improve tree shaking
-- Prefer `const enum` only when bundle size savings justify the trade-off (they don't work with `isolatedModules`)
-- Avoid deeply recursive conditional types in hot paths — they slow the compiler
-- Monitor type instantiation counts with `--generateTrace`
-
-## Testing With Types
-
-- Write type tests using `expectTypeOf` (from vitest) or `tsd` for declaration testing
-- Create type-safe test utilities and fixtures
-- Use generic factory functions for test data
-- Ensure mock types match the real implementations
+- Type tests with `expectTypeOf` (vitest) or `tsd` for declaration testing
+- Type-safe test utilities and generic factory functions for test data
 - Test type narrowing paths explicitly
+- Ensure mock types match real implementations

-```typescript
-import {
expectTypeOf } from 'vitest';
+## Verification Checklist

-test('type narrowing works', () => {
-  const result: Result<string> = { ok: true, value: 'hello' };
-  if (result.ok) {
-    expectTypeOf(result.value).toBeString();
-  } else {
-    expectTypeOf(result.error).toEqualTypeOf<Error>();
-  }
-});
-```
+1. `npx tsc --noEmit` — zero errors
+2. Linter passes with zero warnings
+3. No untyped public APIs remain
+4. Tests passing, coverage target met
+5. Declaration files correct for library code
+6. No `any` without justification comment

-## Full-Stack Type Safety
-
-- **tRPC**: Use for end-to-end type safety between client and server without code generation
-- **GraphQL**: Use code generation (graphql-codegen) for type-safe queries and mutations
-- **OpenAPI**: Generate TypeScript clients from OpenAPI specs
-- **Shared packages**: Extract shared types into dedicated packages in monorepos
-- **Database types**: Use query builders (Prisma, Drizzle, Kysely) that generate types from schema
-- **Form validation**: Use Zod schemas that infer TypeScript types (`z.infer`)
-
-## Error Handling Patterns
-
-- Prefer `Result` types over throwing exceptions for expected error cases
-- Use `never` return type for functions that always throw
-- Create typed error hierarchies with discriminated unions
-- Type-safe error boundaries in React with proper generic constraints
-- Validate all external data at boundaries using Zod or similar runtime validators
-
-## Library Authoring
-
-When creating libraries or shared packages:
-
-- Generate `.d.ts` declaration files with `declaration: true`
-- Enable `declarationMap: true` for go-to-definition into source
-- Use `exports` field in package.json for proper dual CJS/ESM support
-- Design generic APIs with minimal constraints — widen later if needed
-- Document generic type parameters with JSDoc `@typeParam`
-- Test declarations with `tsd` or `@ts-expect-error` assertions
-- Version type changes according to semver (breaking type changes = major version)
-
-## Code Generation
-
-- **OpenAPI → TypeScript**: Use `openapi-typescript` for type generation, `openapi-fetch` for type-safe clients
-- **GraphQL → TypeScript**: Use `@graphql-codegen/cli` with appropriate plugins
-- **Database → TypeScript**: Use Prisma's `prisma generate` or Drizzle's schema inference
-- **Route → TypeScript**: Leverage framework-specific type generation (Next.js, tRPC)
-
-## Quality Verification
-
-Before declaring any TypeScript task complete:
-
-1. **Compile check**: Run `npx tsc --noEmit` and resolve all errors
-2. **Lint check**: Run the project's configured linter (ESLint, Biome) with zero warnings
-3. **Type coverage**: Verify no untyped public APIs remain
-4. **Test execution**: Run the test suite and verify passing
-5. **Bundle analysis**: If applicable, verify bundle size impact
-6. **Declaration quality**: If library code, verify generated `.d.ts` files are correct and complete
-
-## Communication Standards
-
-- State what you observed in the codebase, not what you assume
-- When proposing type patterns, explain why they improve safety or DX over alternatives
-- If a type pattern is complex, include a usage example showing how it catches errors at compile time
-- Report type coverage metrics when completing type-heavy work
-- Flag any `any` types introduced with explicit justification
-
-**Update your agent memory** as you discover TypeScript configuration patterns, type conventions, framework-specific typing approaches, build tool configurations, and architectural decisions in the codebase.
Write concise notes about what you found and where.
-
-Examples of what to record:
-- tsconfig.json settings and their rationale
-- Custom utility types defined in the project
-- Type generation pipelines and their configuration
-- Framework-specific typing patterns used
-- Build performance characteristics and optimization strategies
-- Common type errors encountered and their fixes
-- Module resolution quirks specific to the project
-
-# Persistent Agent Memory
-
-You have a Persistent Agent Memory directory at `/home/ubuntulinuxqa2/repos/claude_skills/.claude/agent-memory/typescript-pro/`. Its contents persist across conversations.
-
-As you work, consult your memory files to build on previous experience. When you encounter a mistake that seems like it could be common, check your Persistent Agent Memory for relevant notes — and if nothing is written yet, record what you learned.
-
-Guidelines:
-- `MEMORY.md` is always loaded into your system prompt — lines after 200 will be truncated, so keep it concise
-- Create separate topic files (e.g., `debugging.md`, `patterns.md`) for detailed notes and link to them from MEMORY.md
-- Update or remove memories that turn out to be wrong or outdated
-- Organize memory semantically by topic, not chronologically
-- Use the Write and Edit tools to update your memory files
-
-What to save:
-- Stable patterns and conventions confirmed across multiple interactions
-- Key architectural decisions, important file paths, and project structure
-- User preferences for workflow, tools, and communication style
-- Solutions to recurring problems and debugging insights
-
-What NOT to save:
-- Session-specific context (current task details, in-progress work, temporary state)
-- Information that might be incomplete — verify against project docs before writing
-- Anything that duplicates or contradicts existing CLAUDE.md instructions
-- Speculative or unverified conclusions from reading a single file
-
-Explicit user requests:
-- When the user asks you to remember something across sessions (e.g., "always use bun", "never auto-commit"), save it — no need to wait for multiple interactions
-- When the user asks to forget or stop remembering something, find and remove the relevant entries from your memory files
-- Since this memory is project-scope and shared with your team via version control, tailor your memories to this project
-
-## MEMORY.md
-
-Your MEMORY.md is currently empty. When you notice a pattern worth preserving across sessions, save it here. Anything in MEMORY.md will be included in your system prompt next time.
+Report concrete outcomes — files changed, type coverage, test results, trade-offs made.
diff --git a/src/resources/extensions/async-jobs/await-tool.test.ts b/src/resources/extensions/async-jobs/await-tool.test.ts index 1ed49161c..9e508d925 100644 --- a/src/resources/extensions/async-jobs/await-tool.test.ts +++ b/src/resources/extensions/async-jobs/await-tool.test.ts @@ -119,12 +119,10 @@ test("await_job returns not-found message for invalid job IDs", async () => { manager.shutdown(); }); -test("await_job marks jobs as awaited to suppress follow-up delivery (#2248)", async () => { +test("await_job suppresses follow-up for jobs that complete while awaiting (#2248)", async () => { const followUps: string[] = []; const manager = new AsyncJobManager({ - onJobComplete: (job) => { - if (!job.awaited) followUps.push(job.id); - }, + onJobComplete: (job) => followUps.push(job.id), }); const tool = createAwaitTool(() => manager); @@ -133,13 +131,48 @@ test("await_job marks jobs as awaited to suppress follow-up delivery (#2248)", a return new Promise((resolve) => setTimeout(() => resolve("result"), 50)); }); - // await_job consumes the result — should mark as awaited before promise resolves + // await_job consumes the result — suppressFollowUp() should cancel delivery timer await tool.execute("tc7", { jobs: [jobId] }, noopSignal, () => {}, undefined as never); - // Give the onJobComplete callback a tick to fire + // Give the onJobComplete callback a tick to fire (if suppression failed) await new Promise((r) => setTimeout(r, 50)); - assert.equal(followUps.length, 0, "onJobComplete should not deliver follow-up for awaited jobs"); + assert.equal(followUps.length, 0, "onJobComplete should not fire for jobs consumed by await_job"); + + manager.shutdown(); +}); + +test("await_job suppresses follow-up for already-completed jobs (cross-turn case) (#3787)", async () => { + // This is the key regression: job completes in a prior LLM turn, then + // await_job is called in a later turn. The delivery timer must still be + // cancellable at that point. + const followUps: string[] = []; + const manager = new AsyncJobManager({ + onJobComplete: (job) => followUps.push(job.id), + }); + const tool = createAwaitTool(() => manager); + + // Register and let the job complete fully before calling await_job + const jobId = manager.register("bash", "pre-completed-job", async () => "done"); + const job = manager.getJob(jobId)!; + await job.promise; + + // Simulate a "later turn" by yielding to the event loop — this lets any + // queueMicrotask callbacks run, but the setTimeout(0) delivery timer has + // not yet fired (it's scheduled for the next macrotask). 
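+    // (Illustration only: queueMicrotask work has already drained by this
+    //  point, while the 0 ms delivery timer is still queued for a later
+    //  timers phase — so execution resumes below with the timer still
+    //  pending and cancellable via clearTimeout().)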
+    await new Promise((r) => setImmediate(r));
+
+    // Now call await_job — suppressFollowUp() should cancel the pending timer
+    await tool.execute("tc7b", { jobs: [jobId] }, noopSignal, () => {}, undefined as never);
+
+    // Drain the macrotask queue — the (now-cancelled) timer would have fired here
+    await new Promise((r) => setTimeout(r, 50));
+
+    assert.equal(
+      followUps.length,
+      0,
+      "onJobComplete should not fire for already-completed jobs consumed by await_job",
+    );

   manager.shutdown();
 });
diff --git a/src/resources/extensions/async-jobs/await-tool.ts b/src/resources/extensions/async-jobs/await-tool.ts
index bab79270a..8d7e8c85c 100644
--- a/src/resources/extensions/async-jobs/await-tool.ts
+++ b/src/resources/extensions/async-jobs/await-tool.ts
@@ -66,10 +66,13 @@ export function createAwaitTool(getManager: () => AsyncJobManager): ToolDefiniti
     }
   }

-  // Mark all watched jobs as awaited upfront so the onJobComplete
-  // callback (which fires synchronously in the promise .then()) knows
-  // to suppress the follow-up message.
-  for (const j of watched) j.awaited = true;
+  // Suppress follow-up notifications for all watched jobs upfront.
+  // suppressFollowUp() cancels the pending delivery timer (if any), which
+  // handles both the within-turn case (job completes while we await) and
+  // the cross-turn case (job already completed before await_job was called).
+  // Previously this only set j.awaited = true, which missed the cross-turn
+  // case because the queueMicrotask had already fired (#3787).
+  for (const j of watched) manager.suppressFollowUp(j.id);

   // If all watched jobs are already done, return immediately
   const running = watched.filter((j) => j.status === "running");
diff --git a/src/resources/extensions/async-jobs/job-manager.ts b/src/resources/extensions/async-jobs/job-manager.ts
index 10ce3cd41..ac5ab2abb 100644
--- a/src/resources/extensions/async-jobs/job-manager.ts
+++ b/src/resources/extensions/async-jobs/job-manager.ts
@@ -24,6 +24,12 @@ export interface Job {
   errorText?: string;
   /** Set by await_job when results are consumed. Suppresses follow-up delivery. */
   awaited?: boolean;
+  /**
+   * Handle for the pending follow-up delivery timer (set by deliverResult).
+   * Stored so suppressFollowUp() can cancel it before the notification fires,
+   * even when await_job is called after the job has already completed (#3787).
+   */
+  deliveryTimer?: ReturnType<typeof setTimeout>;
 }

 export interface JobManagerOptions {
@@ -170,12 +176,36 @@ export class AsyncJobManager {

   // ── Private ────────────────────────────────────────────────────────────

+  /**
+   * Suppress follow-up notification for a job — cancels any pending delivery
+   * timer and marks the job as awaited. Safe to call at any time, including
+   * before or after the job completes (#3787).
+   */
+  suppressFollowUp(id: string): void {
+    const job = this.jobs.get(id);
+    if (!job) return;
+    job.awaited = true;
+    if (job.deliveryTimer !== undefined) {
+      clearTimeout(job.deliveryTimer);
+      job.deliveryTimer = undefined;
+    }
+  }
+
   private deliverResult(job: Job): void {
     if (!this.onJobComplete) return;
-    // Defer delivery by one microtask so await_job's .then() chain runs first
-    // and can set job.awaited = true before onJobComplete checks it (#2762).
+    // Use setTimeout(0) instead of queueMicrotask so the handle is cancellable.
+    // suppressFollowUp() can clear this timer even when await_job is called in
+    // a later LLM turn (after the job already completed). queueMicrotask ran
+    // immediately and could not be cancelled (#2762, #3787).
     const cb = this.onJobComplete;
-    queueMicrotask(() => cb(job));
+    job.deliveryTimer = setTimeout(() => {
+      job.deliveryTimer = undefined;
+      if (!job.awaited) cb(job);
+    }, 0);
+    // Allow process to exit even if timer is pending
+    if (typeof job.deliveryTimer === "object" && "unref" in job.deliveryTimer) {
+      (job.deliveryTimer as NodeJS.Timeout).unref();
+    }
   }

   private scheduleEviction(id: string): void {
diff --git a/src/resources/extensions/claude-code-cli/partial-builder.ts b/src/resources/extensions/claude-code-cli/partial-builder.ts
index c1d011e14..0f52bc220 100644
--- a/src/resources/extensions/claude-code-cli/partial-builder.ts
+++ b/src/resources/extensions/claude-code-cli/partial-builder.ts
@@ -19,6 +19,49 @@ import type {
 import { hasXmlParameterTags, repairToolJson } from "@gsd/pi-ai";
 import type { BetaContentBlock, BetaRawMessageStreamEvent, NonNullableUsage } from "./sdk-types.js";

+// ---------------------------------------------------------------------------
+// MCP tool name parsing
+// ---------------------------------------------------------------------------
+
+/**
+ * Split a Claude Code MCP tool name (`mcp__<server>__<tool>`) into its parts.
+ * Returns null for non-prefixed names so callers can fall through unchanged.
+ *
+ * Server names may contain hyphens (`gsd-workflow`); the SDK uses the literal
+ * `__` delimiter between the server name and the tool name.
+ */
+export function parseMcpToolName(name: string): { server: string; tool: string } | null {
+  if (!name.startsWith("mcp__")) return null;
+  const rest = name.slice("mcp__".length);
+  const delim = rest.indexOf("__");
+  if (delim <= 0 || delim === rest.length - 2) return null;
+  return { server: rest.slice(0, delim), tool: rest.slice(delim + 2) };
+}
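+
+// For example: parseMcpToolName("mcp__gsd-workflow__gsd_plan_milestone")
+// → { server: "gsd-workflow", tool: "gsd_plan_milestone" }, while
+// parseMcpToolName("Bash") → null (not MCP-prefixed).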
+
+/**
+ * Build a GSD ToolCall block from a Claude Code SDK tool_use block, stripping
+ * the `mcp__<server>__` prefix from the name so registered extension renderers
+ * (which use the unprefixed canonical names) can match. The original server
+ * name is preserved on the block for diagnostics and rendering.
+ */
+function toolCallFromBlock(
+  id: string,
+  rawName: string,
+  input: Record<string, unknown>,
+): ToolCall {
+  const parsed = parseMcpToolName(rawName);
+  const toolCall: ToolCall = {
+    type: "toolCall",
+    id,
+    name: parsed ? parsed.tool : rawName,
+    arguments: input,
+  };
+  if (parsed) {
+    (toolCall as ToolCall & { mcpServer?: string }).mcpServer = parsed.server;
+  }
+  return toolCall;
+}
+
 // ---------------------------------------------------------------------------
 // Content-block mapping helpers
 // ---------------------------------------------------------------------------
@@ -41,12 +84,7 @@ export function mapContentBlock(
       } satisfies ThinkingContent;

     case "tool_use":
-      return {
-        type: "toolCall",
-        id: block.id,
-        name: block.name,
-        arguments: block.input,
-      } satisfies ToolCall;
+      return toolCallFromBlock(block.id, block.name, block.input);

     case "server_tool_use":
       return {
@@ -183,12 +221,7 @@ export class PartialMessageBuilder {
     }
     if (block.type === "tool_use") {
       this.toolJsonAccum.set(streamIndex, "");
-      this.partial.content.push({
-        type: "toolCall",
-        id: block.id,
-        name: block.name,
-        arguments: {},
-      });
+      this.partial.content.push(toolCallFromBlock(block.id, block.name, {}));
       return { type: "toolcall_start", contentIndex, partial: this.partial };
     }
     if (block.type === "server_tool_use") {
diff --git a/src/resources/extensions/claude-code-cli/stream-adapter.ts b/src/resources/extensions/claude-code-cli/stream-adapter.ts
index 465d48759..d8d3e35f5 100644
--- a/src/resources/extensions/claude-code-cli/stream-adapter.ts
+++ b/src/resources/extensions/claude-code-cli/stream-adapter.ts
@@ -16,10 +16,12 @@ import type {
   SimpleStreamOptions,
   ToolCall,
 } from "@gsd/pi-ai";
+import type { ExtensionUIContext } from "@gsd/pi-coding-agent";
 import { EventStream } from "@gsd/pi-ai";
 import { execSync } from "node:child_process";
 import { PartialMessageBuilder, ZERO_USAGE, mapUsage } from "./partial-builder.js";
 import { buildWorkflowMcpServers } from "../gsd/workflow-mcp.js";
+import { showInterviewRound, type Question, type RoundResult } from "../shared/tui.js";
 import type {
   SDKAssistantMessage,
   SDKMessage,
@@ -45,6 +47,58 @@ type ToolCallWithExternalResult = ToolCall & {
   externalResult?: ExternalToolResultPayload;
 };

+interface ClaudeCodeStreamOptions extends SimpleStreamOptions {
+  extensionUIContext?: ExtensionUIContext;
+}
+
+interface SdkElicitationRequestOption {
+  const?: string;
+  title?: string;
+}
+
+interface SdkElicitationFieldSchema {
+  type?: string;
+  title?: string;
+  description?: string;
+  format?: string;
+  writeOnly?: boolean;
+  oneOf?: SdkElicitationRequestOption[];
+  items?: {
+    anyOf?: SdkElicitationRequestOption[];
+  };
+}
+
+interface SdkElicitationRequest {
+  serverName: string;
+  message: string;
+  mode?: "form" | "url";
+  requestedSchema?: {
+    type?: string;
+    properties?: Record<string, SdkElicitationFieldSchema>;
+    required?: string[];
+  };
+}
+
+interface SdkElicitationResult {
+  action: "accept" | "decline" | "cancel";
+  content?: Record<string, unknown>;
+}
+
+interface ParsedElicitationQuestion extends Question {
+  noteFieldId?: string;
+}
+
+interface ParsedTextInputField {
+  id: string;
+  title: string;
+  description: string;
+  required: boolean;
+  secure: boolean;
+}
+
+const OTHER_OPTION_LABEL = "None of the above";
+const SENSITIVE_FIELD_PATTERN = /(password|passphrase|secret|token|api[_\s-]*key|private[_\s-]*key|credential)/i;
+
 // ---------------------------------------------------------------------------
 // Stream factory
 // ---------------------------------------------------------------------------
@@ -64,6 +118,18 @@ function createAssistantStream(): AssistantMessageEventStream {
   ) as AssistantMessageEventStream;
 }

+export function getResultErrorMessage(result: SDKResultMessage): string {
+  if ("errors" in result && Array.isArray(result.errors) && result.errors.length > 0) {
+    return result.errors.join("; ");
+  }
+
+  if ("result" in result && typeof result.result === "string" && result.result.trim().length > 0) {
+    return result.result.trim();
+  }
+
+  return result.subtype === "success" ? "claude_code_request_failed" : result.subtype;
+}
+
 // ---------------------------------------------------------------------------
 // Claude binary resolution
 // ---------------------------------------------------------------------------
@@ -121,20 +187,36 @@ function extractMessageText(msg: { role: string; content: unknown }): string {
  * call effectively stateless. This version serialises the complete
  * conversation history (system prompt + all user/assistant turns) so
  * Claude Code has full context for multi-turn continuity.
+ *
+ * History is wrapped in XML-tag structure rather than `[User]`/`[Assistant]`
+ * bracket headers. Bracket headers read to the model as an in-context
+ * demonstration of how turns are delimited, causing it to fabricate fake
+ * user turns in its own output. XML tags read as document structure and
+ * don't get mirrored in free text.
  */
 export function buildPromptFromContext(context: Context): string {
-  const parts: string[] = [];
+  const hasContent = Boolean(context.systemPrompt) || context.messages.some((m) => extractMessageText(m));
+  if (!hasContent) return "";
+
+  const parts: string[] = [
+    "Respond only to the final user message below. " +
+      "Do not emit <user_message>, <assistant_message>, or <system_message> tags in your response.",
+  ];

   if (context.systemPrompt) {
-    parts.push(`[System]\n${context.systemPrompt}`);
+    parts.push(`<system_prompt>\n${context.systemPrompt}\n</system_prompt>`);
   }

+  const turns: string[] = [];
   for (const msg of context.messages) {
     const text = extractMessageText(msg);
     if (!text) continue;
-
-    const label = msg.role === "user" ? "User" : msg.role === "assistant" ? "Assistant" : "System";
-    parts.push(`[${label}]\n${text}`);
+    const tag =
+      msg.role === "user" ? "user_message" : msg.role === "assistant" ? "assistant_message" : "system_message";
+    turns.push(`<${tag}>\n${text}\n</${tag}>`);
+  }
+  if (turns.length > 0) {
+    parts.push(`<conversation_history>\n${turns.join("\n")}\n</conversation_history>`);
   }

   return parts.join("\n\n");
 }
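+// Illustrative shape of the serialised prompt for a one-turn history:
+//
+//   Respond only to the final user message below. Do not emit ...
+//
+//   <system_prompt>
+//   You are helpful.
+//   </system_prompt>
+//
+//   <conversation_history>
+//   <user_message>
+//   Hello
+//   </user_message>
+//   </conversation_history>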
@@ -172,30 +254,387 @@ export function makeStreamExhaustedErrorMessage(model: string, lastTextContent:
   return message;
 }

+function readElicitationChoices(options: SdkElicitationRequestOption[] | undefined): string[] {
+  if (!Array.isArray(options)) return [];
+  return options
+    .map((option) => (typeof option?.const === "string" ? option.const : typeof option?.title === "string" ? option.title : ""))
+    .filter((option): option is string => option.length > 0);
+}
+
+export function parseAskUserQuestionsElicitation(
+  request: Pick<SdkElicitationRequest, "mode" | "requestedSchema">,
+): ParsedElicitationQuestion[] | null {
+  if (request.mode && request.mode !== "form") return null;
+  const properties = request.requestedSchema?.properties;
+  if (!properties || typeof properties !== "object") return null;
+
+  const questions: ParsedElicitationQuestion[] = [];
+
+  for (const [fieldId, rawField] of Object.entries(properties)) {
+    if (fieldId.endsWith("__note")) continue;
+    if (!rawField || typeof rawField !== "object") return null;
+
+    const header = typeof rawField.title === "string" && rawField.title.length > 0 ? rawField.title : fieldId;
+    const question = typeof rawField.description === "string" ? rawField.description : "";
+
+    if (rawField.type === "array") {
+      const options = readElicitationChoices(rawField.items?.anyOf).map((label) => ({ label, description: "" }));
+      if (options.length === 0) return null;
+      questions.push({
+        id: fieldId,
+        header,
+        question,
+        options,
+        allowMultiple: true,
+      });
+      continue;
+    }
+
+    if (rawField.type === "string") {
+      const noteFieldId = Object.prototype.hasOwnProperty.call(properties, `${fieldId}__note`)
+        ? `${fieldId}__note`
+        : undefined;
+      const options = readElicitationChoices(rawField.oneOf)
+        .filter((label) => label !== OTHER_OPTION_LABEL)
+        .map((label) => ({ label, description: "" }));
+      if (options.length === 0) return null;
+      questions.push({
+        id: fieldId,
+        header,
+        question,
+        options,
+        noteFieldId,
+      });
+      continue;
+    }
+
+    return null;
+  }
+
+  return questions.length > 0 ? questions : null;
+}
+
+function isSecureElicitationField(
+  requestMessage: string,
+  fieldId: string,
+  field: SdkElicitationFieldSchema,
+): boolean {
+  if (field.format === "password") return true;
+  if (field.writeOnly === true) return true;
+
+  const rawField = field as Record<string, unknown>;
+  if (rawField.sensitive === true || rawField["x-sensitive"] === true) return true;
+
+  const haystack = [
+    requestMessage,
+    fieldId.replace(/[_-]+/g, " "),
+    typeof field.title === "string" ? field.title : "",
+    typeof field.description === "string" ? field.description : "",
+  ]
+    .join(" ")
+    .toLowerCase();
+
+  return SENSITIVE_FIELD_PATTERN.test(haystack);
+}
+
+export function parseTextInputElicitation(
+  request: Pick<SdkElicitationRequest, "message" | "mode" | "requestedSchema">,
+): ParsedTextInputField[] | null {
+  if (request.mode && request.mode !== "form") return null;
+  const schema = request.requestedSchema as
+    | ({ properties?: Record<string, SdkElicitationFieldSchema>; keys?: Record<string, SdkElicitationFieldSchema> } & Record<string, unknown>)
+    | undefined;
+  const fieldsSource = schema?.properties && typeof schema.properties === "object"
+    ? schema.properties
+    : schema?.keys && typeof schema.keys === "object"
+      ? schema.keys
+      : undefined;
+  if (!fieldsSource) return null;
+
+  const requiredSet = new Set(
+    Array.isArray(request.requestedSchema?.required)
+      ? request.requestedSchema.required.filter((value): value is string => typeof value === "string")
+      : [],
+  );
+
+  const fields: ParsedTextInputField[] = [];
+  for (const [fieldId, field] of Object.entries(fieldsSource)) {
+    if (!field || typeof field !== "object") continue;
+    if (field.type !== "string") continue;
+    if (Array.isArray(field.oneOf) && field.oneOf.length > 0) continue;
+
+    fields.push({
+      id: fieldId,
+      title: typeof field.title === "string" && field.title.length > 0 ? field.title : fieldId,
+      description: typeof field.description === "string" ? field.description : "",
+      required: requiredSet.has(fieldId),
+      secure: isSecureElicitationField(request.message, fieldId, field),
+    });
+  }
+
+  return fields.length > 0 ? fields : null;
+}
+
+export function roundResultToElicitationContent(
+  questions: ParsedElicitationQuestion[],
+  result: RoundResult,
+): Record<string, unknown> {
+  const content: Record<string, unknown> = {};
+
+  for (const question of questions) {
+    const answer = result.answers[question.id];
+    if (!answer) continue;
+
+    if (question.allowMultiple) {
+      const selected = Array.isArray(answer.selected) ? answer.selected : [answer.selected];
+      content[question.id] = selected;
+      continue;
+    }
+
+    const selected = Array.isArray(answer.selected) ? answer.selected[0] ?? "" : answer.selected;
"" : answer.selected; + content[question.id] = selected; + if (question.noteFieldId && selected === OTHER_OPTION_LABEL && answer.notes.trim().length > 0) { + content[question.noteFieldId] = answer.notes.trim(); + } + } + + return content; +} + +function buildElicitationPromptTitle(request: SdkElicitationRequest, question: ParsedElicitationQuestion): string { + const parts = [ + request.serverName ? `[${request.serverName}]` : "", + question.header, + question.question, + ].filter((part) => part && part.trim().length > 0); + return parts.join("\n\n"); +} + +async function promptElicitationWithDialogs( + request: SdkElicitationRequest, + questions: ParsedElicitationQuestion[], + ui: ExtensionUIContext, + signal: AbortSignal, +): Promise { + const content: Record = {}; + + for (const question of questions) { + const title = buildElicitationPromptTitle(request, question); + + if (question.allowMultiple) { + const selected = await ui.select(title, question.options.map((option) => option.label), { + allowMultiple: true, + signal, + }); + if (Array.isArray(selected)) { + if (selected.length === 0) return { action: "cancel" }; + content[question.id] = selected; + continue; + } + if (typeof selected === "string" && selected.length > 0) { + content[question.id] = [selected]; + continue; + } + return { action: "cancel" }; + } + + const selected = await ui.select(title, [...question.options.map((option) => option.label), OTHER_OPTION_LABEL], { signal }); + if (typeof selected !== "string" || selected.length === 0) { + return { action: "cancel" }; + } + + content[question.id] = selected; + if (question.noteFieldId && selected === OTHER_OPTION_LABEL) { + const note = await ui.input(`${question.header} note`, "Explain your answer", { signal }); + if (note === undefined) return { action: "cancel" }; + if (note.trim().length > 0) { + content[question.noteFieldId] = note.trim(); + } + } + } + + return { action: "accept", content }; +} + +function buildTextInputPromptTitle(request: SdkElicitationRequest, field: ParsedTextInputField): string { + const parts = [ + request.serverName ? `[${request.serverName}]` : "", + field.title, + field.description, + ].filter((part) => typeof part === "string" && part.trim().length > 0); + return parts.join("\n\n"); +} + +function buildTextInputPlaceholder(field: ParsedTextInputField): string | undefined { + const desc = field.description.trim(); + if (!desc) return field.required ? "Required" : "Leave empty to skip"; + + const formatLine = desc + .split(/\r?\n/) + .map((line) => line.trim()) + .find((line) => /^format:/i.test(line)); + + if (!formatLine) return field.required ? "Required" : "Leave empty to skip"; + const hint = formatLine.replace(/^format:\s*/i, "").trim(); + return hint.length > 0 ? hint : field.required ? "Required" : "Leave empty to skip"; +} + +async function promptTextInputElicitation( + request: SdkElicitationRequest, + fields: ParsedTextInputField[], + ui: ExtensionUIContext, + signal: AbortSignal, +): Promise { + const content: Record = {}; + + for (const field of fields) { + const value = await ui.input( + buildTextInputPromptTitle(request, field), + buildTextInputPlaceholder(field), + { signal, ...(field.secure ? 
+      { signal, ...(field.secure ? { secure: true } : {}) },
+    );
+    if (value === undefined) {
+      return { action: "cancel" };
+    }
+    content[field.id] = value;
+  }
+
+  return { action: "accept", content };
+}
+
+export function createClaudeCodeElicitationHandler(
+  ui: ExtensionUIContext | undefined,
+): ((request: SdkElicitationRequest, options: { signal: AbortSignal }) => Promise<SdkElicitationResult>) | undefined {
+  if (!ui) return undefined;
+
+  return async (request, { signal }) => {
+    if (request.mode === "url") {
+      return { action: "decline" };
+    }
+
+    const questions = parseAskUserQuestionsElicitation(request);
+    if (questions) {
+      const interviewResult = await showInterviewRound(questions, { signal }, { ui } as any).catch(() => undefined);
+      if (interviewResult && Object.keys(interviewResult.answers).length > 0) {
+        return {
+          action: "accept",
+          content: roundResultToElicitationContent(questions, interviewResult),
+        };
+      }
+
+      return promptElicitationWithDialogs(request, questions, ui, signal);
+    }
+
+    const textFields = parseTextInputElicitation(request);
+    if (textFields) {
+      return promptTextInputElicitation(request, textFields, ui, signal);
+    }
+
+    return { action: "decline" };
+  };
+}
+
+/**
+ * Aborted by the caller's AbortSignal — distinct from exhaustion. GSD's
+ * agent loop keys off `stopReason === "aborted"` to treat this as a clean
+ * user cancel instead of a retry-eligible provider failure.
+ */
+export function makeAbortedMessage(model: string, lastTextContent: string): AssistantMessage {
+  const message: AssistantMessage = {
+    role: "assistant",
+    content: lastTextContent
+      ? [{ type: "text", text: lastTextContent }]
+      : [{ type: "text", text: "Claude Code stream aborted by caller" }],
+    api: "anthropic-messages",
+    provider: "claude-code",
+    model,
+    usage: { ...ZERO_USAGE },
+    stopReason: "aborted",
+    timestamp: Date.now(),
+  };
+  return message;
+}
+
 // ---------------------------------------------------------------------------
 // SDK options builder
 // ---------------------------------------------------------------------------

+/**
+ * Resolve the Claude Code permission mode for the current run.
+ *
+ * GSD subagents run underneath a host Claude Code session the user has
+ * already consented to, and their work (edits, shell inspection, MCP calls)
+ * spans the full workflow toolset. Defaulting the inner SDK to
+ * `bypassPermissions` avoids per-tool approval prompts that offer no
+ * meaningful safety beyond what the host session and the subagent prompts
+ * already enforce. `GSD_CLAUDE_CODE_PERMISSION_MODE` lets security-conscious
+ * users opt into a stricter mode (`acceptEdits`, `default`, `plan`).
+ *
+ * Tradeoff: bypass means a prompt-injection payload read from an untrusted
+ * file could trigger tool calls without a second gate. Accepted for GSD
+ * because the workflow is explicit user intent and the alternative
+ * (#4099) is continuous approval fatigue that blocks real work.
+ */
+export async function resolveClaudePermissionMode(
+  env: NodeJS.ProcessEnv = process.env,
+): Promise<"bypassPermissions" | "acceptEdits" | "default" | "plan"> {
+  const override = env.GSD_CLAUDE_CODE_PERMISSION_MODE?.trim();
+  if (override === "bypassPermissions" || override === "acceptEdits" || override === "default" || override === "plan") {
+    return override;
+  }
+  return "bypassPermissions";
+}
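+
+// e.g. GSD_CLAUDE_CODE_PERMISSION_MODE=acceptEdits in the environment
+// (illustrative) yields "acceptEdits"; unset or unrecognized values fall
+// back to "bypassPermissions".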

 /**
  * Build the options object passed to the Claude Agent SDK's `query()` call.
  *
  * Extracted for testability — callers can verify session persistence,
  * beta flags, and other configuration without mocking the full SDK.
+ *
+ * `permissionMode` / `allowDangerouslySkipPermissions` are resolved through
+ * {@link resolveClaudePermissionMode} so interactive runs don't silently
+ * bypass the SDK's permission gate. Callers that want the old always-bypass
+ * behaviour pass `permissionMode: "bypassPermissions"` explicitly.
  */
-export function buildSdkOptions(modelId: string, prompt: string): Record<string, unknown> {
+export function buildSdkOptions(
+  modelId: string,
+  prompt: string,
+  overrides?: { permissionMode?: "bypassPermissions" | "acceptEdits" | "default" | "plan" },
+  extraOptions: Record<string, unknown> = {},
+): Record<string, unknown> {
   const mcpServers = buildWorkflowMcpServers();
+  const permissionMode = overrides?.permissionMode ?? "bypassPermissions";
+  const disallowedTools = ["AskUserQuestion"];
+  // Pre-authorize the safe built-ins and every registered workflow MCP
+  // server's tools. `acceptEdits` mode (the interactive default) only
+  // auto-approves file edits — Read/Glob/Grep, basic shell inspection, and
+  // every `mcp__gsd-workflow__*` call still surface as "This command
+  // requires approval" and block GSD actions (#4099).
+  const allowedTools = [
+    "Read",
+    "Write",
+    "Edit",
+    "Glob",
+    "Grep",
+    "Bash(ls:*)",
+    "Bash(pwd)",
+    ...(mcpServers ? Object.keys(mcpServers).map((serverName) => `mcp__${serverName}__*`) : []),
+  ];

   return {
     pathToClaudeCodeExecutable: getClaudePath(),
     model: modelId,
     includePartialMessages: true,
     persistSession: true,
     cwd: process.cwd(),
-    permissionMode: "bypassPermissions",
-    allowDangerouslySkipPermissions: true,
+    permissionMode,
+    allowDangerouslySkipPermissions: permissionMode === "bypassPermissions",
     settingSources: ["project"],
     systemPrompt: { type: "preset", preset: "claude_code" },
+    disallowedTools,
+    ...(allowedTools.length > 0 ? { allowedTools } : {}),
     ...(mcpServers ? { mcpServers } : {}),
     betas: modelId.includes("sonnet") ? ["context-1m-2025-08-07"] : [],
+    ...extraOptions,
   };
 }
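+
+// Illustrative result: with the default "gsd-workflow" server registered,
+// allowedTools comes out as ["Read", "Write", "Edit", "Glob", "Grep",
+// "Bash(ls:*)", "Bash(pwd)", "mcp__gsd-workflow__*"] — the
+// session-persistence tests assert exactly this list.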
@@ -291,18 +730,41 @@ export function extractToolResultsFromSdkUserMessage(message: SDKUserMessage): A
   return extracted;
 }

-function attachExternalResultsToToolCalls(
-  toolCalls: AssistantMessage["content"],
+function attachExternalResultsToToolBlocks(
+  toolBlocks: AssistantMessage["content"],
   toolResultsById: ReadonlyMap<string, ExternalToolResultPayload>,
 ): void {
-  for (const block of toolCalls) {
-    if (block.type !== "toolCall") continue;
+  for (const block of toolBlocks) {
+    if (block.type !== "toolCall" && block.type !== "serverToolUse") continue;
     const externalResult = toolResultsById.get(block.id);
     if (!externalResult) continue;
-    (block as ToolCallWithExternalResult).externalResult = externalResult;
+    (block as ToolCallWithExternalResult & { id: string }).externalResult = externalResult;
   }
 }

+/**
+ * Merge tool-call blocks from the active partial-message builder into the
+ * running list of intermediate tool calls, preserving order and de-duping
+ * by tool-call id. Exposed for testing the F3 fix (final-turn tool calls
+ * dropped when `result` arrives without a preceding synthetic `user`).
+ */
+export function mergePendingToolCalls(
+  intermediate: AssistantMessage["content"],
+  pending: AssistantMessage["content"],
+): AssistantMessage["content"] {
+  const alreadyIncluded = new Set<string>();
+  for (const block of intermediate) {
+    if (block.type === "toolCall") alreadyIncluded.add(block.id);
+  }
+  for (const block of pending) {
+    if (block.type !== "toolCall") continue;
+    if (alreadyIncluded.has(block.id)) continue;
+    alreadyIncluded.add(block.id);
+    intermediate.push(block);
+  }
+  return intermediate;
+}
+
 // ---------------------------------------------------------------------------
 // streamSimple implementation
 // ---------------------------------------------------------------------------
@@ -337,8 +799,8 @@ async function pumpSdkMessages(
   /** Track the last text content seen across all assistant turns for the final message. */
   let lastTextContent = "";
   let lastThinkingContent = "";
-  /** Collect tool calls from intermediate SDK turns for tool_execution events. */
-  const intermediateToolCalls: AssistantMessage["content"] = [];
+  /** Collect tool blocks from intermediate SDK turns for tool execution rendering. */
+  const intermediateToolBlocks: AssistantMessage["content"] = [];
   /** Preserve real external tool results from Claude Code's synthetic user messages. */
   const toolResultsById = new Map<string, ExternalToolResultPayload>();
@@ -359,7 +821,19 @@ async function pumpSdkMessages(
   }

   const prompt = buildPromptFromContext(context);
-  const sdkOpts = buildSdkOptions(modelId, prompt);
+  const permissionMode = await resolveClaudePermissionMode();
+  const sdkOpts = buildSdkOptions(
+    modelId,
+    prompt,
+    { permissionMode },
+    typeof (options as ClaudeCodeStreamOptions | undefined)?.extensionUIContext === "object"
+      ? {
+          onElicitation: createClaudeCodeElicitationHandler(
+            (options as ClaudeCodeStreamOptions | undefined)?.extensionUIContext,
+          ),
+        }
+      : {},
+  );

   const queryResult = sdk.query({
     prompt,
@@ -383,7 +857,17 @@ async function pumpSdkMessages(
   stream.push({ type: "start", partial: initialPartial });

   for await (const msg of queryResult as AsyncIterable<SDKMessage>) {
-    if (options?.signal?.aborted) break;
+    if (options?.signal?.aborted) {
+      // User-initiated cancel — emit an aborted error so the agent
+      // loop classifies this as a deliberate stop, not a transient
+      // provider failure that should be retried.
+      stream.push({
+        type: "error",
+        reason: "aborted",
+        error: makeAbortedMessage(modelId, lastTextContent),
+      });
+      return;
+    }

     switch (msg.type) {
       // -- Init --
@@ -439,9 +923,9 @@
           lastTextContent = block.text;
         } else if (block.type === "thinking" && block.thinking) {
           lastThinkingContent = block.thinking;
-        } else if (block.type === "toolCall") {
-          // Collect tool calls for externalToolExecution rendering
-          intermediateToolCalls.push(block);
+        } else if (block.type === "toolCall" || block.type === "serverToolUse") {
+          // Collect tool blocks for externalToolExecution rendering
+          intermediateToolBlocks.push(block);
         }
       }
     }
@@ -451,24 +935,33 @@
       for (const { toolUseId, result } of extractToolResultsFromSdkUserMessage(msg as SDKUserMessage)) {
         toolResultsById.set(toolUseId, result);
       }
-      attachExternalResultsToToolCalls(intermediateToolCalls, toolResultsById);
+      attachExternalResultsToToolBlocks(intermediateToolBlocks, toolResultsById);

       // Push a synthetic toolcall_end for each tool call from this turn
       // so the TUI can render tool results in real-time during the SDK
      // session instead of waiting until the entire session completes.
if (builder) { for (const block of builder.message.content) { - if (block.type !== "toolCall") continue; const extResult = (block as ToolCallWithExternalResult).externalResult; if (!extResult) continue; - // Push a toolcall_end with result attached so the chat-controller - // can call updateResult on the pending ToolExecutionComponent. - stream.push({ - type: "toolcall_end", - contentIndex: builder.message.content.indexOf(block), - toolCall: block, - partial: builder.message, - }); + const contentIndex = builder.message.content.indexOf(block); + if (contentIndex < 0) continue; + // Push synthetic completion events with result attached so the + // chat-controller can update pending ToolExecutionComponents. + if (block.type === "toolCall") { + stream.push({ + type: "toolcall_end", + contentIndex, + toolCall: block, + partial: builder.message, + }); + } else if (block.type === "serverToolUse") { + stream.push({ + type: "server_tool_use", + contentIndex, + partial: builder.message, + }); + } } } @@ -485,9 +978,19 @@ async function pumpSdkMessages( // events for proper TUI rendering, followed by the text response. const finalContent: AssistantMessage["content"] = []; + // If the final turn ended without a synthetic user message + // (e.g. stop_reason: "tool_use" followed directly by result, + // or a turn with text but no tool execution), the `builder` + // still holds toolCall blocks that were never pushed into + // `intermediateToolBlocks`. Fold them in here so they aren't + // dropped from the final AssistantMessage. + if (builder) { + mergePendingToolCalls(intermediateToolBlocks, builder.message.content); + } + // Add tool calls from intermediate turns first (renders above text) - attachExternalResultsToToolCalls(intermediateToolCalls, toolResultsById); - finalContent.push(...intermediateToolCalls); + attachExternalResultsToToolBlocks(intermediateToolBlocks, toolResultsById); + finalContent.push(...intermediateToolBlocks); // Add text/thinking from the last turn if (builder && builder.message.content.length > 0) { @@ -522,11 +1025,7 @@ async function pumpSdkMessages( }; if (result.is_error) { - const errText = - "errors" in result - ? 
(result as any).errors?.join("; ")
-          : result.subtype;
-      finalMessage.errorMessage = errText;
+      finalMessage.errorMessage = getResultErrorMessage(result);
       stream.push({ type: "error", reason: "error", error: finalMessage });
     } else {
       stream.push({ type: "done", reason: "stop", message: finalMessage });
diff --git a/src/resources/extensions/claude-code-cli/tests/partial-builder.test.ts b/src/resources/extensions/claude-code-cli/tests/partial-builder.test.ts
index 01c853a14..cff2a6830 100644
--- a/src/resources/extensions/claude-code-cli/tests/partial-builder.test.ts
+++ b/src/resources/extensions/claude-code-cli/tests/partial-builder.test.ts
@@ -1,7 +1,7 @@
 import { describe, test } from "node:test";
 import assert from "node:assert/strict";
-import { PartialMessageBuilder } from "../partial-builder.ts";
-import type { BetaRawMessageStreamEvent } from "../sdk-types.ts";
+import { mapContentBlock, parseMcpToolName, PartialMessageBuilder } from "../partial-builder.ts";
+import type { BetaContentBlock, BetaRawMessageStreamEvent } from "../sdk-types.ts";

 describe("PartialMessageBuilder — malformed tool arguments (#2574)", () => {
   /**
@@ -148,3 +148,92 @@ describe("PartialMessageBuilder — malformed tool arguments (#2574)", () => {
     }
   });
 });
+
+describe("parseMcpToolName", () => {
+  test("splits mcp__<server>__<tool> into parts", () => {
+    assert.deepEqual(
+      parseMcpToolName("mcp__gsd-workflow__gsd_plan_milestone"),
+      { server: "gsd-workflow", tool: "gsd_plan_milestone" },
+    );
+  });
+
+  test("preserves server names containing hyphens", () => {
+    assert.deepEqual(
+      parseMcpToolName("mcp__my-cool-server__do_thing"),
+      { server: "my-cool-server", tool: "do_thing" },
+    );
+  });
+
+  test("preserves tool names containing underscores", () => {
+    assert.deepEqual(
+      parseMcpToolName("mcp__srv__a_b_c_d"),
+      { server: "srv", tool: "a_b_c_d" },
+    );
+  });
+
+  test("returns null for non-prefixed names", () => {
+    assert.equal(parseMcpToolName("Bash"), null);
+    assert.equal(parseMcpToolName("gsd_plan_milestone"), null);
+  });
+
+  test("returns null for malformed prefixes", () => {
+    assert.equal(parseMcpToolName("mcp__"), null);
+    assert.equal(parseMcpToolName("mcp__server"), null);
+    assert.equal(parseMcpToolName("mcp__server__"), null);
+    assert.equal(parseMcpToolName("mcp____tool"), null);
+  });
+});
+
+describe("PartialMessageBuilder — MCP tool name normalization", () => {
+  test("strips mcp__<server>__ prefix on content_block_start", () => {
+    const builder = new PartialMessageBuilder("claude-sonnet-4-20250514");
+    const event = builder.handleEvent({
+      type: "content_block_start",
+      index: 0,
+      content_block: {
+        type: "tool_use",
+        id: "tool_1",
+        name: "mcp__gsd-workflow__gsd_plan_milestone",
+        input: {},
+      },
+    } as BetaRawMessageStreamEvent);
+
+    assert.ok(event, "event should not be null");
+    assert.equal(event!.type, "toolcall_start");
+    if (event!.type === "toolcall_start") {
+      const toolCall = (event.partial.content[event.contentIndex] as any);
+      assert.equal(toolCall.name, "gsd_plan_milestone");
+      assert.equal(toolCall.mcpServer, "gsd-workflow");
+    }
+  });
+
+  test("leaves non-MCP tool names untouched", () => {
+    const builder = new PartialMessageBuilder("claude-sonnet-4-20250514");
+    const event = builder.handleEvent({
+      type: "content_block_start",
+      index: 0,
+      content_block: { type: "tool_use", id: "tool_1", name: "Bash", input: {} },
+    } as BetaRawMessageStreamEvent);

+    assert.ok(event);
+    if (event!.type === "toolcall_start") {
+      const toolCall = (event.partial.content[event.contentIndex] as any);
assert.equal(toolCall.name, "Bash"); + assert.equal(toolCall.mcpServer, undefined); + } + }); + + test("mapContentBlock strips MCP prefix on full tool_use blocks", () => { + const block: BetaContentBlock = { + type: "tool_use", + id: "tool_2", + name: "mcp__gsd-workflow__gsd_task_complete", + input: { taskId: "T001" }, + }; + const mapped = mapContentBlock(block) as any; + assert.equal(mapped.type, "toolCall"); + assert.equal(mapped.name, "gsd_task_complete"); + assert.equal(mapped.mcpServer, "gsd-workflow"); + assert.deepEqual(mapped.arguments, { taskId: "T001" }); + }); +}); diff --git a/src/resources/extensions/claude-code-cli/tests/stream-adapter.test.ts b/src/resources/extensions/claude-code-cli/tests/stream-adapter.test.ts index 28d4efdb8..a600852a4 100644 --- a/src/resources/extensions/claude-code-cli/tests/stream-adapter.test.ts +++ b/src/resources/extensions/claude-code-cli/tests/stream-adapter.test.ts @@ -5,13 +5,21 @@ import { join, resolve } from "node:path"; import { tmpdir } from "node:os"; import { makeStreamExhaustedErrorMessage, + getResultErrorMessage, + makeAbortedMessage, + mergePendingToolCalls, + resolveClaudePermissionMode, buildPromptFromContext, buildSdkOptions, + createClaudeCodeElicitationHandler, extractToolResultsFromSdkUserMessage, getClaudeLookupCommand, + parseAskUserQuestionsElicitation, + parseTextInputElicitation, parseClaudeLookupOutput, + roundResultToElicitationContent, } from "../stream-adapter.ts"; -import type { Context, Message } from "@gsd/pi-ai"; +import type { AssistantMessage, Context, Message } from "@gsd/pi-ai"; import type { SDKUserMessage } from "../sdk-types.ts"; // --------------------------------------------------------------------------- @@ -36,6 +44,57 @@ describe("stream-adapter — exhausted stream fallback (#2575)", () => { }); }); +describe("stream-adapter — result error text (#3776)", () => { + test("prefers SDK result text when an error arrives with subtype success", () => { + const message = getResultErrorMessage({ + type: "result", + subtype: "success", + uuid: "uuid-1", + session_id: "session-1", + duration_ms: 1, + duration_api_ms: 1, + is_error: true, + num_turns: 1, + result: 'API Error: 529 {"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}', + stop_reason: null, + total_cost_usd: 0, + usage: { + input_tokens: 0, + output_tokens: 0, + cache_read_input_tokens: 0, + cache_creation_input_tokens: 0, + }, + }); + + assert.match(message, /API Error: 529/); + assert.doesNotMatch(message, /^success$/i); + }); + + test("falls back to a stable classifier when success errors have no text", () => { + const message = getResultErrorMessage({ + type: "result", + subtype: "success", + uuid: "uuid-2", + session_id: "session-2", + duration_ms: 1, + duration_api_ms: 1, + is_error: true, + num_turns: 1, + result: " ", + stop_reason: null, + total_cost_usd: 0, + usage: { + input_tokens: 0, + output_tokens: 0, + cache_read_input_tokens: 0, + cache_creation_input_tokens: 0, + }, + }); + + assert.equal(message, "claude_code_request_failed"); + }); +}); + // --------------------------------------------------------------------------- // Bug #2859 — stateless provider regression tests // --------------------------------------------------------------------------- @@ -108,6 +167,98 @@ describe("stream-adapter — full context prompt (#2859)", () => { }); }); +// --------------------------------------------------------------------------- +// Bug #4102 — transcript fabrication regression tests +// 
---------------------------------------------------------------------------
+
+describe("stream-adapter — no transcript fabrication (#4102)", () => {
+  test("buildPromptFromContext never emits forbidden [User]/[Assistant] bracket headers", () => {
+    const context: Context = {
+      systemPrompt: "You are a helpful assistant.",
+      messages: [
+        { role: "user", content: "First" } as Message,
+        {
+          role: "assistant",
+          content: [{ type: "text", text: "Second" }],
+          api: "anthropic-messages",
+          provider: "claude-code",
+          model: "claude-sonnet-4-20250514",
+          usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
+          stopReason: "stop",
+          timestamp: Date.now(),
+        } as Message,
+        { role: "user", content: "Third" } as Message,
+      ],
+    };
+
+    const prompt = buildPromptFromContext(context);
+
+    assert.ok(!prompt.includes("[User]"), "prompt must not include literal [User] bracket header");
+    assert.ok(!prompt.includes("[Assistant]"), "prompt must not include literal [Assistant] bracket header");
+    assert.ok(!prompt.includes("[System]"), "prompt must not include literal [System] bracket header");
+  });
+
+  test("buildPromptFromContext wraps history in XML-tag structure", () => {
+    const context: Context = {
+      systemPrompt: "You are helpful.",
+      messages: [
+        { role: "user", content: "Hello" } as Message,
+        {
+          role: "assistant",
+          content: [{ type: "text", text: "Hi there" }],
+          api: "anthropic-messages",
+          provider: "claude-code",
+          model: "claude-sonnet-4-20250514",
+          usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
+          stopReason: "stop",
+          timestamp: Date.now(),
+        } as Message,
+      ],
+    };
+
+    const prompt = buildPromptFromContext(context);
+
+    assert.ok(prompt.includes("<conversation_history>"), "prompt must wrap history in <conversation_history>");
+    assert.ok(prompt.includes("</conversation_history>"), "prompt must close </conversation_history>");
+    assert.ok(prompt.includes("<user_message>\nHello\n</user_message>"), "user turn must use <user_message> tags");
+    assert.ok(prompt.includes("<assistant_message>\nHi there\n</assistant_message>"), "assistant turn must use <assistant_message> tags");
+    assert.ok(prompt.includes("<system_prompt>\nYou are helpful.\n</system_prompt>"), "system prompt must use <system_prompt> tags");
+  });
+
+  test("buildPromptFromContext includes a do-not-echo-tags directive as primary instruction", () => {
+    const context: Context = {
+      messages: [{ role: "user", content: "Anything" } as Message],
+    };
+
+    const prompt = buildPromptFromContext(context);
+
+    assert.ok(
+      prompt.startsWith("Respond only to the final user message"),
+      "primary directive must lead the prompt",
+    );
+    assert.ok(prompt.includes("Do not emit <user_message>"), "directive must forbid emitting user_message tag");
+    assert.ok(prompt.includes("<assistant_message>"), "directive must mention assistant_message tag");
+  });
+
+  test("buildPromptFromContext omits <conversation_history> when there are no messages but a system prompt", () => {
+    const context: Context = {
+      systemPrompt: "Seed",
+      messages: [],
+    };
+
+    const prompt = buildPromptFromContext(context);
+
+    assert.ok(prompt.includes("<system_prompt>"), "system prompt must still render");
+    assert.ok(!prompt.includes("<conversation_history>"), "no history wrapper when messages are empty");
+  });
+
+  test("buildPromptFromContext still returns empty string when context is entirely empty", () => {
+    const context: Context = { messages: [] };
+    const prompt = buildPromptFromContext(context);
+    assert.equal(prompt, "", "empty context must not emit a bare directive");
+  });
+});
+
 describe("stream-adapter — Claude Code external tool results", () => {
   test("extractToolResultsFromSdkUserMessage maps tool_result content to tool payloads", () => {
tool_result content to tool payloads", () => {
     const message: SDKUserMessage = {
@@ -217,6 +368,55 @@ describe("stream-adapter — session persistence (#2859)", () => {
       assert.equal(srv.env.GSD_CLI_PATH, "/tmp/gsd");
       assert.equal(srv.env.GSD_PERSIST_WRITE_GATE_STATE, "1");
       assert.equal(srv.env.GSD_WORKFLOW_PROJECT_ROOT, "/tmp/project");
+      assert.deepEqual(options.disallowedTools, ["AskUserQuestion"]);
+      assert.deepEqual(options.allowedTools, [
+        "Read",
+        "Write",
+        "Edit",
+        "Glob",
+        "Grep",
+        "Bash(ls:*)",
+        "Bash(pwd)",
+        "mcp__gsd-workflow__*",
+      ]);
+    } finally {
+      process.env.GSD_WORKFLOW_MCP_COMMAND = prev.GSD_WORKFLOW_MCP_COMMAND;
+      process.env.GSD_WORKFLOW_MCP_NAME = prev.GSD_WORKFLOW_MCP_NAME;
+      process.env.GSD_WORKFLOW_MCP_ARGS = prev.GSD_WORKFLOW_MCP_ARGS;
+      process.env.GSD_WORKFLOW_MCP_ENV = prev.GSD_WORKFLOW_MCP_ENV;
+      process.env.GSD_WORKFLOW_MCP_CWD = prev.GSD_WORKFLOW_MCP_CWD;
+    }
+  });
+
+  test("buildSdkOptions disables AskUserQuestion for custom workflow MCP server names", () => {
+    const prev = {
+      GSD_WORKFLOW_MCP_COMMAND: process.env.GSD_WORKFLOW_MCP_COMMAND,
+      GSD_WORKFLOW_MCP_NAME: process.env.GSD_WORKFLOW_MCP_NAME,
+      GSD_WORKFLOW_MCP_ARGS: process.env.GSD_WORKFLOW_MCP_ARGS,
+      GSD_WORKFLOW_MCP_ENV: process.env.GSD_WORKFLOW_MCP_ENV,
+      GSD_WORKFLOW_MCP_CWD: process.env.GSD_WORKFLOW_MCP_CWD,
+    };
+    try {
+      process.env.GSD_WORKFLOW_MCP_COMMAND = "node";
+      process.env.GSD_WORKFLOW_MCP_NAME = "custom-workflow";
+      process.env.GSD_WORKFLOW_MCP_ARGS = JSON.stringify(["packages/mcp-server/dist/cli.js"]);
+      process.env.GSD_WORKFLOW_MCP_ENV = JSON.stringify({ GSD_CLI_PATH: "/tmp/gsd" });
+      process.env.GSD_WORKFLOW_MCP_CWD = "/tmp/project";
+
+      const options = buildSdkOptions("claude-sonnet-4-20250514", "test");
+      const mcpServers = options.mcpServers as Record<string, unknown>;
+      assert.ok(mcpServers?.["custom-workflow"], "expected custom workflow server config");
+      assert.deepEqual(options.disallowedTools, ["AskUserQuestion"]);
+      assert.deepEqual(options.allowedTools, [
+        "Read",
+        "Write",
+        "Edit",
+        "Glob",
+        "Grep",
+        "Bash(ls:*)",
+        "Bash(pwd)",
+        "mcp__custom-workflow__*",
+      ]);
     } finally {
       process.env.GSD_WORKFLOW_MCP_COMMAND = prev.GSD_WORKFLOW_MCP_COMMAND;
       process.env.GSD_WORKFLOW_MCP_NAME = prev.GSD_WORKFLOW_MCP_NAME;
@@ -252,6 +452,9 @@ describe("stream-adapter — session persistence (#2859)", () => {
       const mcpServers = (options as any).mcpServers;
       if (mcpServers) {
         assert.ok(mcpServers["gsd-workflow"], "if present, must be gsd-workflow");
+        assert.deepEqual((options as any).disallowedTools, ["AskUserQuestion"]);
+      } else {
+        assert.deepEqual((options as any).disallowedTools, ["AskUserQuestion"]);
       }
       rmSync(emptyDir, { recursive: true, force: true });
     } finally {
@@ -298,6 +501,7 @@ describe("stream-adapter — session persistence (#2859)", () => {
       assert.equal(srv.env.GSD_CLI_PATH, "/tmp/gsd");
       assert.equal(srv.env.GSD_PERSIST_WRITE_GATE_STATE, "1");
       assert.equal(srv.env.GSD_WORKFLOW_PROJECT_ROOT, resolvedRepoDir);
+      assert.deepEqual(options.disallowedTools, ["AskUserQuestion"]);
     } finally {
       process.chdir(originalCwd);
       rmSync(repoDir, { recursive: true, force: true });
@@ -309,6 +513,414 @@ describe("stream-adapter — session persistence (#2859)", () => {
       process.env.GSD_CLI_PATH = prev.GSD_CLI_PATH;
     }
   });
+
+  test("buildSdkOptions preserves runtime callbacks such as onElicitation", () => {
+    const prev = {
+      GSD_WORKFLOW_MCP_COMMAND: process.env.GSD_WORKFLOW_MCP_COMMAND,
+      GSD_WORKFLOW_MCP_NAME: process.env.GSD_WORKFLOW_MCP_NAME,
+      GSD_WORKFLOW_MCP_ARGS: 
process.env.GSD_WORKFLOW_MCP_ARGS, + GSD_WORKFLOW_MCP_ENV: process.env.GSD_WORKFLOW_MCP_ENV, + GSD_WORKFLOW_MCP_CWD: process.env.GSD_WORKFLOW_MCP_CWD, + }; + const onElicitation = async () => ({ action: "decline" as const }); + try { + delete process.env.GSD_WORKFLOW_MCP_COMMAND; + delete process.env.GSD_WORKFLOW_MCP_NAME; + delete process.env.GSD_WORKFLOW_MCP_ARGS; + delete process.env.GSD_WORKFLOW_MCP_ENV; + delete process.env.GSD_WORKFLOW_MCP_CWD; + const options = buildSdkOptions("claude-sonnet-4-20250514", "test", undefined, { onElicitation }); + assert.equal(options.onElicitation, onElicitation); + } finally { + process.env.GSD_WORKFLOW_MCP_COMMAND = prev.GSD_WORKFLOW_MCP_COMMAND; + process.env.GSD_WORKFLOW_MCP_NAME = prev.GSD_WORKFLOW_MCP_NAME; + process.env.GSD_WORKFLOW_MCP_ARGS = prev.GSD_WORKFLOW_MCP_ARGS; + process.env.GSD_WORKFLOW_MCP_ENV = prev.GSD_WORKFLOW_MCP_ENV; + process.env.GSD_WORKFLOW_MCP_CWD = prev.GSD_WORKFLOW_MCP_CWD; + } + }); +}); + +describe("stream-adapter — MCP elicitation bridge", () => { + const askUserQuestionsRequest = { + serverName: "gsd-workflow", + message: "Please answer the following question(s).", + mode: "form" as const, + requestedSchema: { + type: "object" as const, + properties: { + storage_scope: { + type: "string", + title: "Storage", + description: "Does this app need to sync across devices?", + oneOf: [ + { const: "Local-only (Recommended)", title: "Local-only (Recommended)" }, + { const: "Cloud-synced", title: "Cloud-synced" }, + { const: "None of the above", title: "None of the above" }, + ], + }, + storage_scope__note: { + type: "string", + title: "Storage Note", + description: "Optional note for None of the above.", + }, + platform: { + type: "array", + title: "Platform", + description: "Where should it run?", + items: { + anyOf: [ + { const: "Web", title: "Web" }, + { const: "Desktop", title: "Desktop" }, + { const: "Mobile", title: "Mobile" }, + ], + }, + }, + }, + }, + }; + + test("parseAskUserQuestionsElicitation rebuilds interview questions from the MCP schema", () => { + const questions = parseAskUserQuestionsElicitation(askUserQuestionsRequest); + assert.deepEqual(questions, [ + { + id: "storage_scope", + header: "Storage", + question: "Does this app need to sync across devices?", + options: [ + { label: "Local-only (Recommended)", description: "" }, + { label: "Cloud-synced", description: "" }, + ], + noteFieldId: "storage_scope__note", + }, + { + id: "platform", + header: "Platform", + question: "Where should it run?", + options: [ + { label: "Web", description: "" }, + { label: "Desktop", description: "" }, + { label: "Mobile", description: "" }, + ], + allowMultiple: true, + }, + ]); + }); + + test("roundResultToElicitationContent preserves notes for None of the above", () => { + const questions = parseAskUserQuestionsElicitation(askUserQuestionsRequest); + assert.ok(questions); + + const content = roundResultToElicitationContent(questions, { + endInterview: false, + answers: { + storage_scope: { + selected: "None of the above", + notes: "Needs selective sync later", + }, + platform: { + selected: ["Web", "Desktop"], + notes: "", + }, + }, + }); + + assert.deepEqual(content, { + storage_scope: "None of the above", + storage_scope__note: "Needs selective sync later", + platform: ["Web", "Desktop"], + }); + }); + + test("createClaudeCodeElicitationHandler accepts interview-style answers from custom UI", async () => { + const handler = createClaudeCodeElicitationHandler({ + custom: async (_factory: any) => ({ + endInterview: 
false, + answers: { + storage_scope: { + selected: "Cloud-synced", + notes: "", + }, + platform: { + selected: ["Web", "Mobile"], + notes: "", + }, + }, + }), + } as any); + + assert.ok(handler); + const result = await handler!(askUserQuestionsRequest, { signal: new AbortController().signal }); + assert.deepEqual(result, { + action: "accept", + content: { + storage_scope: "Cloud-synced", + platform: ["Web", "Mobile"], + }, + }); + }); + + test("createClaudeCodeElicitationHandler falls back to dialog prompts when custom UI is unavailable", async () => { + const ui = { + custom: async () => undefined, + select: async (_title: string, options: string[], opts?: { allowMultiple?: boolean }) => { + if (opts?.allowMultiple) return ["Desktop", "Mobile"]; + return options.includes("None of the above") ? "None of the above" : options[0]; + }, + input: async () => "CLI-only deployment target", + }; + const handler = createClaudeCodeElicitationHandler(ui as any); + assert.ok(handler); + + const result = await handler!(askUserQuestionsRequest, { signal: new AbortController().signal }); + assert.deepEqual(result, { + action: "accept", + content: { + storage_scope: "None of the above", + storage_scope__note: "CLI-only deployment target", + platform: ["Desktop", "Mobile"], + }, + }); + }); + + test("parseTextInputElicitation recognizes secure free-text MCP forms", () => { + const request = { + serverName: "gsd-workflow", + message: "Enter values for environment variables.", + mode: "form" as const, + requestedSchema: { + type: "object" as const, + properties: { + TEST_PASSWORD: { + type: "string", + title: "TEST_PASSWORD", + description: "Format: min 8 characters\nLeave empty to skip.", + }, + PROJECT_NAME: { + type: "string", + title: "PROJECT_NAME", + description: "Human-readable project name.", + }, + }, + }, + }; + + const parsed = parseTextInputElicitation(request as any); + assert.deepEqual(parsed, [ + { + id: "TEST_PASSWORD", + title: "TEST_PASSWORD", + description: "Format: min 8 characters\nLeave empty to skip.", + required: false, + secure: true, + }, + { + id: "PROJECT_NAME", + title: "PROJECT_NAME", + description: "Human-readable project name.", + required: false, + secure: false, + }, + ]); + }); + + test("parseTextInputElicitation accepts legacy keys schema and skips unsupported fields", () => { + const request = { + serverName: "gsd-workflow", + message: "Enter secure values", + mode: "form" as const, + requestedSchema: { + type: "object" as const, + keys: { + API_TOKEN: { + type: "string", + title: "API_TOKEN", + description: "Leave empty to skip.", + }, + META: { + type: "object", + title: "metadata", + }, + }, + }, + }; + + const parsed = parseTextInputElicitation(request as any); + assert.deepEqual(parsed, [ + { + id: "API_TOKEN", + title: "API_TOKEN", + description: "Leave empty to skip.", + required: false, + secure: true, + }, + ]); + }); + + test("createClaudeCodeElicitationHandler collects secure_env_collect fields through input dialogs", async () => { + const secureRequest = { + serverName: "gsd-workflow", + message: "Enter values for environment variables.", + mode: "form" as const, + requestedSchema: { + type: "object" as const, + properties: { + TEST_SECURE_FIELD: { + type: "string", + title: "TEST_SECURE_FIELD", + description: "Format: Your secure testing password\nLeave empty to skip.", + }, + }, + }, + }; + + const inputCalls: Array<{ opts?: { secure?: boolean } }> = []; + const handler = createClaudeCodeElicitationHandler({ + input: async (_title: string, _placeholder?: 
string, opts?: { secure?: boolean }) => { + inputCalls.push({ opts }); + return "example-secure-input"; + }, + } as any); + assert.ok(handler); + + const result = await handler!(secureRequest as any, { signal: new AbortController().signal }); + assert.deepEqual(result, { + action: "accept", + content: { + TEST_SECURE_FIELD: "example-secure-input", + }, + }); + assert.equal(inputCalls.length, 1); + assert.equal(inputCalls[0]?.opts?.secure, true, "secure_env_collect fields should request secure input"); + }); +}); + +// --------------------------------------------------------------------------- +// F2 — abort vs stream-exhausted classification +// --------------------------------------------------------------------------- + +describe("stream-adapter — abort classification (F2)", () => { + test("makeAbortedMessage sets stopReason to 'aborted', not 'error'", () => { + const message = makeAbortedMessage("claude-sonnet-4-6", ""); + assert.equal(message.stopReason, "aborted"); + assert.equal(message.errorMessage, undefined); + }); + + test("makeAbortedMessage preserves last-seen text content", () => { + const message = makeAbortedMessage("claude-sonnet-4-6", "partial mid-stream text"); + assert.deepEqual(message.content, [{ type: "text", text: "partial mid-stream text" }]); + }); + + test("aborted message is distinguishable from stream-exhausted error", () => { + const aborted = makeAbortedMessage("claude-sonnet-4-6", ""); + const exhausted = makeStreamExhaustedErrorMessage("claude-sonnet-4-6", ""); + assert.notEqual(aborted.stopReason, exhausted.stopReason); + assert.equal(exhausted.errorMessage, "stream_exhausted_without_result"); + }); +}); + +// --------------------------------------------------------------------------- +// F3 — final-turn tool calls not dropped +// --------------------------------------------------------------------------- + +describe("stream-adapter — final-turn tool-call merge (F3)", () => { + function toolCall(id: string, name = "bash"): AssistantMessage["content"][number] { + return { type: "toolCall", id, name, arguments: {} }; + } + + test("mergePendingToolCalls appends tool calls not already in intermediate", () => { + const intermediate: AssistantMessage["content"] = [toolCall("tool-1")]; + const pending: AssistantMessage["content"] = [ + toolCall("tool-2"), + { type: "text", text: "trailing text" }, + ]; + const merged = mergePendingToolCalls(intermediate, pending); + assert.equal(merged.length, 2); + assert.equal((merged[0] as any).id, "tool-1"); + assert.equal((merged[1] as any).id, "tool-2"); + }); + + test("mergePendingToolCalls is idempotent across duplicate ids", () => { + const intermediate: AssistantMessage["content"] = [toolCall("tool-1")]; + const pending: AssistantMessage["content"] = [toolCall("tool-1"), toolCall("tool-2")]; + const merged = mergePendingToolCalls(intermediate, pending); + assert.equal(merged.length, 2); + assert.deepEqual( + merged.map((b) => (b as any).id), + ["tool-1", "tool-2"], + ); + }); + + test("mergePendingToolCalls ignores non-toolCall blocks from pending", () => { + const intermediate: AssistantMessage["content"] = []; + const pending: AssistantMessage["content"] = [ + { type: "text", text: "hello" }, + { type: "thinking", thinking: "pondering" }, + toolCall("tool-1"), + ]; + const merged = mergePendingToolCalls(intermediate, pending); + assert.equal(merged.length, 1); + assert.equal((merged[0] as any).id, "tool-1"); + }); +}); + +// --------------------------------------------------------------------------- +// F10 — 
permission mode is configurable
+// ---------------------------------------------------------------------------
+
+describe("stream-adapter — permission mode (F10)", () => {
+  // Earlier tests in this file set GSD_WORKFLOW_MCP_* env vars and restore
+  // them by reassigning from `prev.*`. When `prev.*` was undefined, node
+  // coerces the assignment to the literal string "undefined", which then
+  // fails JSON.parse inside buildWorkflowMcpServers. Clear the relevant
+  // slots before each permission-mode test so buildSdkOptions doesn't throw.
+  function clearWorkflowMcpEnv(): void {
+    for (const key of [
+      "GSD_WORKFLOW_MCP_COMMAND",
+      "GSD_WORKFLOW_MCP_NAME",
+      "GSD_WORKFLOW_MCP_ARGS",
+      "GSD_WORKFLOW_MCP_ENV",
+      "GSD_WORKFLOW_MCP_CWD",
+    ]) {
+      if (process.env[key] === undefined || process.env[key] === "undefined") {
+        delete process.env[key];
+      }
+    }
+  }
+
+  test("buildSdkOptions defaults to bypassPermissions for backwards compatibility", () => {
+    clearWorkflowMcpEnv();
+    const opts = buildSdkOptions("claude-sonnet-4-6", "test");
+    assert.equal(opts.permissionMode, "bypassPermissions");
+    assert.equal(opts.allowDangerouslySkipPermissions, true);
+  });
+
+  test("buildSdkOptions respects explicit acceptEdits override", () => {
+    clearWorkflowMcpEnv();
+    const opts = buildSdkOptions("claude-sonnet-4-6", "test", { permissionMode: "acceptEdits" });
+    assert.equal(opts.permissionMode, "acceptEdits");
+    assert.equal(
+      opts.allowDangerouslySkipPermissions,
+      false,
+      "allowDangerouslySkipPermissions must be false for non-bypass modes",
+    );
+  });
+
+  test("resolveClaudePermissionMode honours the GSD_CLAUDE_CODE_PERMISSION_MODE env override", async () => {
+    const env = { GSD_CLAUDE_CODE_PERMISSION_MODE: "acceptEdits" } as NodeJS.ProcessEnv;
+    const mode = await resolveClaudePermissionMode(env);
+    assert.equal(mode, "acceptEdits");
+  });
+
+  test("resolveClaudePermissionMode rejects unknown override values (fallback path)", async () => {
+    const env = { GSD_CLAUDE_CODE_PERMISSION_MODE: "nonsense" } as NodeJS.ProcessEnv;
+    const mode = await resolveClaudePermissionMode(env);
+    // Unknown override falls back to auto-detect → either bypass or acceptEdits
+    assert.ok(
+      mode === "bypassPermissions" || mode === "acceptEdits",
+      `expected bypass or acceptEdits, got ${mode}`,
+    );
+  });
+});
 
 describe("stream-adapter — Windows Claude path lookup (#3770)", () => {
diff --git a/src/resources/extensions/get-secrets-from-user.ts b/src/resources/extensions/get-secrets-from-user.ts
index a8f1cfe36..967752048 100644
--- a/src/resources/extensions/get-secrets-from-user.ts
+++ b/src/resources/extensions/get-secrets-from-user.ts
@@ -126,7 +126,7 @@ async function collectOneSecret(
 ): Promise<string | null> {
   if (!ctx.hasUI) return null;
 
-  return ctx.ui.custom((tui: any, theme: any, _kb: any, done: (r: string | null) => void) => {
+  const customResult = await ctx.ui.custom((tui: any, theme: any, _kb: any, done: (r: string | null) => void) => {
     let value = "";
     let cachedLines: string[] | undefined;
@@ -223,6 +223,29 @@ async function collectOneSecret(
       handleInput,
     };
   });
+
+  // RPC/web surfaces may not implement ctx.ui.custom(). Fall back to a
+  // standard input prompt so users can still provide the secret.
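+  // The fallback relies only on the plain ctx.ui.input contract already used
+  // elsewhere in this file. A sketch of the assumed shape (illustrative):
+  //   const v = await ctx.ui.input(title, placeholder, { secure: true });
+  //   // resolves to the typed string, or a non-string value on cancel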
+ if (customResult !== undefined) { + return customResult; + } + + if (typeof ctx.ui?.input !== "function") { + return null; + } + + const inputTitle = `Secure value for ${keyName} (${pageIndex + 1}/${totalPages})`; + const inputPlaceholder = hint || "Enter secret value"; + const inputResult = await ctx.ui.input( + inputTitle, + inputPlaceholder, + { secure: true }, + ); + if (typeof inputResult !== "string") { + return null; + } + const trimmed = inputResult.trim(); + return trimmed.length > 0 ? trimmed : null; } /** diff --git a/src/resources/extensions/gsd/auto-dashboard.ts b/src/resources/extensions/gsd/auto-dashboard.ts index e69cb78ad..e18e24599 100644 --- a/src/resources/extensions/gsd/auto-dashboard.ts +++ b/src/resources/extensions/gsd/auto-dashboard.ts @@ -6,7 +6,13 @@ * or AutoContext dependency. State accessors are passed as callbacks. */ -import type { ExtensionContext, ExtensionCommandContext, SessionMessageEntry } from "@gsd/pi-coding-agent"; +import type { + ExtensionContext, + ExtensionCommandContext, + SessionMessageEntry, + ReadonlyFooterDataProvider, + Theme, +} from "@gsd/pi-coding-agent"; import type { GSDState } from "./types.js"; import { getCurrentBranch } from "./worktree.js"; import { getActiveHook } from "./post-unit-hooks.js"; @@ -17,7 +23,6 @@ import { resolveSliceFile, } from "./paths.js"; import { isDbAvailable, getMilestoneSlices, getSliceTasks } from "./gsd-db.js"; -import { formatShortcut } from "./files.js"; import { readFileSync, writeFileSync, existsSync } from "node:fs"; import { execFileSync } from "node:child_process"; import { truncateToWidth, visibleWidth } from "@gsd/pi-tui"; @@ -38,6 +43,7 @@ import { type RtkSessionSavings, } from "../shared/rtk-session-stats.js"; import { logWarning } from "./workflow-logger.js"; +import { formattedShortcutPair } from "./shortcut-defs.js"; // ─── UAT Slice Extraction ───────────────────────────────────────────────────── @@ -358,12 +364,23 @@ function getLastCommit(basePath: string): { timeAgo: string; message: string } | // ─── Footer Factory ─────────────────────────────────────────────────────────── /** - * Footer factory that renders zero lines — hides the built-in footer entirely. - * All footer info (pwd, branch, tokens, cost, model) is shown inside the - * progress widget instead, so there's no gap or redundancy. + * Footer factory used by auto-mode. + * Keep footer minimal but preserve extension status context from setStatus(). */ -export const hideFooter = () => ({ - render(_width: number): string[] { return []; }, +function sanitizeFooterStatus(text: string): string { + return text.replace(/\s+/g, " ").trim(); +} + +export const hideFooter = (_tui: unknown, theme: Theme, footerData: ReadonlyFooterDataProvider) => ({ + render(width: number): string[] { + const extensionStatuses = footerData.getExtensionStatuses(); + if (extensionStatuses.size === 0) return []; + const statusLine = Array.from(extensionStatuses.entries()) + .sort(([a], [b]) => a.localeCompare(b)) + .map(([, text]) => sanitizeFooterStatus(text)) + .join(" "); + return [truncateToWidth(theme.fg("dim", statusLine), width, theme.fg("dim", "..."))]; + }, invalidate() {}, dispose() {}, }); @@ -646,14 +663,6 @@ export function updateProgressWidget( : ""; lines.push(rightAlign(headerLeft, headerRight, width)); - // Worktree/branch right-aligned below header - const branchLabel = worktreeName && cachedBranch - ? `${worktreeName} (${cachedBranch})` - : cachedBranch ?? 
""; - if (branchLabel) { - lines.push(rightAlign("", theme.fg("dim", branchLabel), width)); - } - // Show health signal details when degraded (yellow/red) if (score.level !== "green" && score.signals.length > 0 && widgetMode !== "min") { // Show up to 3 most relevant signals in compact form @@ -917,15 +926,17 @@ export function updateProgressWidget( // Hints line const hintParts: string[] = []; hintParts.push("esc pause"); - hintParts.push(`${formatShortcut("Ctrl+Alt+G")} dashboard`); + hintParts.push(`${formattedShortcutPair("dashboard")} dashboard`); + hintParts.push(`${formattedShortcutPair("parallel")} parallel`); const hintStr = theme.fg("dim", hintParts.join(" | ")); const commitStr = lastCommit ? theme.fg("dim", `${lastCommit.timeAgo} ago: ${commitMsg}`) : ""; + const locationStr = theme.fg("dim", widgetPwd); if (commitStr) { - lines.push(rightAlign(`${pad}${commitStr}`, hintStr, width)); + lines.push(rightAlign(`${pad}${locationStr} · ${commitStr}`, hintStr, width)); } else { - lines.push(rightAlign("", hintStr, width)); + lines.push(rightAlign(`${pad}${locationStr}`, hintStr, width)); } lines.push(...ui.bar()); diff --git a/src/resources/extensions/gsd/auto-model-selection.ts b/src/resources/extensions/gsd/auto-model-selection.ts index 7dc1593f8..c369d23a7 100644 --- a/src/resources/extensions/gsd/auto-model-selection.ts +++ b/src/resources/extensions/gsd/auto-model-selection.ts @@ -5,14 +5,16 @@ */ import type { Api, Model } from "@gsd/pi-ai"; +import { getProviderCapabilities } from "@gsd/pi-ai"; import type { ExtensionAPI, ExtensionContext } from "@gsd/pi-coding-agent"; import type { GSDPreferences } from "./preferences.js"; import { resolveModelWithFallbacksForUnit, resolveDynamicRoutingConfig } from "./preferences.js"; import type { ComplexityTier } from "./complexity-classifier.js"; import { classifyUnitComplexity, tierLabel } from "./complexity-classifier.js"; -import { resolveModelForComplexity, escalateTier, getEligibleModels, loadCapabilityOverrides } from "./model-router.js"; +import { resolveModelForComplexity, escalateTier, getEligibleModels, loadCapabilityOverrides, adjustToolSet, filterToolsForProvider } from "./model-router.js"; import { getLedger, getProjectTotals } from "./metrics.js"; import { unitPhaseLabel } from "./auto-dashboard.js"; +import { getSessionModelOverride } from "./session-model-override.js"; export interface ModelSelectionResult { /** Routing metadata for metrics recording */ @@ -24,10 +26,17 @@ export interface ModelSelectionResult { export function resolvePreferredModelConfig( unitType: string, autoModeStartModel: { provider: string; id: string } | null, + /** When false, only return explicit per-phase model configs — do not + * synthesize a routing ceiling from dynamic_routing.tier_models (#3962). */ + isAutoMode = true, ) { const explicitConfig = resolveModelWithFallbacksForUnit(unitType); if (explicitConfig) return explicitConfig; + // In interactive mode, don't synthesize a routing-based model config. + // The user's session model (/model) should be used as-is (#3962). + if (!isAutoMode) return undefined; + const routingConfig = resolveDynamicRoutingConfig(); if (!routingConfig.enabled || !routingConfig.tier_models) return undefined; @@ -61,8 +70,18 @@ export async function selectAndApplyModel( verbose: boolean, autoModeStartModel: { provider: string; id: string } | null, retryContext?: { isRetry: boolean; previousTier?: string }, + /** When false (interactive/guided-flow), skip dynamic routing and use the session model. 
+ * Dynamic routing only applies in auto-mode where cost optimization is expected. (#3962) */
+  isAutoMode = true,
+  /** Explicit /gsd model pin captured at bootstrap for long-running auto loops. */
+  sessionModelOverride?: { provider: string; id: string } | null,
 ): Promise<ModelSelectionResult> {
-  const modelConfig = resolvePreferredModelConfig(unitType, autoModeStartModel);
+  const effectiveSessionModelOverride = sessionModelOverride === undefined
+    ? getSessionModelOverride(ctx.sessionManager.getSessionId())
+    : (sessionModelOverride ?? undefined);
+  const modelConfig = effectiveSessionModelOverride
+    ? undefined
+    : resolvePreferredModelConfig(unitType, autoModeStartModel, isAutoMode);
   let routing: { tier: string; modelDowngraded: boolean } | null = null;
   let appliedModel: Model | null = null;
@@ -70,7 +89,13 @@
   const availableModels = ctx.modelRegistry.getAvailable();
 
   // ─── Dynamic Model Routing ─────────────────────────────────────────
+  // Dynamic routing (complexity-based downgrading) only applies in auto-mode.
+  // Interactive/guided-flow dispatches use the user's session model directly,
+  // respecting their /model selection without silent downgrades (#3962).
   const routingConfig = resolveDynamicRoutingConfig();
+  if (!isAutoMode) {
+    routingConfig.enabled = false;
+  }
   let effectiveModelConfig = modelConfig;
   let routingTierLabel = "";
 
@@ -122,19 +147,16 @@
       const escalated = escalateTier(retryContext.previousTier as ComplexityTier);
       if (escalated) {
         classification = { ...classification, tier: escalated, reason: "escalated after failure" };
-        if (verbose) {
-          ctx.ui.notify(
-            `Tier escalation: ${retryContext.previousTier} → ${escalated} (retry after failure)`,
-            "info",
-          );
-        }
+        // Always notify on tier escalation — model changes should be visible (#3962)
+        ctx.ui.notify(
+          `Tier escalation: ${retryContext.previousTier} → ${escalated} (retry after failure)`,
+          "info",
+        );
       }
     }
 
     // Load user capability overrides from preferences (D-17: deep-merged with built-in profiles)
-    const capabilityOverrides = loadCapabilityOverrides(
-      (prefs as { modelOverrides?: Record<string, { capabilities?: Record<string, number> }> } | undefined) ?? {},
-    );
+    const capabilityOverrides = loadCapabilityOverrides(prefs ?? {});
 
     // Fire before_model_select hook (ADR-004, D-03)
     // Hook can override model selection entirely by returning { modelId }
@@ -196,24 +218,23 @@
         primary: routingResult.modelId,
         fallbacks: routingResult.fallbacks,
       };
-      if (verbose) {
-        if (routingResult.selectionMethod === "capability-scored" && routingResult.capabilityScores) {
-          // Verbose scoring breakdown for capability-scored decisions (D-20)
-          const tierLbl = tierLabel(classification.tier);
-          const scores = Object.entries(routingResult.capabilityScores)
-            .sort(([, a], [, b]) => b - a)
-            .map(([id, score]) => `${id}: ${score.toFixed(1)}`)
-            .join(", ");
-          ctx.ui.notify(
-            `Dynamic routing [${tierLbl}]: ${routingResult.modelId} (capability-scored) — ${scores}`,
-            "info",
-          );
-        } else {
-          ctx.ui.notify(
-            `Dynamic routing [${tierLabel(classification.tier)}]: ${routingResult.modelId} (${classification.reason})`,
-            "info",
-          );
-        }
+      // Always notify on model downgrade — users should see when their
+      // model selection is overridden, not just in verbose mode (#3962).
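+      // Shapes of the two notifications this emits (model names hypothetical):
+      //   "Dynamic routing [Heavy]: gpt-5 (capability-scored) — gpt-5: 9.5, mini: 7.0"
+      //   "Dynamic routing [Light]: mini (single-file doc change)"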
+ if (routingResult.selectionMethod === "capability-scored" && routingResult.capabilityScores) { + const tierLbl = tierLabel(classification.tier); + const scores = Object.entries(routingResult.capabilityScores) + .sort(([, a], [, b]) => b - a) + .map(([id, score]) => `${id}: ${score.toFixed(1)}`) + .join(", "); + ctx.ui.notify( + `Dynamic routing [${tierLbl}]: ${routingResult.modelId} (capability-scored) — ${scores}`, + "info", + ); + } else { + ctx.ui.notify( + `Dynamic routing [${tierLabel(classification.tier)}]: ${routingResult.modelId} (${classification.reason})`, + "info", + ); } } routingTierLabel = ` [${tierLabel(classification.tier)}]`; @@ -246,12 +267,45 @@ export async function selectAndApplyModel( const ok = await pi.setModel(model, { persist: false }); if (ok) { appliedModel = model; + + // ADR-005: Adjust active tool set for the selected model's provider capabilities. + // Hard-filter incompatible tools, then let extensions override via adjust_tool_set hook. + const activeToolNames = pi.getActiveTools(); + const { toolNames: compatibleTools, removedTools } = adjustToolSet(activeToolNames, model.api); + let finalToolNames = compatibleTools; + + // Fire adjust_tool_set hook — extensions can override the filtered tool set + if (routingConfig.hooks !== false) { + const hookResult = await pi.emitAdjustToolSet({ + selectedModelApi: model.api, + selectedModelProvider: model.provider, + selectedModelId: model.id, + activeToolNames, + filteredTools: removedTools, + }); + if (hookResult?.toolNames) { + finalToolNames = hookResult.toolNames; + } + } + + // Apply the filtered tool set if any tools were removed + if (removedTools.length > 0 || finalToolNames.length !== activeToolNames.length) { + pi.setActiveTools(finalToolNames); + } + if (verbose) { const fallbackNote = modelId === effectiveModelConfig.primary ? "" : ` (fallback from ${effectiveModelConfig.primary})`; const phase = unitPhaseLabel(unitType); ctx.ui.notify(`Model [${phase}]${routingTierLabel}: ${model.provider}/${model.id}${fallbackNote}`, "info"); + // ADR-005: Report tools filtered due to provider incompatibility + if (removedTools.length > 0) { + ctx.ui.notify( + `Tool compatibility: ${removedTools.length} tools filtered for ${model.api} — ${removedTools.join(", ")}`, + "info", + ); + } } break; } else { diff --git a/src/resources/extensions/gsd/auto-observability.ts b/src/resources/extensions/gsd/auto-observability.ts deleted file mode 100644 index 0715a9ac4..000000000 --- a/src/resources/extensions/gsd/auto-observability.ts +++ /dev/null @@ -1,72 +0,0 @@ -/** - * Pre-dispatch observability checks for auto-mode units. - * Validates plan/summary file quality and builds repair instructions - * for the agent to fix gaps before proceeding with the unit. 
- */
-
-import type { ExtensionContext } from "@gsd/pi-coding-agent";
-import {
-  validatePlanBoundary,
-  validateExecuteBoundary,
-  validateCompleteBoundary,
-  formatValidationIssues,
-} from "./observability-validator.js";
-import type { ValidationIssue } from "./observability-validator.js";
-import { parseUnitId } from "./unit-id.js";
-
-export async function collectObservabilityWarnings(
-  ctx: ExtensionContext,
-  basePath: string,
-  unitType: string,
-  unitId: string,
-): Promise<ValidationIssue[]> {
-  // Hook units have custom artifacts — skip standard observability checks
-  if (unitType.startsWith("hook/")) return [];
-
-  const { milestone: mid, slice: sid, task: tid } = parseUnitId(unitId);
-
-  if (!mid || !sid) return [];
-
-  let issues = [] as Awaited<ReturnType<typeof validatePlanBoundary>>;
-
-  if (unitType === "plan-slice") {
-    issues = await validatePlanBoundary(basePath, mid, sid);
-  } else if (unitType === "execute-task" && tid) {
-    issues = await validateExecuteBoundary(basePath, mid, sid, tid);
-  } else if (unitType === "complete-slice") {
-    issues = await validateCompleteBoundary(basePath, mid, sid);
-  }
-
-  if (issues.length > 0) {
-    ctx.ui.notify(
-      `Observability check (${unitType}) found ${issues.length} warning${issues.length === 1 ? "" : "s"}:\n${formatValidationIssues(issues)}`,
-      "warning",
-    );
-  }
-
-  return issues;
-}
-
-export function buildObservabilityRepairBlock(issues: ValidationIssue[]): string {
-  if (issues.length === 0) return "";
-  const items = issues.map(issue => {
-    const fileName = issue.file.split("/").pop() || issue.file;
-    let line = `- **${fileName}**: ${issue.message}`;
-    if (issue.suggestion) line += ` → ${issue.suggestion}`;
-    return line;
-  });
-  return [
-    "",
-    "---",
-    "",
-    "## Pre-flight: Observability gaps to fix FIRST",
-    "",
-    "The following issues were detected in plan/summary files for this unit.",
-    "**Read each flagged file, apply the fix described, then proceed with the unit.**",
-    "",
-    ...items,
-    "",
-    "---",
-    "",
-  ].join("\n");
-}
diff --git a/src/resources/extensions/gsd/auto-post-unit.ts b/src/resources/extensions/gsd/auto-post-unit.ts
index b0bd77dd2..dc710830f 100644
--- a/src/resources/extensions/gsd/auto-post-unit.ts
+++ b/src/resources/extensions/gsd/auto-post-unit.ts
@@ -25,6 +25,7 @@ import {
   buildTaskFileName,
 } from "./paths.js";
 import { invalidateAllCaches } from "./cache.js";
+import { rebuildState } from "./doctor.js";
 import { parseUnitId } from "./unit-id.js";
 import { closeoutUnit, type CloseoutOptions } from "./auto-unit-closeout.js";
 import {
@@ -367,6 +368,12 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
     }
   });
 
+  // Keep the on-disk STATE.md aligned with the live derived state after
+  // ordinary unit completion, before any worktree state is synced back.
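+  // Wrapped in runSafely so a rebuild hiccup is logged as a post-unit step
+  // failure instead of aborting the closeout pipeline (same pattern as the
+  // worktree-sync step below).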
+ await runSafely("postUnit", "state-rebuild", async () => { + await rebuildState(s.basePath); + }); + // Sync worktree state back to project root (skipped for lightweight sidecars) if (!opts?.skipWorktreeSync && s.originalBasePath && s.originalBasePath !== s.basePath) { await runSafely("postUnit", "worktree-sync", () => { diff --git a/src/resources/extensions/gsd/auto-prompts.ts b/src/resources/extensions/gsd/auto-prompts.ts index 5e8bff3c4..28217afd6 100644 --- a/src/resources/extensions/gsd/auto-prompts.ts +++ b/src/resources/extensions/gsd/auto-prompts.ts @@ -24,7 +24,13 @@ import { getLoadedSkills, type Skill } from "@gsd/pi-coding-agent"; import { join, basename } from "node:path"; import { existsSync } from "node:fs"; import { computeBudgets, resolveExecutorContextWindow, truncateAtSectionBoundary } from "./context-budget.js"; -import { getPendingGates } from "./gsd-db.js"; +import { getPendingGates, getPendingGatesForTurn } from "./gsd-db.js"; +import { + GATE_REGISTRY, + assertGateCoverage, + getGatesForTurn, + type GateDefinition, +} from "./gate-registry.js"; import { formatDecisionsCompact, formatRequirementsCompact } from "./structured-data-formatter.js"; import { readPhaseAnchor, formatAnchorForPrompt } from "./phase-anchor.js"; import { logWarning } from "./workflow-logger.js"; @@ -997,7 +1003,7 @@ export async function buildDiscussMilestonePrompt(mid: string, midTitle: string, milestoneId: mid, milestoneTitle: midTitle, inlinedTemplates: discussTemplates, - structuredQuestionsAvailable: "true", + structuredQuestionsAvailable: "false", commitInstruction: "Do not commit planning artifacts — .gsd/ is managed externally.", fastPathInstruction: "", }); @@ -1395,6 +1401,17 @@ export async function buildExecuteTaskPrompt( const phaseAnchorSection = planAnchor ? formatAnchorForPrompt(planAnchor) : ""; + // Task-scoped gates owned by execute-task (Q5/Q6/Q7). Pull only the + // gates that plan-slice actually seeded for this task — tasks with no + // external dependencies legitimately skip Q5, tasks with no runtime + // load dimension skip Q6, etc. + const etPending = getPendingGatesForTurn(mid, sid, "execute-task", tid); + assertGateCoverage(etPending, "execute-task", { requireAll: false }); + const gatesToClose = renderGatesToCloseBlock( + getGatesForTurn("execute-task"), + { pending: new Set(etPending.map((g) => g.gate_id)), allowOmit: true }, + ); + return loadPrompt("execute-task", { overridesSection, runtimeContext, @@ -1412,6 +1429,7 @@ export async function buildExecuteTaskPrompt( taskSummaryPath, inlinedTemplates, verificationBudget, + gatesToClose, skillActivation: buildSkillActivationBlock({ base, milestoneId: mid, @@ -1477,6 +1495,19 @@ export async function buildCompleteSlicePrompt( const sliceSummaryPath = join(base, `${sliceRel}/${sid}-SUMMARY.md`); const sliceUatPath = join(base, `${sliceRel}/${sid}-UAT.md`); + // Gates owned by complete-slice (e.g. Q8). Pull from the DB so the + // prompt only prompts for gates the plan actually seeded. The tool + // handler closes each gate based on the SUMMARY.md section content + // after the assistant calls gsd_complete_slice. + const csPending = getPendingGatesForTurn(mid, sid, "complete-slice"); + // coverage check: every pending row must be owned by complete-slice. + // requireAll:false because a slice may have already closed some gates. 
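+  // Illustrative outcome: when csPending is empty (e.g. Q8 closed by an
+  // earlier partial run), renderGatesToCloseBlock returns "" and the
+  // template simply renders no "Gates to Close" section.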
+  assertGateCoverage(csPending, "complete-slice", { requireAll: false });
+  const gatesToClose = renderGatesToCloseBlock(
+    getGatesForTurn("complete-slice"),
+    { pending: new Set(csPending.map((g) => g.gate_id)), allowOmit: true },
+  );
+
   return loadPrompt("complete-slice", {
     workingDirectory: base,
     milestoneId: mid, sliceId: sid, sliceTitle: sTitle,
@@ -1485,6 +1516,7 @@
     inlinedContext,
     sliceSummaryPath,
     sliceUatPath,
+    gatesToClose,
   });
 }
 
@@ -1503,7 +1535,9 @@ export async function buildCompleteMilestonePrompt(
   try {
     const { isDbAvailable, getMilestoneSlices } = await import("./gsd-db.js");
     if (isDbAvailable()) {
-      sliceIds = getMilestoneSlices(mid).map(s => s.id);
+      sliceIds = getMilestoneSlices(mid)
+        .filter(s => s.status !== "skipped")
+        .map(s => s.id);
     }
   } catch (err) {
     logWarning("prompt", `buildCompleteMilestonePrompt DB lookup failed: ${err instanceof Error ? err.message : String(err)}`);
@@ -1597,7 +1631,9 @@ export async function buildValidateMilestonePrompt(
   try {
     const { isDbAvailable, getMilestoneSlices } = await import("./gsd-db.js");
     if (isDbAvailable()) {
-      valSliceIds = getMilestoneSlices(mid).map(s => s.id);
+      valSliceIds = getMilestoneSlices(mid)
+        .filter(s => s.status !== "skipped")
+        .map(s => s.id);
     }
   } catch (err) {
     logWarning("prompt", `buildValidateMilestonePrompt slice IDs lookup failed: ${err instanceof Error ? err.message : String(err)}`);
@@ -1671,6 +1707,16 @@ export async function buildValidateMilestonePrompt(
   const validationOutputPath = join(base, `${relMilestonePath(base, mid)}/${mid}-VALIDATION.md`);
   const roadmapOutputPath = `${relMilestonePath(base, mid)}/${mid}-ROADMAP.md`;
 
+  // Every milestone validation turn owns MV01–MV04 unconditionally: the
+  // registry is the source of truth for which gates the validator must
+  // address, and the block below is what the template renders so the
+  // assistant can never accidentally skip one.
+  const mvGates = getGatesForTurn("validate-milestone");
+  const gatesToEvaluate = renderGatesToCloseBlock(mvGates, {
+    pending: new Set(mvGates.map((g) => g.id)),
+    allowOmit: false,
+  });
+
   return loadPrompt("validate-milestone", {
     workingDirectory: base,
     milestoneId: mid,
@@ -1679,6 +1725,7 @@
     inlinedContext,
     validationPath: validationOutputPath,
     remediationRound: String(remediationRound),
+    gatesToEvaluate,
     skillActivation: buildSkillActivationBlock({
       base,
       milestoneId: mid,
@@ -1951,27 +1998,51 @@ export async function buildReactiveExecutePrompt(
 }
 
 // ─── Gate Evaluation ──────────────────────────────────────────────────────
+//
+// Gate definitions (question, guidance, owner turn) now live in
+// gate-registry.ts so that prompt builders, dispatch rules, state
+// derivation, and tool handlers all consult the same source of truth.
+// See gate-registry.ts for the full ownership map.
 
-const GATE_QUESTIONS: Record<string, { question: string; guidance: string }> = {
-  Q3: {
-    question: "How can this be exploited?",
-    guidance: [
-      "Identify abuse scenarios: parameter tampering, replay attacks, privilege escalation.",
-      "Map data exposure risks: PII, tokens, secrets accessible through this slice.",
-      "Define input trust boundaries: untrusted user input reaching DB, API, or filesystem.",
-      "If none apply, return verdict 'omitted' with rationale explaining why.",
-    ].join("\n"),
-  },
-  Q4: {
-    question: "What existing promises does this break?",
-    guidance: [
-      "List which existing requirements (R001, R003, etc.)
are touched by this slice.",
-      "Identify what must be re-tested after shipping.",
-      "Flag decisions that should be revisited given the new scope.",
-      "If no existing requirements are affected, return verdict 'omitted'.",
-    ].join("\n"),
-  },
-};
+/**
+ * Render a "Gates to Close" block for turns like `complete-slice` and
+ * `validate-milestone` that own gates which are closed as a side-effect
+ * of writing artifact sections (not via a dedicated gate-evaluate
+ * subagent loop).
+ *
+ * Returns a plain-text block or an empty string if there are no gates to
+ * close, so callers can drop it straight into a template variable.
+ */
+function renderGatesToCloseBlock(
+  gates: ReadonlyArray<GateDefinition>,
+  opts: { pending: ReadonlySet<string>; allowOmit: boolean },
+): string {
+  const applicable = gates.filter((g) => opts.pending.has(g.id));
+  if (applicable.length === 0) return "";
+
+  const lines: string[] = [];
+  lines.push("## Gates to Close");
+  lines.push("");
+  lines.push(
+    "These quality gates are still pending for this unit. You MUST address every one before calling the closing tool — the handler closes the DB row based on whether the corresponding artifact section is present.",
+  );
+  lines.push("");
+  for (const def of applicable) {
+    lines.push(`### ${def.id} — ${def.promptSection}`);
+    lines.push("");
+    lines.push(`**Question:** ${def.question}`);
+    lines.push("");
+    lines.push(def.guidance);
+    if (opts.allowOmit) {
+      lines.push("");
+      lines.push(
+        `If this gate genuinely does not apply to this unit, leave the **${def.promptSection}** section empty and the handler will record it as \`omitted\`. Otherwise, fill the section with concrete evidence.`,
+      );
+    }
+    lines.push("");
+  }
+  return lines.join("\n").trimEnd();
+}
 
 export async function buildParallelResearchSlicesPrompt(
   mid: string,
@@ -2007,28 +2078,39 @@ export async function buildGateEvaluatePrompt(
   mid: string, midTitle: string, sid: string, sTitle: string,
   base: string,
 ): Promise<string> {
-  const pending = getPendingGates(mid, sid, "slice");
+  // Pull only the gates this turn actually owns (Q3/Q4). Filter via the
+  // registry so that scope:"slice" gates owned by other turns (Q8) can't
+  // leak into this prompt and can't block dispatch via silent skip.
+  const pending = getPendingGatesForTurn(mid, sid, "gate-evaluate");
+
+  // Fails loudly if the pending list contains a gate id the registry
+  // doesn't own for this turn. Missing owned gates is allowed here —
+  // `gate-evaluate` is dispatched whenever *any* of its owned gates are
+  // pending, not only when all of them are.
+  assertGateCoverage(pending, "gate-evaluate", { requireAll: false });
 
   // Load the slice plan for context
   const planFile = resolveSliceFile(base, mid, sid, "PLAN");
   const planContent = planFile ? (await loadFile(planFile)) ?? "(plan file empty)" : "(plan file not found)";
 
-  // Build per-gate subagent prompts
+  // Build per-gate subagent prompts from the pending rows. Because the
+  // registry has already validated every row, `getGateDefinition` cannot
+  // return undefined here.
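+  // Sketch of the narrowing (gate ids illustrative): pending rows for
+  // [Q3, Q4] keep exactly those registry definitions; a row this turn
+  // does not own would already have tripped assertGateCoverage above.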
+ const pendingIds = new Set(pending.map((g) => g.gate_id)); + const gateDefs = getGatesForTurn("gate-evaluate").filter((def) => pendingIds.has(def.id)); + const subagentSections: string[] = []; const gateListLines: string[] = []; - for (const gate of pending) { - const meta = GATE_QUESTIONS[gate.gate_id]; - if (!meta) continue; - - gateListLines.push(`- **${gate.gate_id}**: ${meta.question}`); + for (const def of gateDefs) { + gateListLines.push(`- **${def.id}**: ${def.question}`); const subPrompt = [ - `You are evaluating quality gate **${gate.gate_id}** for slice ${sid} (${sTitle}).`, + `You are evaluating quality gate **${def.id}** for slice ${sid} (${sTitle}).`, "", - `## Question: ${meta.question}`, + `## Question: ${def.question}`, "", - meta.guidance, + def.guidance, "", "## Slice Plan", "", @@ -2040,14 +2122,14 @@ export async function buildGateEvaluatePrompt( `Call the \`gsd_save_gate_result\` tool with:`, `- \`milestoneId\`: "${mid}"`, `- \`sliceId\`: "${sid}"`, - `- \`gateId\`: "${gate.gate_id}"`, + `- \`gateId\`: "${def.id}"`, "- `verdict`: \"pass\" (no concerns), \"flag\" (concerns found), or \"omitted\" (not applicable)", "- `rationale`: one-sentence justification", "- `findings`: detailed markdown findings (or empty if omitted)", ].join("\n"); subagentSections.push([ - `### ${gate.gate_id}: ${meta.question}`, + `### ${def.id}: ${def.question}`, "", "Use this as the prompt for a `subagent` call:", "", diff --git a/src/resources/extensions/gsd/auto-recovery.ts b/src/resources/extensions/gsd/auto-recovery.ts index 92086af16..3fb3d8336 100644 --- a/src/resources/extensions/gsd/auto-recovery.ts +++ b/src/resources/extensions/gsd/auto-recovery.ts @@ -272,6 +272,16 @@ export function verifyExpectedArtifact( if (!isValidationTerminal(validationContent)) return false; } + if (unitType === "plan-milestone") { + try { + const roadmap = parseLegacyRoadmap(readFileSync(absPath, "utf-8")); + if (roadmap.slices.length === 0) return false; + } catch (err) { + logWarning("recovery", `plan-milestone roadmap verification failed: ${err instanceof Error ? err.message : String(err)}`); + return false; + } + } + // plan-slice must produce a plan with actual task entries, not just a scaffold. // The plan file may exist from a prior discussion/context step with only headings // but no tasks. Without this check the artifact is considered "complete" and the diff --git a/src/resources/extensions/gsd/auto-start.ts b/src/resources/extensions/gsd/auto-start.ts index 3f737c638..6c22d38ef 100644 --- a/src/resources/extensions/gsd/auto-start.ts +++ b/src/resources/extensions/gsd/auto-start.ts @@ -83,8 +83,9 @@ import { join } from "node:path"; import { sep as pathSep } from "node:path"; import { resolveProjectRootDbPath } from "./bootstrap/dynamic-tools.js"; -import { resolveDefaultSessionModel } from "./preferences-models.js"; +import { resolveDefaultSessionModel, resolveDynamicRoutingConfig } from "./preferences-models.js"; import type { WorktreeResolver } from "./worktree-resolver.js"; +import { getSessionModelOverride } from "./session-model-override.js"; export interface BootstrapDeps { shouldUseWorktreeIsolation: () => boolean; @@ -266,13 +267,42 @@ export async function bootstrapAutoSession( // Capture the user's session model before guided-flow dispatch can apply a // phase-specific planning model for a discuss turn (#2829). // - // GSD PREFERENCES.md takes priority over the session model from settings.json - // (#3517). 
The session model (ctx.model) comes from findInitialModel() which - // reads defaultProvider/defaultModel from ~/.gsd/agent/settings.json. When - // the user has explicit model preferences in PREFERENCES.md, those should win. + // Precedence: + // 1) Explicit session override via /gsd model (this session) + // 2) GSD model preferences from PREFERENCES.md (validated against live auth) + // 3) Current session model from settings/session restore (if provider ready) + // + // This preserves #3517 defaults while honoring explicit runtime model + // selection for subsequent /gsd runs in the same session. + const manualSessionOverride = getSessionModelOverride(ctx.sessionManager.getSessionId()); const preferredModel = resolveDefaultSessionModel(ctx.model?.provider); - const startModelSnapshot = preferredModel - ?? (ctx.model + // Validate the preferred model against the live registry + provider auth so + // an unconfigured PREFERENCES.md entry (no API key / OAuth) can't become the + // start-model snapshot. Without this, every subsequent unit would try to + // fall back to an unusable model. + let validatedPreferredModel: { provider: string; id: string } | undefined; + if (preferredModel) { + const { resolveModelId } = await import("./auto-model-selection.js"); + const available = ctx.modelRegistry.getAvailable(); + const match = resolveModelId( + `${preferredModel.provider}/${preferredModel.id}`, + available, + ctx.model?.provider, + ); + if (match) { + validatedPreferredModel = { provider: match.provider, id: match.id }; + } else { + ctx.ui.notify( + `Preferred model ${preferredModel.provider}/${preferredModel.id} from PREFERENCES.md is not configured; falling back to session default.`, + "warning", + ); + } + } + const sessionModelReady = + ctx.model && ctx.modelRegistry.isProviderRequestReady(ctx.model.provider); + const startModelSnapshot = manualSessionOverride + ?? validatedPreferredModel + ?? (sessionModelReady && ctx.model ? { provider: ctx.model.provider, id: ctx.model.id } : null); @@ -335,19 +365,9 @@ export async function bootstrapAutoSession( } } - if (ctx.model?.provider === "claude-code") { - try { - const { ensureProjectWorkflowMcpConfig } = await import("./mcp-project-config.js"); - const result = ensureProjectWorkflowMcpConfig(base); - if (result.status !== "unchanged") { - ctx.ui.notify(`Claude Code MCP prepared at ${result.configPath}`, "info"); - } - } catch (err) { - ctx.ui.notify( - `Claude Code MCP prep failed: ${err instanceof Error ? 
err.message : String(err)}`, - "warning", - ); - } + { + const { prepareWorkflowMcpForProject } = await import("./workflow-mcp-auto-prep.js"); + prepareWorkflowMcpForProject(ctx, base); } // Initialize GitServiceImpl @@ -604,6 +624,9 @@ export async function bootstrapAutoSession( s.consecutiveCompleteBootstraps = 0; // ── Initialize session state ── + // Notify shared phase state so subagent conflict checks can fire + const { activateGSD: activateGSDPhaseState } = await import("../shared/gsd-phase-state.js"); + activateGSDPhaseState(); s.active = true; s.stepMode = requestedStepMode; s.verbose = verboseMode; @@ -688,7 +711,7 @@ export async function bootstrapAutoSession( } // ── DB lifecycle ── - const gsdDbPath = join(s.basePath, ".gsd", "gsd.db"); + const gsdDbPath = resolveProjectRootDbPath(s.basePath); const gsdDirPath = join(s.basePath, ".gsd"); if (existsSync(gsdDirPath) && !existsSync(gsdDbPath)) { const hasDecisions = existsSync(join(gsdDirPath, "DECISIONS.md")); @@ -741,6 +764,7 @@ export async function bootstrapAutoSession( id: startModelSnapshot.id, }; } + s.manualSessionModelOverride = manualSessionOverride ?? null; // Apply worker model override from parallel orchestrator (#worker-model). // GSD_WORKER_MODEL is injected by the coordinator when parallel.worker_model @@ -778,6 +802,39 @@ export async function bootstrapAutoSession( : "Will loop until milestone complete."; ctx.ui.notify(`${modeLabel} started. ${scopeMsg}`, "info"); + // Show dynamic routing status so users know upfront if models will be + // downgraded for simple tasks (#3962). + // Use the same effective logic as selectAndApplyModel: check flat-rate + // provider suppression and resolve the actual ceiling model. + const routingConfig = resolveDynamicRoutingConfig(); + const startModelLabel = s.autoModeStartModel + ? `${s.autoModeStartModel.provider}/${s.autoModeStartModel.id}` + : ctx.model ? `${ctx.model.provider}/${ctx.model.id}` : "default"; + + // Flat-rate providers (e.g. GitHub Copilot, claude-code) suppress routing + // at dispatch time (#3453) — reflect that in the banner. + const { isFlatRateProvider } = await import("./auto-model-selection.js"); + const effectiveProvider = s.autoModeStartModel?.provider ?? ctx.model?.provider; + const effectivelyEnabled = routingConfig.enabled + && !(effectiveProvider && isFlatRateProvider(effectiveProvider)); + + // The actual ceiling may come from tier_models.heavy, not the start model. + const effectiveCeiling = (routingConfig.enabled && routingConfig.tier_models?.heavy) + ? routingConfig.tier_models.heavy + : startModelLabel; + + if (effectivelyEnabled) { + ctx.ui.notify( + `Dynamic routing: enabled — simple tasks may use cheaper models (ceiling: ${effectiveCeiling})`, + "info", + ); + } else { + ctx.ui.notify( + `Dynamic routing: disabled — all tasks will use ${startModelLabel}`, + "info", + ); + } + updateSessionLock( lockBase(), "starting", diff --git a/src/resources/extensions/gsd/auto-tool-tracking.ts b/src/resources/extensions/gsd/auto-tool-tracking.ts index 9e7ffc049..cab495813 100644 --- a/src/resources/extensions/gsd/auto-tool-tracking.ts +++ b/src/resources/extensions/gsd/auto-tool-tracking.ts @@ -92,7 +92,7 @@ export function clearInFlightTools(): void { * handler. When these errors occur, retrying the same unit will produce the same * failure, so the retry loop must be broken. 
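+ * Example of a message the widened pattern must now match (exact wording
+ * varies by Node/V8 version): "Expected ',' or '}' after property value
+ * in JSON at position 52".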
*/ -const TOOL_INVOCATION_ERROR_RE = /Validation failed for tool|Expected ',' or '\}' in JSON|Unexpected end of JSON|Unexpected token.*in JSON/i; +const TOOL_INVOCATION_ERROR_RE = /Validation failed for tool|Expected ',' or '\}'(?: after property value)?(?: in JSON)?|Unexpected end of JSON|Unexpected token.*in JSON/i; /** * Returns true if the error message indicates a tool invocation failure due to diff --git a/src/resources/extensions/gsd/auto-worktree.ts b/src/resources/extensions/gsd/auto-worktree.ts index a0e14c663..2f6ad4036 100644 --- a/src/resources/extensions/gsd/auto-worktree.ts +++ b/src/resources/extensions/gsd/auto-worktree.ts @@ -2043,7 +2043,7 @@ export function mergeMilestoneToMain( // 12. Remove worktree directory first (must happen before branch deletion) try { removeWorktree(originalBasePath_, milestoneId, { - branch: null as unknown as string, + branch: milestoneBranch, deleteBranch: false, }); } catch (err) { diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts index 79b7fdc37..47e29c0bc 100644 --- a/src/resources/extensions/gsd/auto.ts +++ b/src/resources/extensions/gsd/auto.ts @@ -115,6 +115,7 @@ import { resetSkillTelemetry, } from "./skill-telemetry.js"; import { getRtkSessionSavings } from "../shared/rtk-session-stats.js"; +import { deactivateGSD } from "../shared/gsd-phase-state.js"; import { initMetrics, resetMetrics, @@ -126,6 +127,7 @@ import { import { setLogBasePath, logWarning, logError } from "./workflow-logger.js"; import { homedir } from "node:os"; import { join } from "node:path"; +import { pathToFileURL } from "node:url"; import { readFileSync, existsSync, mkdirSync, writeFileSync, unlinkSync } from "node:fs"; import { atomicWriteSync } from "./atomic-write.js"; import { @@ -164,6 +166,7 @@ import { reconcileMergeState, } from "./auto-recovery.js"; import { resolveDispatch, DISPATCH_RULES } from "./auto-dispatch.js"; +import { getErrorMessage } from "./error-utils.js"; import { initRegistry, convertDispatchRules } from "./rule-registry.js"; import { emitJournalEvent as _emitJournalEvent, type JournalEntry } from "./journal.js"; import { @@ -271,6 +274,53 @@ function restoreProjectRootEnv(): void { s.projectRootEnvCaptured = false; } +function captureMilestoneLockEnv(milestoneId: string | null): void { + if (!s.milestoneLockEnvCaptured) { + s.hadMilestoneLockEnv = Object.prototype.hasOwnProperty.call(process.env, "GSD_MILESTONE_LOCK"); + s.previousMilestoneLockEnv = process.env.GSD_MILESTONE_LOCK ?? 
null;
+    s.milestoneLockEnvCaptured = true;
+  }
+
+  if (milestoneId) {
+    process.env.GSD_MILESTONE_LOCK = milestoneId;
+  } else {
+    delete process.env.GSD_MILESTONE_LOCK;
+  }
+}
+
+function restoreMilestoneLockEnv(): void {
+  if (!s.milestoneLockEnvCaptured) return;
+
+  if (s.hadMilestoneLockEnv && s.previousMilestoneLockEnv !== null) {
+    process.env.GSD_MILESTONE_LOCK = s.previousMilestoneLockEnv;
+  } else {
+    delete process.env.GSD_MILESTONE_LOCK;
+  }
+
+  s.previousMilestoneLockEnv = null;
+  s.hadMilestoneLockEnv = false;
+  s.milestoneLockEnvCaptured = false;
+}
+
+export function startAutoDetached(
+  ctx: ExtensionCommandContext,
+  pi: ExtensionAPI,
+  base: string,
+  verboseMode: boolean,
+  options?: {
+    step?: boolean;
+    interrupted?: InterruptedSessionAssessment;
+    milestoneLock?: string | null;
+  },
+): void {
+  void startAuto(ctx, pi, base, verboseMode, options).catch((err) => {
+    const message = getErrorMessage(err);
+    ctx.ui.notify(`Auto-start failed: ${message}`, "error");
+    logWarning("engine", `auto start error: ${message}`, { file: "auto.ts" });
+    debugLog("auto-start-failed", { error: message });
+  });
+}
+
 export function shouldUseWorktreeIsolation(): boolean {
   const prefs = loadEffectiveGSDPreferences()?.preferences?.git;
   if (prefs?.isolation === "worktree") return true;
@@ -548,11 +598,13 @@ function buildSnapshotOpts(
   _unitType: string,
   _unitId: string,
 ): {
+  autoSessionKey?: string;
   continueHereFired?: boolean;
   promptCharCount?: number;
   baselineCharCount?: number;
 } & Record<string, unknown> {
   return {
+    ...(s.autoStartTime > 0 ? { autoSessionKey: String(s.autoStartTime) } : {}),
     promptCharCount: s.lastPromptCharCount,
     baselineCharCount: s.lastBaselineCharCount,
     ...(s.currentUnitRouting ?? {}),
@@ -571,8 +623,10 @@ function handleLostSessionLock(
   });
   s.active = false;
   s.paused = false;
+  deactivateGSD();
   clearUnitTimeout();
   restoreProjectRootEnv();
+  restoreMilestoneLockEnv();
   deregisterSigtermHandler();
   clearCmuxSidebar(loadEffectiveGSDPreferences()?.preferences);
   const base = lockBase();
@@ -607,8 +661,10 @@ function cleanupAfterLoopExit(ctx: ExtensionContext): void {
   s.currentUnit = null;
   s.active = false;
+  deactivateGSD();
   clearUnitTimeout();
   restoreProjectRootEnv();
+  restoreMilestoneLockEnv();
 
   // Clear crash lock and release session lock so the next `/gsd next` does
   // not see a stale lock with the current PID and treat it as a "remote"
@@ -621,9 +677,13 @@ function cleanupAfterLoopExit(ctx: ExtensionContext): void {
     logWarning("session", `lock cleanup failed: ${err instanceof Error ? err.message : String(err)}`, { file: "auto.ts" });
   }
 
-  ctx.ui.setStatus("gsd-auto", undefined);
-  ctx.ui.setWidget("gsd-progress", undefined);
-  ctx.ui.setFooter(undefined);
+  // A transient provider-error pause intentionally leaves the paused badge
+  // visible so the user still has a resumable auto-mode signal on screen.
+  if (!s.paused) {
+    ctx.ui.setStatus("gsd-auto", undefined);
+    ctx.ui.setWidget("gsd-progress", undefined);
+    ctx.ui.setFooter(undefined);
+  }
 
   // Restore CWD out of worktree back to original project root
   if (s.originalBasePath) {
@@ -735,7 +795,22 @@ export async function stopAuto(
     debugLog("stop-cleanup-worktree", { error: e instanceof Error ? e.message : String(e) });
   }
 
-  // ── Step 5: DB cleanup ──
+  // ── Step 5: Rebuild state while DB is still open (#3599) ──
+  // rebuildState() calls deriveState() which needs the DB for authoritative
+  // state.
Previously this ran after closeDatabase(), forcing a filesystem + // fallback that could disagree with the DB-backed dispatch decisions — + // a split-brain where dispatch says "blocked" but STATE.md shows work. + if (s.basePath) { + try { + await rebuildState(s.basePath); + } catch (e) { + debugLog("stop-rebuild-state-failed", { + error: e instanceof Error ? e.message : String(e), + }); + } + } + + // ── Step 6: DB cleanup ── if (isDbAvailable()) { try { const { closeDatabase } = await import("./gsd-db.js"); @@ -747,7 +822,7 @@ export async function stopAuto( } } - // ── Step 6: Restore basePath and chdir ── + // ── Step 7: Restore basePath and chdir ── try { if (s.originalBasePath) { s.basePath = s.originalBasePath; @@ -762,7 +837,7 @@ export async function stopAuto( debugLog("stop-cleanup-basepath", { error: e instanceof Error ? e.message : String(e) }); } - // ── Step 7: Ledger notification ── + // ── Step 8: Ledger notification ── try { const ledger = getLedger(); if (ledger && ledger.units.length > 0) { @@ -778,17 +853,6 @@ export async function stopAuto( debugLog("stop-cleanup-ledger", { error: e instanceof Error ? e.message : String(e) }); } - // ── Step 8: Rebuild state ── - if (s.basePath) { - try { - await rebuildState(s.basePath); - } catch (e) { - debugLog("stop-rebuild-state-failed", { - error: e instanceof Error ? e.message : String(e), - }); - } - } - // ── Step 9: Cmux sidebar / event log ── try { clearCmuxSidebar(loadedPreferences); @@ -879,6 +943,7 @@ export async function stopAuto( ctx?.ui.setWidget("gsd-progress", undefined); ctx?.ui.setFooter(undefined); restoreProjectRootEnv(); + restoreMilestoneLockEnv(); // Reset all session state in one call s.reset(); @@ -932,6 +997,7 @@ export async function pauseAuto( activeEngineId: s.activeEngineId, activeRunDir: s.activeRunDir, autoStartTime: s.autoStartTime, + milestoneLock: s.sessionMilestoneLock ?? undefined, }; const runtimeDir = join(gsdRoot(s.originalBasePath || s.basePath), "runtime"); mkdirSync(runtimeDir, { recursive: true }); @@ -969,7 +1035,9 @@ export async function pauseAuto( s.active = false; s.paused = true; + deactivateGSD(); restoreProjectRootEnv(); + restoreMilestoneLockEnv(); s.pendingVerificationRetry = null; s.verificationRetryCount.clear(); ctx?.ui.setStatus("gsd-auto", "paused"); @@ -1153,6 +1221,7 @@ export async function startAuto( options?: { step?: boolean; interrupted?: InterruptedSessionAssessment; + milestoneLock?: string | null; }, ): Promise { if (s.active) { @@ -1162,6 +1231,12 @@ export async function startAuto( const requestedStepMode = options?.step ?? false; const interruptedAssessment = options?.interrupted ?? null; + if (options?.milestoneLock !== undefined) { + s.sessionMilestoneLock = options.milestoneLock ?? null; + } + if (s.sessionMilestoneLock) { + captureMilestoneLockEnv(s.sessionMilestoneLock); + } // Escape stale worktree cwd from a previous milestone (#608). base = escapeStaleWorktree(base); @@ -1193,6 +1268,7 @@ export async function startAuto( s.originalBasePath = meta.originalBasePath || base; s.stepMode = meta.stepMode ?? requestedStepMode; s.autoStartTime = meta.autoStartTime || Date.now(); + s.sessionMilestoneLock = meta.milestoneLock ?? null; s.paused = true; try { unlinkSync(pausedPath); } catch (e) { logWarning("session", `pause file cleanup failed: ${e instanceof Error ? e.message : String(e)}`, { file: "auto.ts" }); } ctx.ui.notify( @@ -1227,6 +1303,7 @@ export async function startAuto( s.pausedUnitType = meta.unitType ?? null; s.pausedUnitId = meta.unitId ?? 
null; s.autoStartTime = meta.autoStartTime || Date.now(); + s.sessionMilestoneLock = meta.milestoneLock ?? null; s.paused = true; try { unlinkSync(pausedPath); } catch (e) { logWarning("session", `pause file cleanup failed: ${e instanceof Error ? e.message : String(e)}`, { file: "auto.ts" }); } ctx.ui.notify( @@ -1246,6 +1323,10 @@ export async function startAuto( if (!s.autoStartTime || s.autoStartTime <= 0) s.autoStartTime = Date.now(); } + if (s.sessionMilestoneLock) { + captureMilestoneLockEnv(s.sessionMilestoneLock); + } + if (!s.paused) { s.stepMode = requestedStepMode; } @@ -1333,8 +1414,17 @@ export async function startAuto( restoreHookState(s.basePath); // Re-sync managed resources on resume so long-lived auto sessions pick up // bundled extension updates before resume-time verification/state logic runs. + // GSD_PKG_ROOT is set by loader.ts and points to the gsd-pi package root. + // The relative import ("../../../resource-loader.js") only works from the source + // tree; deployed extensions live at ~/.gsd/agent/extensions/gsd/ where the + // relative path resolves to ~/.gsd/agent/resource-loader.js which doesn't exist. + // Using GSD_PKG_ROOT constructs a correct absolute path in both contexts (#3949). const agentDir = process.env.GSD_CODING_AGENT_DIR || join(process.env.GSD_HOME || homedir(), ".gsd", "agent"); - const { initResources } = await import("../../../" + "resource-loader.js"); + const pkgRoot = process.env.GSD_PKG_ROOT; + const resourceLoaderPath = pkgRoot + ? pathToFileURL(join(pkgRoot, "dist", "resource-loader.js")).href + : new URL("../../../resource-loader.js", import.meta.url).href; + const { initResources } = await import(resourceLoaderPath); initResources(agentDir); // Open the project DB before rebuild/derive so resume uses DB-backed // state instead of falling back to stale markdown parsing (#2940). @@ -1631,9 +1721,6 @@ export async function dispatchHookUnit( return true; } -// Direct phase dispatch → auto-direct-dispatch.ts -export { dispatchDirectPhase } from "./auto-direct-dispatch.js"; - // Re-export recovery functions for external consumers export { buildLoopRemediationSteps, diff --git a/src/resources/extensions/gsd/auto/infra-errors.ts b/src/resources/extensions/gsd/auto/infra-errors.ts index 17c1a553d..d0132724c 100644 --- a/src/resources/extensions/gsd/auto/infra-errors.ts +++ b/src/resources/extensions/gsd/auto/infra-errors.ts @@ -46,3 +46,41 @@ export function isInfrastructureError(err: unknown): string | null { if (msg.includes("database disk image is malformed")) return "SQLITE_CORRUPT"; return null; } + +/** + * Default wait duration when a cooldown error is detected but no specific + * expiry is available from AuthStorage (e.g., error propagated across + * process boundary without structured backoff data). + */ +export const COOLDOWN_FALLBACK_WAIT_MS = 35_000; // 35s — slightly longer than the 30s rate-limit backoff + +/** Maximum consecutive cooldown retries before the auto-loop gives up. */ +export const MAX_COOLDOWN_RETRIES = 5; + +/** + * Detect whether an error is a transient credential cooldown that should + * be waited out rather than counted as a consecutive failure. + * + * Prefers the structured `CredentialCooldownError` (code: AUTH_COOLDOWN) + * thrown by sdk.ts. Falls back to message matching for errors that + * propagated across process boundaries without the typed class. 
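+ *
+ * Illustrative calls (the error shapes here are hypothetical, not actual
+ * SDK output):
+ *
+ *   isTransientCooldownError({ code: "AUTH_COOLDOWN" })                 // true (structured match)
+ *   isTransientCooldownError(new Error("key is in a cooldown window"))  // true (message fallback)
+ *   isTransientCooldownError(new Error("ECONNRESET"))                   // false (not a cooldown)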
+ */
+export function isTransientCooldownError(err: unknown): boolean {
+  if (err && typeof err === "object" && (err as Record<string, unknown>).code === "AUTH_COOLDOWN") {
+    return true;
+  }
+  // Fallback: message match for cross-process error propagation
+  const msg = err instanceof Error ? err.message : String(err);
+  return /in a cooldown window/i.test(msg);
+}
+
+/**
+ * Extract retryAfterMs from a CredentialCooldownError, if available.
+ * Returns undefined for unstructured errors or when no retry hint exists.
+ */
+export function getCooldownRetryAfterMs(err: unknown): number | undefined {
+  if (err && typeof err === "object" && (err as Record<string, unknown>).code === "AUTH_COOLDOWN") {
+    return (err as Record<string, unknown>).retryAfterMs as number | undefined;
+  }
+  return undefined;
+}
diff --git a/src/resources/extensions/gsd/auto/loop-deps.ts b/src/resources/extensions/gsd/auto/loop-deps.ts
index ff63d8a3e..c68a68c3a 100644
--- a/src/resources/extensions/gsd/auto/loop-deps.ts
+++ b/src/resources/extensions/gsd/auto/loop-deps.ts
@@ -211,6 +211,8 @@ export interface LoopDeps {
     verbose: boolean,
     startModel: { provider: string; id: string } | null,
     retryContext?: { isRetry: boolean; previousTier?: string },
+    isAutoMode?: boolean,
+    sessionModelOverride?: { provider: string; id: string } | null,
   ) => Promise<{
     routing: { tier: string; modelDowngraded: boolean } | null;
     appliedModel: { provider: string; id: string } | null;
diff --git a/src/resources/extensions/gsd/auto/loop.ts b/src/resources/extensions/gsd/auto/loop.ts
index 3a0c8de10..eff106f33 100644
--- a/src/resources/extensions/gsd/auto/loop.ts
+++ b/src/resources/extensions/gsd/auto/loop.ts
@@ -27,8 +27,71 @@ import {
   runFinalize,
 } from "./phases.js";
 import { debugLog } from "../debug-logger.js";
-import { isInfrastructureError } from "./infra-errors.js";
+import { isInfrastructureError, isTransientCooldownError, getCooldownRetryAfterMs, COOLDOWN_FALLBACK_WAIT_MS, MAX_COOLDOWN_RETRIES } from "./infra-errors.js";
 import { resolveEngine } from "../engine-resolver.js";
+import { logWarning } from "../workflow-logger.js";
+import { gsdRoot } from "../paths.js";
+import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
+import { join } from "node:path";
+import { getHeapStatistics } from "node:v8";
+
+// ── Stuck detection persistence (#3704) ──────────────────────────────────
+// Persist stuck detection state to disk so it survives session restarts.
+// Without this, restarting auto-mode resets all counters, allowing the
+// same blocked unit to burn a full retry budget each session.
+function stuckStatePath(basePath: string): string {
+  return join(gsdRoot(basePath), "runtime", "stuck-state.json");
+}
+
+function loadStuckState(basePath: string): { recentUnits: Array<{ key: string }>; stuckRecoveryAttempts: number } {
+  try {
+    const data = JSON.parse(readFileSync(stuckStatePath(basePath), "utf-8"));
+    return {
+      recentUnits: Array.isArray(data.recentUnits) ? data.recentUnits : [],
+      stuckRecoveryAttempts: typeof data.stuckRecoveryAttempts === "number" ? data.stuckRecoveryAttempts : 0,
+    };
+  } catch (err) {
+    debugLog("autoLoop", { phase: "load-stuck-state-failed", error: err instanceof Error ? err.message : String(err) });
+    return { recentUnits: [], stuckRecoveryAttempts: 0 };
+  }
+}
+
+function saveStuckState(basePath: string, state: LoopState): void {
+  try {
+    const filePath = stuckStatePath(basePath);
+    mkdirSync(join(gsdRoot(basePath), "runtime"), { recursive: true });
+    writeFileSync(filePath, JSON.stringify({
+      recentUnits: state.recentUnits.slice(-20), // keep last 20 entries
+      stuckRecoveryAttempts: state.stuckRecoveryAttempts,
+      updatedAt: new Date().toISOString(),
+    }) + "\n");
+  } catch (err) {
+    debugLog("autoLoop", { phase: "save-stuck-state-failed", error: err instanceof Error ? err.message : String(err) });
+  }
+}
+
+// ── Memory pressure monitoring (#3331) ──────────────────────────────────
+// Check heap usage every N iterations and trigger graceful shutdown before
+// the OS OOM killer sends SIGKILL. The threshold is 85% of the V8 heap
+// limit (--max-old-space-size or default ~1.5-4GB depending on platform).
+const MEMORY_CHECK_INTERVAL = 5; // check every 5 iterations
+const MEMORY_PRESSURE_THRESHOLD = 0.85; // 85% of heap limit
+
+function checkMemoryPressure(): { pressured: boolean; heapMB: number; limitMB: number; pct: number } {
+  const mem = process.memoryUsage();
+  const heapMB = Math.round(mem.heapUsed / 1024 / 1024);
+  // Read the actual V8 heap limit; fall back to a conservative default when
+  // heap statistics are unavailable on this runtime.
+  let limitMB = 4096;
+  try {
+    limitMB = Math.round(getHeapStatistics().heap_size_limit / 1024 / 1024);
+  } catch { /* v8 stats unavailable — keep conservative default */ }
+  const pct = heapMB / limitMB;
+  return { pressured: pct > MEMORY_PRESSURE_THRESHOLD, heapMB, limitMB, pct };
+}

 /**
  * Main auto-mode execution loop. Iterates: derive → dispatch → guards →
@@ -46,8 +109,15 @@ export async function autoLoop(
 ): Promise<void> {
   debugLog("autoLoop", { phase: "enter" });
   let iteration = 0;
-  const loopState: LoopState = { recentUnits: [], stuckRecoveryAttempts: 0, consecutiveFinalizeTimeouts: 0 };
+  // Load persisted stuck state so counters survive session restarts (#3704)
+  const persisted = loadStuckState(s.basePath);
+  const loopState: LoopState = {
+    recentUnits: persisted.recentUnits,
+    stuckRecoveryAttempts: persisted.stuckRecoveryAttempts,
+    consecutiveFinalizeTimeouts: 0,
+  };
   let consecutiveErrors = 0;
+  let consecutiveCooldowns = 0;
   const recentErrorMessages: string[] = [];

   while (s.active) {
@@ -73,6 +143,24 @@
       break;
     }

+    // ── Memory pressure check (#3331) ──
+    // Graceful shutdown before OOM killer sends SIGKILL.
+    if (iteration % MEMORY_CHECK_INTERVAL === 0) {
+      const mem = checkMemoryPressure();
+      debugLog("autoLoop", { phase: "memory-check", ...mem });
+      if (mem.pressured) {
+        logWarning("dispatch", `Memory pressure: ${mem.heapMB}MB / ${mem.limitMB}MB (${Math.round(mem.pct * 100)}%) — stopping auto-mode to prevent OOM kill`);
+        await deps.stopAuto(
+          ctx,
+          pi,
+          `Memory pressure: heap at ${mem.heapMB}MB / ${mem.limitMB}MB (${Math.round(mem.pct * 100)}%). ` +
+            `Stopping gracefully to prevent OOM kill after ${iteration} iterations. 
` + + `Resume with /gsd auto to continue from where you left off.`, + ); + break; + } + } + if (!s.cmdCtx) { debugLog("autoLoop", { phase: "exit", reason: "no-cmdCtx" }); break; @@ -203,8 +291,10 @@ export async function autoLoop( deps.clearUnitTimeout(); consecutiveErrors = 0; + consecutiveCooldowns = 0; recentErrorMessages.length = 0; deps.emitJournalEvent({ ts: new Date().toISOString(), flowId, seq: nextSeq(), eventType: "iteration-end", data: { iteration } }); + saveStuckState(s.basePath, loopState); // persist across session restarts (#3704) debugLog("autoLoop", { phase: "iteration-complete", iteration }); if (reconcileResult.outcome === "milestone-complete") { @@ -265,6 +355,7 @@ export async function autoLoop( if (finalizeResult.action === "continue") continue; consecutiveErrors = 0; // Iteration completed successfully + consecutiveCooldowns = 0; recentErrorMessages.length = 0; deps.emitJournalEvent({ ts: new Date().toISOString(), flowId, seq: nextSeq(), eventType: "iteration-end", data: { iteration } }); debugLog("autoLoop", { phase: "iteration-complete", iteration }); @@ -300,6 +391,47 @@ export async function autoLoop( break; } + // ── Credential cooldown: wait and retry with bounded budget ── + // A 429 triggers a 30s credential backoff in AuthStorage. If the SDK's + // getApiKey() retries couldn't outlast the window, the error surfaces + // here. Wait for the cooldown to clear rather than counting it as a + // consecutive failure — but cap retries so we don't spin for hours + // on persistent quota exhaustion. + if (isTransientCooldownError(loopErr)) { + consecutiveCooldowns++; + const retryAfterMs = getCooldownRetryAfterMs(loopErr); + debugLog("autoLoop", { + phase: "cooldown-wait", + iteration, + consecutiveCooldowns, + retryAfterMs, + error: msg, + }); + + if (consecutiveCooldowns > MAX_COOLDOWN_RETRIES) { + ctx.ui.notify( + `Auto-mode stopped: ${consecutiveCooldowns} consecutive credential cooldowns — rate limit or quota may be persistently exhausted.`, + "error", + ); + await deps.stopAuto( + ctx, + pi, + `${consecutiveCooldowns} consecutive credential cooldowns exceeded retry budget`, + ); + break; + } + + const waitMs = (retryAfterMs !== undefined && retryAfterMs > 0 && retryAfterMs <= 60_000) + ? retryAfterMs + 500 // Use structured hint + small buffer + : COOLDOWN_FALLBACK_WAIT_MS; + ctx.ui.notify( + `Credentials in cooldown (${consecutiveCooldowns}/${MAX_COOLDOWN_RETRIES}) — waiting ${Math.round(waitMs / 1000)}s before retrying.`, + "warning", + ); + await new Promise(resolve => setTimeout(resolve, waitMs)); + continue; // Retry iteration without incrementing consecutiveErrors + } + consecutiveErrors++; recentErrorMessages.push(msg.length > 120 ? msg.slice(0, 120) + "..." 
: msg); debugLog("autoLoop", { diff --git a/src/resources/extensions/gsd/auto/phases.ts b/src/resources/extensions/gsd/auto/phases.ts index a3591e6ca..8151b4f3e 100644 --- a/src/resources/extensions/gsd/auto/phases.ts +++ b/src/resources/extensions/gsd/auto/phases.ts @@ -27,6 +27,7 @@ import { runUnit } from "./run-unit.js"; import { debugLog } from "../debug-logger.js"; import { PROJECT_FILES } from "../detection.js"; import { MergeConflictError } from "../git-service.js"; +import { setCurrentPhase, clearCurrentPhase } from "../../shared/gsd-phase-state.js"; import { join, basename, dirname, parse as parsePath } from "node:path"; import { existsSync, cpSync, readdirSync } from "node:fs"; import { logWarning, logError } from "../workflow-logger.js"; @@ -1068,6 +1069,7 @@ export async function runUnitPhase( const previousTier = s.currentUnitRouting?.tier; s.currentUnit = { type: unitType, id: unitId, startedAt: Date.now() }; + setCurrentPhase(unitType); s.lastToolInvocationError = null; // #2883: clear stale error from previous unit const unitStartSeq = ic.nextSeq(); deps.emitJournalEvent({ ts: new Date().toISOString(), flowId: ic.flowId, seq: unitStartSeq, eventType: "unit-start", data: { unitType, unitId } }); @@ -1183,6 +1185,8 @@ export async function runUnitPhase( s.verbose, s.autoModeStartModel, sidecarItem ? undefined : { isRetry, previousTier }, + undefined, + s.manualSessionModelOverride, ); s.currentUnitRouting = modelResult.routing as AutoSession["currentUnitRouting"]; @@ -1527,6 +1531,7 @@ export async function runFinalize( // Detach session from the timed-out unit so late async completions // cannot mutate state for the next unit (#3757). s.currentUnit = null; + clearCurrentPhase(); loopState.consecutiveFinalizeTimeouts++; debugLog("autoLoop", { phase: "pre-verification-timeout", @@ -1624,6 +1629,7 @@ export async function runFinalize( // Detach session from the timed-out unit so late async completions // cannot mutate state for the next unit (#3757). s.currentUnit = null; + clearCurrentPhase(); loopState.consecutiveFinalizeTimeouts++; debugLog("autoLoop", { phase: "post-verification-timeout", diff --git a/src/resources/extensions/gsd/auto/session.ts b/src/resources/extensions/gsd/auto/session.ts index 4f8fc82e0..426713411 100644 --- a/src/resources/extensions/gsd/auto/session.ts +++ b/src/resources/extensions/gsd/auto/session.ts @@ -87,6 +87,10 @@ export class AutoSession { previousProjectRootEnv: string | null = null; hadProjectRootEnv = false; projectRootEnvCaptured = false; + previousMilestoneLockEnv: string | null = null; + hadMilestoneLockEnv = false; + milestoneLockEnvCaptured = false; + sessionMilestoneLock: string | null = null; gitService: GitServiceImpl | null = null; // ── Dispatch counters ──────────────────────────────────────────────────── @@ -107,6 +111,8 @@ export class AutoSession { // ── Model state ────────────────────────────────────────────────────────── autoModeStartModel: StartModel | null = null; + /** Explicit /gsd model pin captured at bootstrap (session-scoped policy override). */ + manualSessionModelOverride: StartModel | null = null; currentUnitModel: Model | null = null; /** Fully-qualified model ID (provider/id) set after selectAndApplyModel + hook overrides (#2899). 
*/ currentDispatchedModelId: string | null = null; @@ -200,6 +206,10 @@ export class AutoSession { this.previousProjectRootEnv = null; this.hadProjectRootEnv = false; this.projectRootEnvCaptured = false; + this.previousMilestoneLockEnv = null; + this.hadMilestoneLockEnv = false; + this.milestoneLockEnvCaptured = false; + this.sessionMilestoneLock = null; this.gitService = null; // Dispatch @@ -214,6 +224,7 @@ export class AutoSession { // Model this.autoModeStartModel = null; + this.manualSessionModelOverride = null; this.currentUnitModel = null; this.currentDispatchedModelId = null; this.originalModelId = null; diff --git a/src/resources/extensions/gsd/bootstrap/db-tools.ts b/src/resources/extensions/gsd/bootstrap/db-tools.ts index 71d5ae9aa..dbb5849c9 100644 --- a/src/resources/extensions/gsd/bootstrap/db-tools.ts +++ b/src/resources/extensions/gsd/bootstrap/db-tools.ts @@ -1026,12 +1026,12 @@ export function registerDbTools(pi: ExtensionAPI): void { name: "gsd_save_gate_result", label: "Save Gate Result", description: - "Save the result of a quality gate evaluation (Q3-Q8) to the GSD database. " + + "Save the result of a quality gate evaluation (Q3-Q8 or MV01-MV04) to the GSD database. " + "Called by gate evaluation sub-agents after analyzing a specific quality question.", promptSnippet: "Save quality gate evaluation result (verdict, rationale, findings)", promptGuidelines: [ "Use gsd_save_gate_result after evaluating a quality gate question.", - "gateId must be one of: Q3, Q4, Q5, Q6, Q7, Q8.", + "gateId must be one of: Q3, Q4, Q5, Q6, Q7, Q8, MV01, MV02, MV03, MV04.", "verdict must be: pass (no concerns), flag (concerns found), or omitted (not applicable).", "rationale should be a one-sentence justification for the verdict.", "findings should contain detailed markdown analysis (or empty string if omitted).", @@ -1039,7 +1039,7 @@ export function registerDbTools(pi: ExtensionAPI): void { parameters: Type.Object({ milestoneId: Type.String({ description: "Milestone ID (e.g. M001)" }), sliceId: Type.String({ description: "Slice ID (e.g. 
S01)" }), - gateId: Type.String({ description: "Gate ID: Q3, Q4, Q5, Q6, Q7, or Q8" }), + gateId: Type.String({ description: "Gate ID: Q3, Q4, Q5, Q6, Q7, Q8, MV01, MV02, MV03, or MV04" }), taskId: Type.Optional(Type.String({ description: "Task ID for task-scoped gates (Q5/Q6/Q7)" })), verdict: Type.String({ description: "pass, flag, or omitted" }), rationale: Type.String({ description: "One-sentence justification" }), diff --git a/src/resources/extensions/gsd/bootstrap/register-hooks.ts b/src/resources/extensions/gsd/bootstrap/register-hooks.ts index 4bb105f71..ff6aefa83 100644 --- a/src/resources/extensions/gsd/bootstrap/register-hooks.ts +++ b/src/resources/extensions/gsd/bootstrap/register-hooks.ts @@ -45,6 +45,8 @@ export function registerHooks(pi: ExtensionAPI): void { resetToolCallLoopGuard(); resetAskUserQuestionsCache(); await syncServiceTierStatus(ctx); + const { prepareWorkflowMcpForProject } = await import("../workflow-mcp-auto-prep.js"); + prepareWorkflowMcpForProject(ctx, process.cwd()); // Apply show_token_cost preference (#1515) try { @@ -85,6 +87,8 @@ export function registerHooks(pi: ExtensionAPI): void { resetAskUserQuestionsCache(); clearDiscussionFlowState(); await syncServiceTierStatus(ctx); + const { prepareWorkflowMcpForProject } = await import("../workflow-mcp-auto-prep.js"); + prepareWorkflowMcpForProject(ctx, process.cwd()); loadToolApiKeys(); }); @@ -117,6 +121,8 @@ export function registerHooks(pi: ExtensionAPI): void { return { cancel: true }; } const basePath = process.cwd(); + const { ensureDbOpen } = await import("./dynamic-tools.js"); + await ensureDbOpen(); const state = await deriveState(basePath); if (!state.activeMilestone || !state.activeSlice || !state.activeTask) return; if (state.phase !== "executing") return; @@ -175,14 +181,10 @@ export function registerHooks(pi: ExtensionAPI): void { // Only gate-shaped ask_user_questions calls should block execution. // The gate stays pending until the user selects the approval option. if (event.toolName === "ask_user_questions") { - const milestoneId = getDiscussionMilestoneId(discussionBasePath); - const inDiscussion = milestoneId !== null || isQueuePhaseActive(); - if (inDiscussion) { - const questions: any[] = (event.input as any)?.questions ?? []; - const questionId = questions.find((question) => typeof question?.id === "string" && isGateQuestionId(question.id))?.id; - if (typeof questionId === "string") { - setPendingGate(questionId); - } + const questions: any[] = (event.input as any)?.questions ?? []; + const questionId = questions.find((question) => typeof question?.id === "string" && isGateQuestionId(question.id))?.id; + if (typeof questionId === "string") { + setPendingGate(questionId); } } @@ -280,7 +282,6 @@ export function registerHooks(pi: ExtensionAPI): void { if (event.toolName !== "ask_user_questions") return; const milestoneId = getDiscussionMilestoneId(process.cwd()); const queueActive = isQueuePhaseActive(); - if (!milestoneId && !queueActive) return; const details = event.details as any; @@ -313,13 +314,16 @@ export function registerHooks(pi: ExtensionAPI): void { // Only unlock the gate if the user selected the first option (confirmation). // Cross-references against the question's defined options to reject free-form "Other" text. const answer = details.response?.answers?.[question.id]; + const inferredMilestoneId = extractDepthVerificationMilestoneId(question.id) ?? 
milestoneId; if (isDepthConfirmationAnswer(answer?.selected, question.options)) { - markDepthVerified(extractDepthVerificationMilestoneId(question.id) ?? milestoneId); + markDepthVerified(inferredMilestoneId); + clearPendingGate(); } break; } } + if (!milestoneId && !queueActive) return; if (!milestoneId) return; const basePath = process.cwd(); @@ -446,4 +450,12 @@ export function registerHooks(pi: ExtensionAPI): void { // Default: no override — let capability scoring handle selection return undefined; }); + + // Tool set adaptation hook (ADR-005 Phase 4) + // Extensions can override tool set after model selection by returning { toolNames: [...] } + // Return undefined to let the built-in provider compatibility filtering proceed. + pi.on("adjust_tool_set", async (_event) => { + // Default: no override — let provider capability filtering handle tool set + return undefined; + }); } diff --git a/src/resources/extensions/gsd/bootstrap/register-shortcuts.ts b/src/resources/extensions/gsd/bootstrap/register-shortcuts.ts index e3c947aff..eb8dc79b8 100644 --- a/src/resources/extensions/gsd/bootstrap/register-shortcuts.ts +++ b/src/resources/extensions/gsd/bootstrap/register-shortcuts.ts @@ -1,79 +1,98 @@ import { existsSync } from "node:fs"; import { join } from "node:path"; -import type { ExtensionAPI } from "@gsd/pi-coding-agent"; +import type { ExtensionAPI, ExtensionContext } from "@gsd/pi-coding-agent"; import { Key } from "@gsd/pi-tui"; import { GSDDashboardOverlay } from "../dashboard-overlay.js"; import { GSDNotificationOverlay } from "../notification-overlay.js"; import { ParallelMonitorOverlay } from "../parallel-monitor-overlay.js"; +import { GSD_SHORTCUTS } from "../shortcut-defs.js"; import { projectRoot } from "../commands/context.js"; import { shortcutDesc } from "../../shared/mod.js"; export function registerShortcuts(pi: ExtensionAPI): void { - pi.registerShortcut(Key.ctrlAlt("g"), { - description: shortcutDesc("Open GSD dashboard", "/gsd status"), - handler: async (ctx) => { - const basePath = projectRoot(); - if (!existsSync(join(basePath, ".gsd"))) { - ctx.ui.notify("No .gsd/ directory found. Run /gsd to start.", "info"); - return; - } - await ctx.ui.custom( - (tui, theme, _kb, done) => new GSDDashboardOverlay(tui, theme, () => done(true)), - { - overlay: true, - overlayOptions: { - width: "90%", - minWidth: 80, - maxHeight: "92%", - anchor: "center", - }, + const overlayOptions = { + width: "90%", + minWidth: 80, + maxHeight: "92%", + anchor: "center", + } as const; + + const openDashboardOverlay = async (ctx: ExtensionContext) => { + const basePath = projectRoot(); + if (!existsSync(join(basePath, ".gsd"))) { + ctx.ui.notify("No .gsd/ directory found. Run /gsd to start.", "info"); + return; + } + await ctx.ui.custom( + (tui, theme, _kb, done) => new GSDDashboardOverlay(tui, theme, () => done(true)), + { + overlay: true, + overlayOptions, + }, + ); + }; + + const openNotificationsOverlay = async (ctx: ExtensionContext) => { + await ctx.ui.custom( + (tui, theme, _kb, done) => new GSDNotificationOverlay(tui, theme, () => done(true)), + { + overlay: true, + overlayOptions: { + width: "80%", + minWidth: 60, + maxHeight: "88%", + anchor: "center", + backdrop: true, }, - ); - }, + }, + ); + }; + + const openParallelOverlay = async (ctx: ExtensionContext) => { + const basePath = projectRoot(); + const parallelDir = join(basePath, ".gsd", "parallel"); + if (!existsSync(parallelDir)) { + ctx.ui.notify("No parallel workers found. 
Run /gsd parallel start first.", "info"); + return; + } + await ctx.ui.custom( + (tui, theme, _kb, done) => new ParallelMonitorOverlay(tui, theme, () => done(true), basePath), + { + overlay: true, + overlayOptions, + }, + ); + }; + + pi.registerShortcut(Key.ctrlAlt(GSD_SHORTCUTS.dashboard.key), { + description: shortcutDesc(GSD_SHORTCUTS.dashboard.action, GSD_SHORTCUTS.dashboard.command), + handler: openDashboardOverlay, }); - pi.registerShortcut(Key.ctrlAlt("n"), { - description: shortcutDesc("Open notification history", "/gsd notifications"), - handler: async (ctx) => { - await ctx.ui.custom( - (tui, theme, _kb, done) => new GSDNotificationOverlay(tui, theme, () => done(true)), - { - overlay: true, - overlayOptions: { - width: "80%", - minWidth: 60, - maxHeight: "88%", - anchor: "center", - backdrop: true, - }, - }, - ); - }, + // Fallback for terminals where Ctrl+Alt letter chords are not forwarded reliably. + pi.registerShortcut(Key.ctrlShift(GSD_SHORTCUTS.dashboard.key), { + description: shortcutDesc(`${GSD_SHORTCUTS.dashboard.action} (fallback)`, GSD_SHORTCUTS.dashboard.command), + handler: openDashboardOverlay, }); - pi.registerShortcut(Key.ctrlAlt("p"), { - description: shortcutDesc("Open parallel worker monitor", "/gsd parallel watch"), - handler: async (ctx) => { - const basePath = projectRoot(); - const parallelDir = join(basePath, ".gsd", "parallel"); - if (!existsSync(parallelDir)) { - ctx.ui.notify("No parallel workers found. Run /gsd parallel start first.", "info"); - return; - } - await ctx.ui.custom( - (tui, theme, _kb, done) => new ParallelMonitorOverlay(tui, theme, () => done(true)), - { - overlay: true, - overlayOptions: { - width: "90%", - minWidth: 80, - maxHeight: "92%", - anchor: "center", - }, - }, - ); - }, + pi.registerShortcut(Key.ctrlAlt(GSD_SHORTCUTS.notifications.key), { + description: shortcutDesc(GSD_SHORTCUTS.notifications.action, GSD_SHORTCUTS.notifications.command), + handler: openNotificationsOverlay, }); + + // Fallback for terminals where Ctrl+Alt letter chords are not forwarded reliably. + pi.registerShortcut(Key.ctrlShift(GSD_SHORTCUTS.notifications.key), { + description: shortcutDesc(`${GSD_SHORTCUTS.notifications.action} (fallback)`, GSD_SHORTCUTS.notifications.command), + handler: openNotificationsOverlay, + }); + + pi.registerShortcut(Key.ctrlAlt(GSD_SHORTCUTS.parallel.key), { + description: shortcutDesc(GSD_SHORTCUTS.parallel.action, GSD_SHORTCUTS.parallel.command), + handler: openParallelOverlay, + }); + + // No Ctrl+Shift+P fallback — conflicts with cycleModelBackward (shift+ctrl+p). + // Use Ctrl+Alt+P or /gsd parallel watch instead. 
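+
+  // Sketch of the registration pattern for future chords. `foo` and
+  // `openFooOverlay` are hypothetical; nothing is registered here:
+  //
+  //   pi.registerShortcut(Key.ctrlAlt(GSD_SHORTCUTS.foo.key), {
+  //     description: shortcutDesc(GSD_SHORTCUTS.foo.action, GSD_SHORTCUTS.foo.command),
+  //     handler: openFooOverlay,
+  //   });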
} diff --git a/src/resources/extensions/gsd/bootstrap/system-context.ts b/src/resources/extensions/gsd/bootstrap/system-context.ts index 8fe3890df..3a336f9ee 100644 --- a/src/resources/extensions/gsd/bootstrap/system-context.ts +++ b/src/resources/extensions/gsd/bootstrap/system-context.ts @@ -19,6 +19,7 @@ import { deriveState } from "../state.js"; import { formatOverridesSection, formatShortcut, loadActiveOverrides, loadFile, parseContinue, parseSummary } from "../files.js"; import { toPosixPath } from "../../shared/mod.js"; import { markCmuxPromptShown, shouldPromptToEnableCmux } from "../../cmux/index.js"; +import { autoEnableCmuxPreferences } from "../commands-cmux.js"; const gsdHome = process.env.GSD_HOME || join(homedir(), ".gsd"); @@ -76,13 +77,16 @@ export async function buildBeforeAgentStartResult( shortcutDashboard: formatShortcut("Ctrl+Alt+G"), shortcutShell: formatShortcut("Ctrl+Alt+B"), }); - const loadedPreferences = loadEffectiveGSDPreferences(); + let loadedPreferences = loadEffectiveGSDPreferences(); if (shouldPromptToEnableCmux(loadedPreferences?.preferences)) { markCmuxPromptShown(); - ctx.ui.notify( - "cmux detected. Run /gsd cmux on to enable sidebar metadata, notifications, and visual subagent splits for this project.", - "info", - ); + if (autoEnableCmuxPreferences()) { + loadedPreferences = loadEffectiveGSDPreferences(); + ctx.ui.notify( + "cmux detected — auto-enabled. Run /gsd cmux off to disable.", + "info", + ); + } } let preferenceBlock = ""; @@ -289,6 +293,11 @@ function buildWorktreeContextBlock(): string { const RESUME_INTENT_PATTERNS = /^(continue|resume|ok|go|go ahead|proceed|keep going|carry on|next|yes|yeah|yep|sure|do it|let's go|pick up where you left off)$/; async function buildGuidedExecuteContextInjection(prompt: string, basePath: string): Promise { + const ensureStateDbOpen = async () => { + const { ensureDbOpen } = await import("./dynamic-tools.js"); + await ensureDbOpen(); + }; + const executeMatch = prompt.match(/Execute the next task:\s+(T\d+)\s+\("([^"]+)"\)\s+in slice\s+(S\d+)\s+of milestone\s+(M\d+(?:-[a-z0-9]{6})?)/i); if (executeMatch) { const [, taskId, taskTitle, sliceId, milestoneId] = executeMatch; @@ -298,6 +307,7 @@ async function buildGuidedExecuteContextInjection(prompt: string, basePath: stri const resumeMatch = prompt.match(/Resume interrupted work\.[\s\S]*?slice\s+(S\d+)\s+of milestone\s+(M\d+(?:-[a-z0-9]{6})?)/i); if (resumeMatch) { const [, sliceId, milestoneId] = resumeMatch; + await ensureStateDbOpen(); const state = await deriveState(basePath); if (state.activeMilestone?.id === milestoneId && state.activeSlice?.id === sliceId && state.activeTask) { return buildTaskExecutionContextInjection(basePath, milestoneId, sliceId, state.activeTask.id, state.activeTask.title); @@ -313,6 +323,7 @@ async function buildGuidedExecuteContextInjection(prompt: string, basePath: stri // replanning, gate evaluation, or other non-execution phases. 
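+  // e.g. "Proceed!" → trimmed to "proceed" (punctuation stripped, lowercased),
+  // which matches and injects task context; "continue the refactor" fails the
+  // anchored match, so ordinary instructions never trigger injection.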
   const trimmed = prompt.trim().toLowerCase().replace(/[.!?,]+$/g, "");
   if (RESUME_INTENT_PATTERNS.test(trimmed)) {
+    await ensureStateDbOpen();
     const state = await deriveState(basePath);
     if (state.phase === "executing" && state.activeTask && state.activeMilestone && state.activeSlice) {
       return buildTaskExecutionContextInjection(
diff --git a/src/resources/extensions/gsd/bootstrap/write-gate.ts b/src/resources/extensions/gsd/bootstrap/write-gate.ts
index 0215faae8..b8e6cf8e5 100644
--- a/src/resources/extensions/gsd/bootstrap/write-gate.ts
+++ b/src/resources/extensions/gsd/bootstrap/write-gate.ts
@@ -47,13 +47,9 @@ let pendingGateId: string | null = null;

 /**
  * Recognized gate question ID patterns.
- * These appear in both discuss-prepared.md (4-layer) and discuss.md (depth/requirements/roadmap).
+ * These appear in discuss.md (depth/requirements/roadmap).
  */
 const GATE_QUESTION_PATTERNS = [
-  "layer1_scope_gate",
-  "layer2_architecture_gate",
-  "layer3_error_gate",
-  "layer4_quality_gate",
   "depth_verification",
 ] as const;

diff --git a/src/resources/extensions/gsd/codebase-generator.ts b/src/resources/extensions/gsd/codebase-generator.ts
index f56d84079..b291c3c1f 100644
--- a/src/resources/extensions/gsd/codebase-generator.ts
+++ b/src/resources/extensions/gsd/codebase-generator.ts
@@ -71,13 +71,23 @@ interface EnumeratedFiles {
 // ─── Defaults ────────────────────────────────────────────────────────────────

 const DEFAULT_EXCLUDES = [
+  // ── AI / tooling meta ──
+  ".agents/",
   ".gsd/",
   ".planning/",
   ".plans/",
   ".claude/",
   ".cursor/",
+  ".bg-shell/",
+
+  // ── Editor / IDE ──
   ".vscode/",
+  ".idea/",
+
+  // ── VCS ──
   ".git/",
+
+  // ── Dependencies & build artifacts ──
   "node_modules/",
   "dist/",
   "build/",
@@ -85,7 +95,13 @@ const DEFAULT_EXCLUDES = [
   "coverage/",
   "__pycache__/",
   ".venv/",
+  "venv/",
   "vendor/",
+  "target/",
+
+  // ── Misc ──
+  ".cache/",
+  "tmp/",
 ];

 const DEFAULT_MAX_FILES = 500;
diff --git a/src/resources/extensions/gsd/commands-cmux.ts b/src/resources/extensions/gsd/commands-cmux.ts
index e00f2dea2..a1b8f5ee4 100644
--- a/src/resources/extensions/gsd/commands-cmux.ts
+++ b/src/resources/extensions/gsd/commands-cmux.ts
@@ -1,5 +1,5 @@
 import type { ExtensionCommandContext } from "@gsd/pi-coding-agent";
-import { existsSync, readFileSync } from "node:fs";
+import { existsSync, readFileSync, writeFileSync } from "node:fs";
 import { clearCmuxSidebar, CmuxClient, detectCmuxEnvironment, resolveCmuxConfig } from "../cmux/index.js";
 import { saveFile } from "./files.js";
 import {
@@ -9,6 +9,37 @@ import {
 } from "./preferences.js";
 import { ensurePreferencesFile, serializePreferencesToFrontmatter } from "./commands-prefs-wizard.js";

+/**
+ * Auto-enable cmux in project preferences when detected but never configured.
+ * Called at boot (before agent start) — no ExtensionCommandContext needed.
+ * Returns true if preferences were written, false if skipped.
+ */
+export function autoEnableCmuxPreferences(): boolean {
+  const path = getProjectGSDPreferencesPath();
+  if (!existsSync(path)) return false;
+
+  const existing = loadProjectGSDPreferences();
+  const prefs: Record<string, unknown> = existing?.preferences ? { ...existing.preferences } : { version: 1 };
+  prefs.cmux = {
+    enabled: true,
+    notifications: true,
+    sidebar: true,
+    splits: false,
+    browser: false,
+    ...((prefs.cmux as Record<string, unknown> | undefined) ?? {}),
+  };
+  (prefs.cmux as Record<string, unknown>).enabled = true;
+  prefs.version = prefs.version || 1;
+
+  const frontmatter = serializePreferencesToFrontmatter(prefs);
+  let body = "\n# GSD Skill Preferences\n\nSee `~/.gsd/agent/extensions/gsd/docs/preferences-reference.md` for full field documentation and examples.\n";
+  const preserved = extractBodyAfterFrontmatter(readFileSync(path, "utf-8"));
+  if (preserved) body = preserved;
+
+  writeFileSync(path, `---\n${frontmatter}---${body}`, "utf-8");
+  return true;
+}
+
 function extractBodyAfterFrontmatter(content: string): string | null {
   const start = content.startsWith("---\n") ? 4 : content.startsWith("---\r\n") ? 5 : -1;
   if (start === -1) return null;
diff --git a/src/resources/extensions/gsd/commands-handlers.ts b/src/resources/extensions/gsd/commands-handlers.ts
index 16af7230b..25074d634 100644
--- a/src/resources/extensions/gsd/commands-handlers.ts
+++ b/src/resources/extensions/gsd/commands-handlers.ts
@@ -25,6 +25,26 @@ import { getAutoWorktreePath } from "./auto-worktree.js";
 import { projectRoot } from "./commands/context.js";
 import { loadPrompt } from "./prompt-loader.js";

+const UPDATE_REGISTRY_URL = "https://registry.npmjs.org/gsd-pi/latest";
+const UPDATE_FETCH_TIMEOUT_MS = 5000;
+
+async function fetchLatestVersionForCommand(): Promise<string | null> {
+  const controller = new AbortController();
+  const timeout = setTimeout(() => controller.abort(), UPDATE_FETCH_TIMEOUT_MS);
+
+  try {
+    const res = await fetch(UPDATE_REGISTRY_URL, { signal: controller.signal });
+    if (!res.ok) return null;
+    const data = (await res.json()) as { version?: string };
+    const latest = typeof data.version === "string" ? data.version.trim().replace(/^v/, "") : "";
+    return latest.length > 0 ? latest : null;
+  } catch {
+    return null;
+  } finally {
+    clearTimeout(timeout);
+  }
+}
+
 export function dispatchDoctorHeal(pi: ExtensionAPI, scope: string | undefined, reportText: string, structuredIssues: string): void {
   const workflowPath = process.env.GSD_WORKFLOW_PATH ?? join(process.env.HOME ?? 
"~", ".gsd", "agent", "GSD-WORKFLOW.md"); const workflow = readFileSync(workflowPath, "utf-8"); @@ -58,6 +78,10 @@ export function parseDoctorArgs(args: string) { return { jsonMode, dryRun, fixFlag, includeBuild, includeTests, mode, requestedScope }; } +export function isDoctorHealActionable(issue: { fixable: boolean; severity: string }): boolean { + return issue.fixable && issue.severity !== "info"; +} + export async function handleDoctor(args: string, ctx: ExtensionCommandContext, pi: ExtensionAPI): Promise { const { jsonMode, dryRun, fixFlag, includeBuild, includeTests, mode, requestedScope } = parseDoctorArgs(args); const scope = await selectDoctorScope(projectRoot(), requestedScope); @@ -89,7 +113,7 @@ export async function handleDoctor(args: string, ctx: ExtensionCommandContext, p scope: effectiveScope, includeWarnings: true, }); - const actionable = unresolved.filter(issue => issue.severity === "error"); + const actionable = unresolved.filter(isDoctorHealActionable); if (actionable.length === 0) { ctx.ui.notify("Doctor heal found nothing actionable to hand off to the LLM.", "info"); return; @@ -394,13 +418,8 @@ export async function handleUpdate(ctx: ExtensionCommandContext): Promise ctx.ui.notify(`Current version: v${current}\nChecking npm registry...`, "info"); - let latest: string; - try { - latest = execSync(`npm view ${NPM_PACKAGE} version`, { - encoding: "utf-8", - stdio: ["ignore", "pipe", "ignore"], - }).trim(); - } catch { + const latest = await fetchLatestVersionForCommand(); + if (!latest) { ctx.ui.notify("Failed to reach npm registry. Check your network connection.", "error"); return; } diff --git a/src/resources/extensions/gsd/commands/context.ts b/src/resources/extensions/gsd/commands/context.ts index f4a5aa423..8007ecd27 100644 --- a/src/resources/extensions/gsd/commands/context.ts +++ b/src/resources/extensions/gsd/commands/context.ts @@ -1,7 +1,7 @@ import type { ExtensionAPI, ExtensionCommandContext } from "@gsd/pi-coding-agent"; import { checkRemoteAutoSession, isAutoActive, isAutoPaused, stopAutoRemote } from "../auto.js"; -import { assertSafeDirectory } from "../validate-directory.js"; +import { validateDirectory } from "../validate-directory.js"; import { resolveProjectRoot } from "../worktree.js"; import { showNextAction } from "../../shared/tui.js"; import { handleStatus } from "./handlers/core.js"; @@ -12,6 +12,17 @@ export interface GsdDispatchContext { trimmed: string; } +/** + * Typed error for when GSD is run outside a valid project directory. + * Command handlers catch this to show a friendly message instead of a raw exception. + */ +export class GSDNoProjectError extends Error { + constructor(reason: string) { + super(reason); + this.name = "GSDNoProjectError"; + } +} + export function projectRoot(): string { let cwd: string; try { @@ -21,10 +32,10 @@ export function projectRoot(): string { cwd = process.env.HOME ?? "/"; } const root = resolveProjectRoot(cwd); - if (root !== cwd) { - assertSafeDirectory(cwd); - } else { - assertSafeDirectory(root); + const pathToCheck = root !== cwd ? cwd : root; + const result = validateDirectory(pathToCheck); + if (result.severity === "blocked") { + throw new GSDNoProjectError(result.reason ?? 
"GSD must be run inside a project directory."); } return root; } diff --git a/src/resources/extensions/gsd/commands/dispatcher.ts b/src/resources/extensions/gsd/commands/dispatcher.ts index a3d11344b..9ec6bae09 100644 --- a/src/resources/extensions/gsd/commands/dispatcher.ts +++ b/src/resources/extensions/gsd/commands/dispatcher.ts @@ -1,5 +1,6 @@ import type { ExtensionAPI, ExtensionCommandContext } from "@gsd/pi-coding-agent"; +import { GSDNoProjectError } from "./context.js"; import { handleAutoCommand } from "./handlers/auto.js"; import { handleCoreCommand } from "./handlers/core.js"; import { handleOpsCommand } from "./handlers/ops.js"; @@ -21,10 +22,21 @@ export async function handleGSDCommand( () => handleOpsCommand(trimmed, ctx, pi), ]; - for (const handler of handlers) { - if (await handler()) { + try { + for (const handler of handlers) { + if (await handler()) { + return; + } + } + } catch (err) { + if (err instanceof GSDNoProjectError) { + ctx.ui.notify( + `${err.message} \`cd\` into a project directory first.`, + "warning", + ); return; } + throw err; } ctx.ui.notify(`Unknown: /gsd ${trimmed}. Run /gsd help for available commands.`, "warning"); diff --git a/src/resources/extensions/gsd/commands/handlers/auto.ts b/src/resources/extensions/gsd/commands/handlers/auto.ts index 923191cfb..283ff77ed 100644 --- a/src/resources/extensions/gsd/commands/handlers/auto.ts +++ b/src/resources/extensions/gsd/commands/handlers/auto.ts @@ -4,7 +4,7 @@ import { existsSync, readFileSync } from "node:fs"; import { resolve } from "node:path"; import { enableDebug } from "../../debug-logger.js"; -import { getAutoDashboardData, isAutoActive, isAutoPaused, pauseAuto, startAuto, stopAuto, stopAutoRemote } from "../../auto.js"; +import { getAutoDashboardData, isAutoActive, isAutoPaused, pauseAuto, startAutoDetached, stopAuto, stopAutoRemote } from "../../auto.js"; import { handleRate } from "../../commands-rate.js"; import { guardRemoteSession, projectRoot } from "../context.js"; import { findMilestoneIds } from "../../milestone-id-utils.js"; @@ -42,26 +42,6 @@ export function parseMilestoneTarget(input: string): { milestoneId: string | nul return { milestoneId: match[1], rest }; } -/** - * Set GSD_MILESTONE_LOCK to target a specific milestone, then run `fn`. - * Clears the env var when `fn` resolves or rejects, so the lock does not - * leak into subsequent commands in the same process. - */ -async function withMilestoneLock(milestoneId: string, fn: () => Promise): Promise { - const previous = process.env.GSD_MILESTONE_LOCK; - process.env.GSD_MILESTONE_LOCK = milestoneId; - try { - await fn(); - } finally { - // Restore previous value (undefined → delete, else restore). 
- if (previous === undefined) { - delete process.env.GSD_MILESTONE_LOCK; - } else { - process.env.GSD_MILESTONE_LOCK = previous; - } - } -} - export async function handleAutoCommand(trimmed: string, ctx: ExtensionCommandContext, pi: ExtensionAPI): Promise { if (trimmed === "next" || trimmed.startsWith("next ")) { if (trimmed.includes("--dry-run")) { @@ -84,13 +64,10 @@ export async function handleAutoCommand(trimmed: string, ctx: ExtensionCommandCo } } - if (milestoneId) { - await withMilestoneLock(milestoneId, () => - startAuto(ctx, pi, projectRoot(), verboseMode, { step: true }), - ); - } else { - await startAuto(ctx, pi, projectRoot(), verboseMode, { step: true }); - } + startAutoDetached(ctx, pi, projectRoot(), verboseMode, { + step: true, + milestoneLock: milestoneId, + }); return true; } @@ -128,13 +105,11 @@ export async function handleAutoCommand(trimmed: string, ctx: ExtensionCommandCo const { showHeadlessMilestoneCreation } = await import("../../guided-flow.js"); await showHeadlessMilestoneCreation(ctx, pi, projectRoot(), seedContent); } else if (milestoneId) { - // Target a specific milestone — use GSD_MILESTONE_LOCK so state - // derivation only sees this milestone (#2521). - await withMilestoneLock(milestoneId, () => - startAuto(ctx, pi, projectRoot(), verboseMode), - ); + startAutoDetached(ctx, pi, projectRoot(), verboseMode, { + milestoneLock: milestoneId, + }); } else { - await startAuto(ctx, pi, projectRoot(), verboseMode); + startAutoDetached(ctx, pi, projectRoot(), verboseMode); } return true; } @@ -175,10 +150,9 @@ export async function handleAutoCommand(trimmed: string, ctx: ExtensionCommandCo if (trimmed === "") { if (!(await guardRemoteSession(ctx, pi))) return true; - await startAuto(ctx, pi, projectRoot(), false, { step: true }); + startAutoDetached(ctx, pi, projectRoot(), false, { step: true }); return true; } return false; } - diff --git a/src/resources/extensions/gsd/commands/handlers/core.ts b/src/resources/extensions/gsd/commands/handlers/core.ts index e6824815c..51aaec2bc 100644 --- a/src/resources/extensions/gsd/commands/handlers/core.ts +++ b/src/resources/extensions/gsd/commands/handlers/core.ts @@ -8,11 +8,45 @@ import { ensurePreferencesFile, handlePrefs, handlePrefsMode, handlePrefsWizard import { runEnvironmentChecks } from "../../doctor-environment.js"; import { deriveState } from "../../state.js"; import { handleCmux } from "../../commands-cmux.js"; +import { setSessionModelOverride } from "../../session-model-override.js"; import { projectRoot } from "../context.js"; -import { formatShortcut } from "../../files.js"; +import { formattedShortcutPair } from "../../shortcut-defs.js"; -export function showHelp(ctx: ExtensionCommandContext): void { - const lines = [ +export function showHelp(ctx: ExtensionCommandContext, args = ""): void { + const summaryLines = [ + "GSD — Get Shit Done\n", + "QUICK START", + " /gsd start Start a workflow template", + " /gsd Run next unit (same as /gsd next)", + " /gsd auto Run all queued units continuously", + " /gsd pause Pause auto-mode", + " /gsd stop Stop auto-mode gracefully", + "", + "VISIBILITY", + ` /gsd status Dashboard (${formattedShortcutPair("dashboard")})`, + ` /gsd parallel watch Parallel monitor (${formattedShortcutPair("parallel")})`, + ` /gsd notifications Notification history (${formattedShortcutPair("notifications")})`, + " /gsd visualize Interactive 10-tab TUI", + " /gsd queue Show queued/dispatched units", + "", + "COURSE CORRECTION", + " /gsd steer Apply user override to active work", + " /gsd 
capture Quick-capture a thought to CAPTURES.md", + " /gsd triage Classify and route pending captures", + " /gsd undo Revert last completed unit [--force]", + " /gsd rethink Conversational project reorganization", + "", + "SETUP", + " /gsd init Project init wizard", + " /gsd setup Global setup status [llm|search|remote|keys|prefs]", + " /gsd model Switch active session model", + " /gsd prefs Manage preferences", + " /gsd doctor Diagnose and repair .gsd/ state", + "", + "Use /gsd help full for the complete command reference.", + ]; + + const fullLines = [ "GSD — Get Shit Done\n", "WORKFLOW", " /gsd start Start a workflow template (bugfix, spike, feature, hotfix, etc.)", @@ -26,12 +60,13 @@ export function showHelp(ctx: ExtensionCommandContext): void { " /gsd new-milestone Create milestone from headless context (used by gsd headless)", "", "VISIBILITY", - ` /gsd status Show progress dashboard (${formatShortcut("Ctrl+Alt+G")})`, + ` /gsd status Show progress dashboard (${formattedShortcutPair("dashboard")})`, + ` /gsd parallel watch Open parallel worker monitor (${formattedShortcutPair("parallel")})`, " /gsd visualize Interactive 10-tab TUI (progress, timeline, deps, metrics, health, agent, changes, knowledge, captures, export)", " /gsd queue Show queued/dispatched units and execution order", " /gsd history View execution history [--cost] [--phase] [--model] [N]", " /gsd changelog Show categorized release notes [version]", - ` /gsd notifications View persistent notification history [clear|tail|filter] (${formatShortcut("Ctrl+Alt+N")})`, + ` /gsd notifications View persistent notification history [clear|tail|filter] (${formattedShortcutPair("notifications")})`, "", "COURSE CORRECTION", " /gsd steer Apply user override to active work", @@ -71,7 +106,8 @@ export function showHelp(ctx: ExtensionCommandContext): void { " /gsd inspect Show SQLite DB diagnostics (schema, row counts, recent entries)", " /gsd update Update GSD to the latest version via npm", ]; - ctx.ui.notify(lines.join("\n"), "info"); + const full = ["full", "--full", "all"].includes(args.trim().toLowerCase()); + ctx.ui.notify((full ? fullLines : summaryLines).join("\n"), "info"); } export async function handleStatus(ctx: ExtensionCommandContext): Promise { @@ -92,9 +128,9 @@ export async function handleStatus(ctx: ExtensionCommandContext): Promise { overlay: true, overlayOptions: { - width: "70%", - minWidth: 60, - maxHeight: "90%", + width: "90%", + minWidth: 80, + maxHeight: "92%", anchor: "center", }, }, @@ -301,6 +337,17 @@ async function handleModel(trimmedArgs: string, ctx: ExtensionCommandContext, pi return; } + // /gsd model is an explicit per-session pin for GSD dispatches. + // This is captured at auto bootstrap so it survives internal session + // switches during /gsd auto and /gsd next runs. + const sessionId = ctx.sessionManager?.getSessionId?.(); + if (sessionId) { + setSessionModelOverride(sessionId, { + provider: targetModel.provider, + id: targetModel.id, + }); + } + ctx.ui.notify(`Model: ${targetModel.provider}/${targetModel.id}`, "info"); } @@ -309,8 +356,8 @@ export async function handleCoreCommand( ctx: ExtensionCommandContext, pi?: ExtensionAPI, ): Promise { - if (trimmed === "help" || trimmed === "h" || trimmed === "?") { - showHelp(ctx); + if (trimmed === "help" || trimmed === "h" || trimmed === "?" || trimmed.startsWith("help ")) { + showHelp(ctx, trimmed.startsWith("help ") ? 
trimmed.slice(5).trim() : ""); return true; } if (trimmed === "status") { diff --git a/src/resources/extensions/gsd/commands/handlers/notifications-handler.ts b/src/resources/extensions/gsd/commands/handlers/notifications-handler.ts index 16d30d49a..a7440f763 100644 --- a/src/resources/extensions/gsd/commands/handlers/notifications-handler.ts +++ b/src/resources/extensions/gsd/commands/handlers/notifications-handler.ts @@ -13,6 +13,8 @@ import { } from "../../notification-store.js"; import { GSDNotificationOverlay } from "../../notification-overlay.js"; +const MAX_INLINE_ENTRIES = 40; + function severityIcon(severity: NotifySeverity): string { switch (severity) { case "error": return "✗"; @@ -54,8 +56,9 @@ export async function handleNotificationsCommand( if (args === "tail" || args.startsWith("tail ")) { const countStr = args.replace(/^tail\s*/, "").trim(); const count = countStr ? parseInt(countStr, 10) : 20; - const n = isNaN(count) || count < 1 ? 20 : Math.min(count, 100); - const entries = readNotifications().slice(0, n); + const all = readNotifications(); + const n = isNaN(count) || count < 1 ? 20 : Math.min(count, MAX_INLINE_ENTRIES); + const entries = all.slice(0, n); if (entries.length === 0) { ctx.ui.notify("No notifications.", "info"); @@ -65,7 +68,10 @@ export async function handleNotificationsCommand( const lines = entries.map((e) => `${severityIcon(e.severity)} [${formatTimestamp(e.ts)}] ${e.message}`, ); - ctx.ui.notify(`Last ${entries.length} notification(s):\n${lines.join("\n")}`, "info"); + const suffix = all.length > entries.length + ? `\n... and ${all.length - entries.length} more (open /gsd notifications to browse all)` + : ""; + ctx.ui.notify(`Last ${entries.length} notification(s):\n${lines.join("\n")}${suffix}`, "info"); return true; } @@ -86,7 +92,9 @@ export async function handleNotificationsCommand( const lines = entries.slice(0, 20).map((e) => `${severityIcon(e.severity)} [${formatTimestamp(e.ts)}] ${e.message}`, ); - const suffix = entries.length > 20 ? `\n... and ${entries.length - 20} more` : ""; + const suffix = entries.length > 20 + ? `\n... 
and ${entries.length - 20} more (open /gsd notifications to browse all)` + : ""; ctx.ui.notify(`${severity} notifications (${entries.length}):\n${lines.join("\n")}${suffix}`, "info"); return true; } @@ -96,8 +104,8 @@ export async function handleNotificationsCommand( // Try overlay first (TUI mode) if (ctx.hasUI) { try { - await ctx.ui.custom( - (tui, theme, _kb, done) => new GSDNotificationOverlay(tui, theme, () => done()), + const result = await ctx.ui.custom( + (tui, theme, _kb, done) => new GSDNotificationOverlay(tui, theme, () => done(true)), { overlay: true, overlayOptions: { @@ -109,7 +117,9 @@ export async function handleNotificationsCommand( }, }, ); - return true; + if (result !== undefined) { + return true; + } } catch { // Fall through to text output if overlay fails } diff --git a/src/resources/extensions/gsd/commands/handlers/workflow.ts b/src/resources/extensions/gsd/commands/handlers/workflow.ts index 10282fbcc..85f6276e2 100644 --- a/src/resources/extensions/gsd/commands/handlers/workflow.ts +++ b/src/resources/extensions/gsd/commands/handlers/workflow.ts @@ -18,7 +18,7 @@ import { createRun, listRuns } from "../../run-manager.js"; import { setActiveEngineId, setActiveRunDir, - startAuto, + startAutoDetached, pauseAuto, isAutoActive, getActiveEngineId, @@ -77,7 +77,7 @@ async function handleCustomWorkflow( setActiveEngineId("custom"); setActiveRunDir(runDir); ctx.ui.notify(`Created workflow run: ${defName}\nRun dir: ${runDir}`, "info"); - await startAuto(ctx, pi, base, false); + startAutoDetached(ctx, pi, base, false); } catch (err) { // Clean up engine state so a failed workflow run doesn't pollute the next /gsd auto setActiveEngineId(null); @@ -157,13 +157,8 @@ async function handleCustomWorkflow( ctx.ui.notify("No custom workflow to resume. Use /gsd auto for dev workflow.", "warning"); return true; } - try { - await startAuto(ctx, pi, projectRoot(), false); - ctx.ui.notify("Custom workflow resumed.", "info"); - } catch (err) { - const msg = err instanceof Error ? err.message : String(err); - ctx.ui.notify(`Failed to resume workflow: ${msg}`, "error"); - } + startAutoDetached(ctx, pi, projectRoot(), false); + ctx.ui.notify("Custom workflow resumed.", "info"); return true; } @@ -278,4 +273,3 @@ export function getNextMilestoneId(basePath: string): string { const uniqueIds = !!loadEffectiveGSDPreferences()?.preferences?.unique_milestone_ids; return nextMilestoneId(milestoneIds, uniqueIds); } - diff --git a/src/resources/extensions/gsd/context-injector.ts b/src/resources/extensions/gsd/context-injector.ts index 00dcae2c3..c5b90b752 100644 --- a/src/resources/extensions/gsd/context-injector.ts +++ b/src/resources/extensions/gsd/context-injector.ts @@ -16,7 +16,7 @@ import { readFileSync, existsSync } from "node:fs"; import { join, resolve, sep } from "node:path"; import type { StepDefinition } from "./definition-loader.js"; -import { readFrozenDefinition } from "./custom-workflow-engine.js"; +import { readFrozenDefinition } from "./definition-io.js"; /** Maximum characters per artifact to prevent context window blowout. 
*/ const MAX_CONTEXT_CHARS = 10_000; diff --git a/src/resources/extensions/gsd/custom-workflow-engine.ts b/src/resources/extensions/gsd/custom-workflow-engine.ts index bcdbc8f4d..53d520cb9 100644 --- a/src/resources/extensions/gsd/custom-workflow-engine.ts +++ b/src/resources/extensions/gsd/custom-workflow-engine.ts @@ -22,7 +22,6 @@ import type { } from "./engine-types.js"; import { readFileSync } from "node:fs"; import { join } from "node:path"; -import { parse } from "yaml"; import { readGraph, writeGraph, @@ -32,15 +31,13 @@ import { type WorkflowGraph, } from "./graph.js"; import { injectContext } from "./context-injector.js"; -import type { WorkflowDefinition, StepDefinition } from "./definition-loader.js"; +import type { StepDefinition } from "./definition-loader.js"; +import { readFrozenDefinition } from "./definition-io.js"; import { parseUnitId } from "./unit-id.js"; +import { withFileLock } from "./file-lock.js"; -/** Read and parse the frozen DEFINITION.yaml from a run directory. */ -export function readFrozenDefinition(runDir: string): WorkflowDefinition { - const defPath = join(runDir, "DEFINITION.yaml"); - const raw = readFileSync(defPath, "utf-8"); - return parse(raw, { schema: "core" }) as WorkflowDefinition; -} +// Re-export for downstream consumers +export { readFrozenDefinition } from "./definition-io.js"; export class CustomWorkflowEngine implements WorkflowEngine { readonly engineId = "custom"; @@ -179,24 +176,28 @@ export class CustomWorkflowEngine implements WorkflowEngine { state: EngineState, completedStep: CompletedStep, ): Promise { - // Re-read the graph from disk so we do not overwrite concurrent - // workflow edits with a stale in-memory snapshot from deriveState(). - const graph = readGraph(this.runDir); + const graphPath = join(this.runDir, "GRAPH.yaml"); - // Extract stepId from "<milestone>/<slice>/<task>" - const { milestone, slice, task } = parseUnitId(completedStep.unitId); - const stepId = task ?? slice ?? milestone; + return await withFileLock(graphPath, () => { + // Re-read the graph from disk so we do not overwrite concurrent + // workflow edits with a stale in-memory snapshot from deriveState(). + const graph = readGraph(this.runDir); - const updatedGraph = markStepComplete(graph, stepId); - writeGraph(this.runDir, updatedGraph); + // Extract stepId from "<milestone>/<slice>/<task>" + const { milestone, slice, task } = parseUnitId(completedStep.unitId); + const stepId = task ?? slice ?? milestone; - const allDone = updatedGraph.steps.every( - (s) => s.status === "complete" || s.status === "expanded", - ); + const updatedGraph = markStepComplete(graph, stepId); + writeGraph(this.runDir, updatedGraph); - return { - outcome: allDone ? "milestone-complete" : "continue", - }; + const allDone = updatedGraph.steps.every( + (s) => s.status === "complete" || s.status === "expanded", + ); + + return { + outcome: allDone ? "milestone-complete" : "continue", + }; + }); } /** diff --git a/src/resources/extensions/gsd/dashboard-overlay.ts b/src/resources/extensions/gsd/dashboard-overlay.ts index 37bd547fb..bafcb23ac 100644 --- a/src/resources/extensions/gsd/dashboard-overlay.ts +++ b/src/resources/extensions/gsd/dashboard-overlay.ts @@ -3,7 +3,8 @@ * * Full-screen overlay showing auto-mode progress: milestone/slice/task * breakdown, current unit, completed units, timing, and activity log. - * Toggled with Ctrl+Alt+G (⌃⌥G on macOS) or opened from /gsd status. + * Toggled with Ctrl+Alt+G (⌃⌥G on macOS), Ctrl+Shift+G fallback, + * or opened from /gsd status. 
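+ * (Ctrl+Shift+G is presumably a fallback for terminals that reserve Ctrl+Alt chords; both chords also dismiss the overlay, as handleInput below shows.)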
*/ import type { Theme } from "@gsd/pi-coding-agent"; @@ -26,6 +27,7 @@ import { formatDuration, padRight, joinColumns, centerLine, fitColumns, STATUS_G import { estimateTimeRemaining } from "./auto-dashboard.js"; import { computeProgressScore, formatProgressLine } from "./progress-score.js"; import { runEnvironmentChecks, type EnvironmentCheckResult } from "./doctor-environment.js"; +import { formattedShortcutPair } from "./shortcut-defs.js"; function unitLabel(type: string): string { switch (type) { @@ -203,7 +205,12 @@ export class GSDDashboardOverlay { } handleInput(data: string): void { - if (matchesKey(data, Key.escape) || matchesKey(data, Key.ctrl("c")) || matchesKey(data, Key.ctrlAlt("g"))) { + if ( + matchesKey(data, Key.escape) || + matchesKey(data, Key.ctrl("c")) || + matchesKey(data, Key.ctrlAlt("g")) || + matchesKey(data, Key.ctrlShift("g")) + ) { this.dispose(); this.onClose(); return; @@ -587,7 +594,7 @@ export class GSDDashboardOverlay { lines.push(blank()); lines.push(hr()); - lines.push(centered(th.fg("dim", "↑↓ scroll · g/G top/end · esc close"))); + lines.push(centered(th.fg("dim", `↑↓ scroll · g/G top/end · Esc/${formattedShortcutPair("dashboard")} close`))); return lines; } diff --git a/src/resources/extensions/gsd/definition-io.ts b/src/resources/extensions/gsd/definition-io.ts new file mode 100644 index 000000000..ac0ed9a42 --- /dev/null +++ b/src/resources/extensions/gsd/definition-io.ts @@ -0,0 +1,18 @@ +/** + * definition-io.ts — Read frozen DEFINITION.yaml from a run directory. + * + * Extracted from custom-workflow-engine.ts to break the circular dependency + * between context-injector.ts and custom-workflow-engine.ts. + */ + +import { readFileSync } from "node:fs"; +import { join } from "node:path"; +import { parse } from "yaml"; +import type { WorkflowDefinition } from "./definition-loader.js"; + +/** Read and parse the frozen DEFINITION.yaml from a run directory. */ +export function readFrozenDefinition(runDir: string): WorkflowDefinition { + const defPath = join(runDir, "DEFINITION.yaml"); + const raw = readFileSync(defPath, "utf-8"); + return parse(raw, { schema: "core" }) as WorkflowDefinition; +} diff --git a/src/resources/extensions/gsd/dispatch-guard.ts b/src/resources/extensions/gsd/dispatch-guard.ts index c687f1b30..4f7422f88 100644 --- a/src/resources/extensions/gsd/dispatch-guard.ts +++ b/src/resources/extensions/gsd/dispatch-guard.ts @@ -107,10 +107,32 @@ export function getPriorSliceCompletionBlocker( // it may be a cross-milestone reference handled elsewhere. } } else { + const milestoneUsesExplicitDeps = slices.some((slice) => slice.depends.length > 0); + if (milestoneUsesExplicitDeps) { + return null; + } + + // Positional fallback is only a heuristic for legacy slices with no + // declared dependencies. Skip any earlier slice that depends on the + // target, directly or transitively, or we can deadlock a valid zero-dep + // slice behind its own downstream dependents (#3720). 
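+ // Worked trace (hypothetical graph, if this branch were ever reached with declared deps: + // target S3, S2.depends = ["S3"], S4.depends = ["S2"]): the fixpoint below collects S2 + // (a direct dependent of the target) and then S4 (transitively), so the positional scan + // skips both.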
+ const reverseDependents = new Set<string>(); + let changed = true; + while (changed) { + changed = false; + for (const slice of slices) { + if (reverseDependents.has(slice.id)) continue; + if (slice.depends.some((depId) => depId === targetSid || reverseDependents.has(depId))) { + reverseDependents.add(slice.id); + changed = true; + } + } + } + + const targetIndex = slices.findIndex((slice) => slice.id === targetSid); const incomplete = slices .slice(0, targetIndex) - .find((slice) => !slice.done); + .find((slice) => !slice.done && !reverseDependents.has(slice.id)); if (incomplete) { return `Cannot dispatch ${unitType} ${unitId}: earlier slice ${targetMid}/${incomplete.id} is not complete.`; } diff --git a/src/resources/extensions/gsd/doctor-engine-checks.ts b/src/resources/extensions/gsd/doctor-engine-checks.ts index 8b74dcac4..e7fc57540 100644 --- a/src/resources/extensions/gsd/doctor-engine-checks.ts +++ b/src/resources/extensions/gsd/doctor-engine-checks.ts @@ -13,6 +13,20 @@ export async function checkEngineHealth( issues: DoctorIssue[], fixesApplied: string[], ): Promise<void> { + const dbPath = join(basePath, ".gsd", "gsd.db"); + + if (!isDbAvailable() && existsSync(dbPath)) { + issues.push({ + severity: "warning", + code: "db_unavailable", + scope: "project", + unitId: "project", + message: "Database unavailable — using filesystem state derivation (degraded mode). State queries may be slower and less reliable.", + file: ".gsd/gsd.db", + fixable: false, + }); + } + // ── DB constraint violation detection (full doctor only, not pre-dispatch per D-10) ── try { if (isDbAvailable()) { diff --git a/src/resources/extensions/gsd/doctor-format.ts b/src/resources/extensions/gsd/doctor-format.ts index 841f7ee13..a22d64e97 100644 --- a/src/resources/extensions/gsd/doctor-format.ts +++ b/src/resources/extensions/gsd/doctor-format.ts @@ -2,6 +2,7 @@ import type { DoctorIssue, DoctorIssueCode, DoctorReport, DoctorSummary } from " function matchesScope(unitId: string, scope?: string): boolean { if (!scope) return true; + if (unitId === "project" || unitId === "environment") return true; return unitId === scope || unitId.startsWith(`${scope}/`) || unitId.startsWith(`${scope}`); } diff --git a/src/resources/extensions/gsd/doctor-providers.ts b/src/resources/extensions/gsd/doctor-providers.ts index e0f35341b..06242fc81 100644 --- a/src/resources/extensions/gsd/doctor-providers.ts +++ b/src/resources/extensions/gsd/doctor-providers.ts @@ -185,11 +185,35 @@ const PROVIDER_ROUTES: Record<string, string[]> = { google: ["google-gemini-cli"], }; +/** + * Providers that use external CLI authentication (not API keys). + * These are always considered "ok" — the host CLI handles auth. + */ +const CLI_AUTH_PROVIDERS = new Set<string>([ + "claude-code", + "openai-codex", + "google-gemini-cli", + "google-antigravity", +]); + function checkLlmProviders(): ProviderCheckResult[] { const required = collectConfiguredModelProviders(); const results: ProviderCheckResult[] = []; for (const providerId of required) { + // CLI-authenticated providers don't need API keys — skip key check + if (CLI_AUTH_PROVIDERS.has(providerId)) { + const info = PROVIDER_REGISTRY.find(p => p.id === providerId); + results.push({ + name: providerId, + label: info?.label ?? providerId, + category: "llm", + status: "ok", + message: `${info?.label ?? providerId} — CLI auth (no key needed)`, + required: true, + }); + continue; + } const info = PROVIDER_REGISTRY.find(p => p.id === providerId); const label = providerId === "anthropic-vertex" ? 
"Anthropic Vertex" diff --git a/src/resources/extensions/gsd/doctor-runtime-checks.ts b/src/resources/extensions/gsd/doctor-runtime-checks.ts index d2af2bd9a..5b4b99572 100644 --- a/src/resources/extensions/gsd/doctor-runtime-checks.ts +++ b/src/resources/extensions/gsd/doctor-runtime-checks.ts @@ -303,13 +303,16 @@ export async function checkRuntimeHealth( content.split("\n").map(l => l.trim()).filter(l => l && !l.startsWith("#")), ); - // Check for critical runtime patterns that must be present + // Check for critical runtime patterns that must be present. + // NOTE: GSD_RUNTIME_PATTERNS in gitignore.ts is the canonical source of truth. + // This is a minimal subset for the doctor check. const criticalPatterns = [ ".gsd/activity/", ".gsd/runtime/", ".gsd/auto.lock", - ".gsd/gsd.db", - ".gsd/completed-units.json", + ".gsd/gsd.db*", + ".gsd/completed-units*.json", + ".gsd/event-log.jsonl", ]; // If blanket .gsd/ or .gsd is present, all patterns are covered diff --git a/src/resources/extensions/gsd/doctor-types.ts b/src/resources/extensions/gsd/doctor-types.ts index 8c804b3b8..309848048 100644 --- a/src/resources/extensions/gsd/doctor-types.ts +++ b/src/resources/extensions/gsd/doctor-types.ts @@ -78,6 +78,7 @@ export type DoctorIssueCode = | "db_orphaned_slice" | "db_done_task_no_summary" | "db_duplicate_id" + | "db_unavailable" | "projection_drift"; /** diff --git a/src/resources/extensions/gsd/error-classifier.ts b/src/resources/extensions/gsd/error-classifier.ts index 604167451..f302418ea 100644 --- a/src/resources/extensions/gsd/error-classifier.ts +++ b/src/resources/extensions/gsd/error-classifier.ts @@ -44,10 +44,13 @@ export function resetRetryState(state: RetryState): void { const PERMANENT_RE = /auth|unauthorized|forbidden|invalid.*key|invalid.*api|billing|quota exceeded|account/i; const RATE_LIMIT_RE = /rate.?limit|too many requests|429/i; +// OpenRouter affordability-style quota errors should be treated as transient +// so core retry logic can lower maxTokens and continue in-session. +const AFFORDABILITY_RE = /requires more credits|can only afford|insufficient credits|not enough credits|fewer max_tokens/i; const NETWORK_RE = /network|ECONNRESET|ETIMEDOUT|ECONNREFUSED|socket hang up|fetch failed|connection.*reset|dns/i; const SERVER_RE = /internal server error|500|502|503|overloaded|server_error|api_error|service.?unavailable/i; // ECONNRESET/ECONNREFUSED are in NETWORK_RE (same-model retry first). -const CONNECTION_RE = /terminated|connection.?refused|other side closed|EPIPE|network.?(?:is\s+)?unavailable|stream_exhausted(?:_without_result)?/i; +const CONNECTION_RE = /terminated|connection.?(?:refused|error)|other side closed|EPIPE|network.?(?:is\s+)?unavailable|stream_exhausted(?:_without_result)?/i; // Catch-all for V8 JSON.parse errors: all modern variants end with "in JSON at position \d+". // This eliminates the need to enumerate every error message variant individually. const STREAM_RE = /in JSON at position \d+|Unexpected end of JSON|SyntaxError.*JSON/i; @@ -67,7 +70,7 @@ const RESET_DELAY_RE = /reset in (\d+)s/i; */ export function classifyError(errorMsg: string, retryAfterMs?: number): ErrorClass { const isPermanent = PERMANENT_RE.test(errorMsg); - const isRateLimit = RATE_LIMIT_RE.test(errorMsg); + const isRateLimit = RATE_LIMIT_RE.test(errorMsg) || AFFORDABILITY_RE.test(errorMsg); // 1. 
Permanent — but rate limit takes precedence if (isPermanent && !isRateLimit) { diff --git a/src/resources/extensions/gsd/file-lock.ts b/src/resources/extensions/gsd/file-lock.ts new file mode 100644 index 000000000..a40c77854 --- /dev/null +++ b/src/resources/extensions/gsd/file-lock.ts @@ -0,0 +1,65 @@ +import { existsSync } from "node:fs"; +import { createRequire } from "node:module"; +import { join } from "node:path"; + +// This module is ESM, so a bare require() is unavailable at runtime; +// createRequire gives us a CJS resolver for the optional dependency. +const _localRequire = createRequire(import.meta.url); + +function _require(name: string) { + try { + return _localRequire(name); + } catch { + try { + const gsdPiRequire = createRequire( + join(process.cwd(), "node_modules", "gsd-pi", "index.js"), + ); + return gsdPiRequire(name); + } catch { + return null; + } + } +} + +export function withFileLockSync<T>(filePath: string, fn: () => T): T { + const lockfile = _require("proper-lockfile"); + if (!lockfile) return fn(); + + if (!existsSync(filePath)) return fn(); + + try { + const release = lockfile.lockSync(filePath, { retries: 5, stale: 10000 }); + try { + return fn(); + } finally { + release(); + } + } catch (err: any) { + if (err.code === "ELOCKED") { + // Could not acquire the lock after retries; fall back to running unlocked rather than crashing the whole state machine. + return fn(); + } + throw err; + } +} + +export async function withFileLock<T>(filePath: string, fn: () => Promise<T> | T): Promise<T> { + const lockfile = _require("proper-lockfile"); + if (!lockfile) return await fn(); + + if (!existsSync(filePath)) return await fn(); + + try { + const release = await lockfile.lock(filePath, { retries: 5, stale: 10000 }); + try { + return await fn(); + } finally { + await release(); + } + } catch (err: any) { + if (err.code === "ELOCKED") { + return await fn(); + } + throw err; + } +} diff --git a/src/resources/extensions/gsd/file-watcher.ts b/src/resources/extensions/gsd/file-watcher.ts deleted file mode 100644 index a8b0be19c..000000000 --- a/src/resources/extensions/gsd/file-watcher.ts +++ /dev/null @@ -1,100 +0,0 @@ -import type { FSWatcher } from "chokidar"; -import type { EventBus } from "@gsd/pi-coding-agent"; -import { relative } from "node:path"; - -let watcher: FSWatcher | null = null; -let pending = new Map<string, ReturnType<typeof setTimeout>>(); - -const EVENT_MAP: Record<string, string> = { - "settings.json": "settings-changed", - "auth.json": "auth-changed", - "models.json": "models-changed", -}; - -const EXTENSIONS_DIR = "extensions"; - -const IGNORED_PATTERNS = [ - "**/sessions/**", - "**/*.tmp", - "**/*.swp", - "**/*~", - "**/.DS_Store", -]; - -const DEBOUNCE_MS = 300; - -/** - * Start watching `agentDir` (e.g. `~/.gsd/agent/`) for config changes. - * Emits events on the supplied EventBus when watched files are modified. 
- */ -export async function startFileWatcher( - agentDir: string, - eventBus: EventBus, -): Promise<void> { - if (watcher) { - await watcher.close(); - } - - const { watch } = await import("chokidar"); - - pending = new Map(); - - function debounceEmit(event: string): void { - const existing = pending.get(event); - if (existing) clearTimeout(existing); - pending.set( - event, - setTimeout(() => { - pending.delete(event); - eventBus.emit(event, { timestamp: Date.now() }); - }, DEBOUNCE_MS), - ); - } - - function resolveEvent(filePath: string): string | null { - const rel = relative(agentDir, filePath); - if (rel.startsWith("..")) return null; - - // Check direct file matches - for (const [file, event] of Object.entries(EVENT_MAP)) { - if (rel === file) return event; - } - - // Check extensions directory - if (rel.startsWith(EXTENSIONS_DIR + "/") || rel === EXTENSIONS_DIR) { - return "extensions-changed"; - } - - return null; - } - - watcher = watch(agentDir, { - ignoreInitial: true, - depth: 2, - ignored: IGNORED_PATTERNS, - }); - - for (const eventType of ["add", "change", "unlink"] as const) { - watcher.on(eventType, (filePath: string) => { - const event = resolveEvent(filePath); - if (event) debounceEmit(event); - }); - } - - // Wait for watcher to be ready - await new Promise((resolve) => { - watcher!.on("ready", resolve); - }); -} - -/** - * Stop the file watcher and clean up resources. - */ -export async function stopFileWatcher(): Promise<void> { - for (const timer of pending.values()) clearTimeout(timer); - pending.clear(); - if (watcher) { - await watcher.close(); - watcher = null; - } -} diff --git a/src/resources/extensions/gsd/forensics.ts b/src/resources/extensions/gsd/forensics.ts index ba2746f8b..76be923d8 100644 --- a/src/resources/extensions/gsd/forensics.ts +++ b/src/resources/extensions/gsd/forensics.ts @@ -650,19 +650,33 @@ function getDbCompletionCounts(): DbCompletionCounts | null { * Exported for testability. */ export function detectStuckLoops(units: UnitMetrics[], anomalies: ForensicAnomaly[]): void { - // First, collect unique startedAt values per type/id key - const dispatchMap = new Map<string, Set<number>>(); + // First, collect unique startedAt values per type/id key, bucketed by + // autoSessionKey when available so cross-session recovery does not look + // like a within-session stuck loop. + const dispatchMap = new Map<string, Map<string, Set<number>>>(); for (const u of units) { const key = `${u.type}/${u.id}`; - let starts = dispatchMap.get(key); + let sessionBuckets = dispatchMap.get(key); + if (!sessionBuckets) { + sessionBuckets = new Map(); + dispatchMap.set(key, sessionBuckets); + } + + const sessionKey = u.autoSessionKey ?? "__legacy__"; + let starts = sessionBuckets.get(sessionKey); if (!starts) { starts = new Set(); - dispatchMap.set(key, starts); + sessionBuckets.set(sessionKey, starts); } starts.add(u.startedAt); } - for (const [key, starts] of dispatchMap) { - const count = starts.size; + + for (const [key, sessionBuckets] of dispatchMap) { + const hasSessionAwareData = Array.from(sessionBuckets.keys()).some((sessionKey) => sessionKey !== "__legacy__"); + const count = hasSessionAwareData + ? Math.max(...Array.from(sessionBuckets.values(), (starts) => starts.size)) + : (sessionBuckets.get("__legacy__")?.size ?? 
0); + if (count > 1) { const [unitType, ...idParts] = key.split("/"); anomalies.push({ @@ -671,7 +685,9 @@ export function detectStuckLoops(units: UnitMetrics[], anomalies: ForensicAnomal unitType, unitId: idParts.join("/"), summary: `Unit ${key} was dispatched ${count} times`, - details: `Repeated dispatch suggests the unit completed but its artifacts weren't verified, or the state machine kept returning it.`, + details: hasSessionAwareData + ? `Repeated dispatch within the same auto session suggests the unit completed but its artifacts were not verified, or the state machine kept returning it. Cross-session recovery runs are ignored.` + : `Repeated dispatch suggests the unit completed but its artifacts weren't verified, or the state machine kept returning it.`, }); } } diff --git a/src/resources/extensions/gsd/gate-registry.ts b/src/resources/extensions/gsd/gate-registry.ts new file mode 100644 index 000000000..be9de87ee --- /dev/null +++ b/src/resources/extensions/gsd/gate-registry.ts @@ -0,0 +1,251 @@ +/** + * GSD Gate Registry — single source of truth for quality-gate ownership. + * + * Each gate declares which workflow turn owns it, the scope at which it is + * persisted in the `quality_gates` table, and the question/guidance text used + * in the prompt that turn sends. The registry replaces the ad-hoc + * `GATE_QUESTIONS` table that used to live in `auto-prompts.ts`, and every + * layer of the prompt system (prompt builders, dispatch rules, state + * derivation, tool handlers) consults it so a pending gate can never be + * silently dropped. + * + * Design notes: + * - `GATE_REGISTRY` is exhaustiveness-checked against `GateId` via + * `satisfies Record<GateId, GateDefinition>`, so adding a new GateId + * without a registry entry is a compile error. + * - `getGatesForTurn(turn)` returns the definitions a turn owns. + * - `assertGateCoverage(pending, turn)` throws a GSDError if the pending + * list for a turn contains unknown gates, or if any gate owned by the + * turn is missing from the pending list. + */ + +import { GSDError, GSD_PARSE_ERROR } from "./errors.js"; +import type { GateId, GateRow, GateScope } from "./types.js"; + +/** Which workflow turn is responsible for evaluating / closing a gate. */ +export type OwnerTurn = + | "gate-evaluate" + | "execute-task" + | "complete-slice" + | "validate-milestone"; + +export interface GateDefinition { + id: GateId; + scope: GateScope; + ownerTurn: OwnerTurn; + /** One-line question the assistant must answer. */ + question: string; + /** Markdown guidance describing what a good answer looks like. */ + guidance: string; + /** H3 section header used in the artifact the turn writes + * (e.g. "Operational Readiness" for Q8 in the slice summary). */ + promptSection: string; +} + +export const GATE_REGISTRY = { + Q3: { + id: "Q3", + scope: "slice", + ownerTurn: "gate-evaluate", + question: "How can this be exploited?", + guidance: [ + "Identify abuse scenarios: parameter tampering, replay attacks, privilege escalation.", + "Map data exposure risks: PII, tokens, secrets accessible through this slice.", + "Define input trust boundaries: untrusted user input reaching DB, API, or filesystem.", + "If none apply, return verdict 'omitted' with rationale explaining why.", + ].join("\n"), + promptSection: "Abuse Surface", + }, + Q4: { + id: "Q4", + scope: "slice", + ownerTurn: "gate-evaluate", + question: "What existing promises does this break?", + guidance: [ + "List which existing requirements (R001, R003, etc.) 
are touched by this slice.", + "Identify what must be re-tested after shipping.", + "Flag decisions that should be revisited given the new scope.", + "If no existing requirements are affected, return verdict 'omitted'.", + ].join("\n"), + promptSection: "Broken Promises", + }, + Q5: { + id: "Q5", + scope: "task", + ownerTurn: "execute-task", + question: "What breaks when dependencies fail?", + guidance: [ + "Enumerate the task's external dependencies (APIs, filesystem, network, subprocesses).", + "Describe the failure path for each: timeout, malformed response, connection loss.", + "Verify the implementation handles each failure or explicitly bubbles the error.", + "Return verdict 'omitted' only if the task has no external dependencies.", + ].join("\n"), + promptSection: "Failure Modes", + }, + Q6: { + id: "Q6", + scope: "task", + ownerTurn: "execute-task", + question: "What is the 10x load breakpoint?", + guidance: [ + "Identify the resource that saturates first at 10x the expected load.", + "Describe the protection applied (pool sizing, rate limiting, pagination, caching).", + "Return verdict 'omitted' if the task has no runtime load dimension.", + ].join("\n"), + promptSection: "Load Profile", + }, + Q7: { + id: "Q7", + scope: "task", + ownerTurn: "execute-task", + question: "What negative tests protect this task?", + guidance: [ + "List malformed inputs, error paths, and boundary conditions the tests cover.", + "Point to the specific test files or cases that assert each negative scenario.", + "Return verdict 'omitted' only if the task has no meaningful negative surface.", + ].join("\n"), + promptSection: "Negative Tests", + }, + Q8: { + id: "Q8", + scope: "slice", + ownerTurn: "complete-slice", + question: "How will ops know this slice is healthy or broken?", + guidance: [ + "Describe the health signal (metric, log line, dashboard) that proves the slice works.", + "Describe the failure signal that triggers an alert or paging.", + "Document the recovery procedure and any monitoring gaps.", + "Return verdict 'omitted' only for slices with no runtime behavior at all.", + ].join("\n"), + promptSection: "Operational Readiness", + }, + MV01: { + id: "MV01", + scope: "milestone", + ownerTurn: "validate-milestone", + question: "Is every success criterion in the milestone roadmap satisfied?", + guidance: [ + "Walk the success-criteria checklist from the milestone roadmap.", + "For each criterion, point to the slice / assessment / verification evidence that proves it.", + "Return verdict 'flag' if any criterion is unmet or unverifiable.", + ].join("\n"), + promptSection: "Success Criteria Checklist", + }, + MV02: { + id: "MV02", + scope: "milestone", + ownerTurn: "validate-milestone", + question: "Does every slice have a SUMMARY.md and a passing assessment?", + guidance: [ + "Confirm every slice listed in the roadmap has a SUMMARY.md.", + "Confirm each slice has an ASSESSMENT verdict of 'pass' (or justified 'omitted').", + "Flag missing artifacts and slices with outstanding follow-ups or known limitations.", + ].join("\n"), + promptSection: "Slice Delivery Audit", + }, + MV03: { + id: "MV03", + scope: "milestone", + ownerTurn: "validate-milestone", + question: "Do the slices integrate end-to-end?", + guidance: [ + "Trace at least one cross-slice flow proving the pieces compose.", + "Flag gaps where two slices were built in isolation with no integration evidence.", + ].join("\n"), + promptSection: "Cross-Slice Integration", + }, + MV04: { + id: "MV04", + scope: "milestone", + ownerTurn: 
"validate-milestone", + question: "Are all touched requirements covered and still coherent?", + guidance: [ + "For each requirement advanced, validated, surfaced, or invalidated across the milestone's slices, confirm the milestone-level evidence matches.", + "Flag requirements that slices claim to advance but no artifact proves.", + ].join("\n"), + promptSection: "Requirement Coverage", + }, +} as const satisfies Record; + +export type GateRegistry = typeof GATE_REGISTRY; + +/** Stable ordered lists per owner turn — iteration order matches declaration. */ +const ORDERED_GATES: readonly GateDefinition[] = Object.values(GATE_REGISTRY) as readonly GateDefinition[]; + +/** Return every gate owned by a turn, in stable declaration order. */ +export function getGatesForTurn(turn: OwnerTurn): GateDefinition[] { + return ORDERED_GATES.filter((g) => g.ownerTurn === turn); +} + +/** Return the set of gate ids a turn owns. */ +export function getGateIdsForTurn(turn: OwnerTurn): Set { + return new Set(getGatesForTurn(turn).map((g) => g.id)); +} + +/** Look up a definition by gate id, or undefined if unknown. */ +export function getGateDefinition(id: string): GateDefinition | undefined { + return (GATE_REGISTRY as Record)[id]; +} + +/** Look up the owner turn for a gate id. Throws if the gate is unknown. */ +export function getOwnerTurn(id: GateId): OwnerTurn { + const def = GATE_REGISTRY[id]; + if (!def) { + throw new GSDError(GSD_PARSE_ERROR, `gate-registry: unknown gate id "${id}"`); + } + return def.ownerTurn; +} + +/** + * Assert that the pending gate rows for a turn match what the registry says + * the turn owns. Fails loudly rather than silently skipping. + * + * - Every row in `pending` must have a definition whose `ownerTurn` matches `turn`. + * (The caller is responsible for scoping the pending list — e.g. filtering + * by slice scope before passing it in.) + * - `options.requireAll` (default true): every gate the turn owns must appear + * in `pending`. Set to false for turns like `execute-task` that only need + * coverage for the subset of gates that were seeded (e.g. tasks with no + * external dependencies have no Q5 row). + */ +export function assertGateCoverage( + pending: ReadonlyArray>, + turn: OwnerTurn, + options: { requireAll?: boolean } = {}, +): void { + const requireAll = options.requireAll ?? 
true; + const expected = getGateIdsForTurn(turn); + const pendingIds = new Set(pending.map((g) => g.gate_id)); + + const unknown: string[] = []; + for (const id of pendingIds) { + const def = getGateDefinition(id); + if (!def) { + unknown.push(id); + continue; + } + if (def.ownerTurn !== turn) { + unknown.push(`${id} (owned by ${def.ownerTurn}, not ${turn})`); + } + } + + if (unknown.length > 0) { + throw new GSDError( + GSD_PARSE_ERROR, + `assertGateCoverage: turn "${turn}" received pending gates it does not own: ${unknown.join(", ")}`, + ); + } + + if (requireAll) { + const missing: GateId[] = []; + for (const id of expected) { + if (!pendingIds.has(id)) missing.push(id); + } + if (missing.length > 0) { + throw new GSDError( + GSD_PARSE_ERROR, + `assertGateCoverage: turn "${turn}" is missing required gates: ${missing.join(", ")}`, + ); + } + } +} diff --git a/src/resources/extensions/gsd/git-service.ts b/src/resources/extensions/gsd/git-service.ts index ae73a0e94..c937da714 100644 --- a/src/resources/extensions/gsd/git-service.ts +++ b/src/resources/extensions/gsd/git-service.ts @@ -192,22 +192,25 @@ export interface PreMergeCheckResult { /** * GSD runtime paths that should be excluded from smart staging. * These are transient/generated artifacts that should never be committed. - * Matches the union of SKIP_PATHS + SKIP_EXACT in worktree-manager.ts - * and the first 7 entries in gitignore.ts BASELINE_PATTERNS. + * + * NOTE: GSD_RUNTIME_PATTERNS in gitignore.ts is the canonical source of truth. + * This array must stay synchronized with it. */ export const RUNTIME_EXCLUSION_PATHS: readonly string[] = [ ".gsd/activity/", + ".gsd/forensics/", ".gsd/runtime/", ".gsd/worktrees/", + ".gsd/parallel/", ".gsd/auto.lock", ".gsd/metrics.json", - ".gsd/completed-units.json", + ".gsd/completed-units*.json", // covers completed-units.json and archived completed-units-{MID}.json + ".gsd/state-manifest.json", ".gsd/STATE.md", - ".gsd/gsd.db", - ".gsd/gsd.db-shm", // SQLite WAL sidecar — always created alongside gsd.db (#2296) - ".gsd/gsd.db-wal", // SQLite WAL sidecar — always created alongside gsd.db (#2296) - ".gsd/journal/", // daily-rotated JSONL event journal (#2296) - ".gsd/doctor-history.jsonl", // doctor run history (#2296) + ".gsd/gsd.db*", + ".gsd/journal/", + ".gsd/doctor-history.jsonl", + ".gsd/event-log.jsonl", ".gsd/DISCUSSION-MANIFEST.json", ]; diff --git a/src/resources/extensions/gsd/gitignore.ts b/src/resources/extensions/gsd/gitignore.ts index 8a80c3da5..98341849a 100644 --- a/src/resources/extensions/gsd/gitignore.ts +++ b/src/resources/extensions/gsd/gitignore.ts @@ -15,6 +15,12 @@ import { GIT_NO_PROMPT_ENV } from "./git-constants.js"; /** * GSD runtime patterns for git index cleanup. + * + * CANONICAL SOURCE OF TRUTH: This array is the authoritative list of runtime + * ignore patterns. Other modules (RUNTIME_EXCLUSION_PATHS in git-service.ts, + * SKIP_* arrays in worktree-manager.ts, criticalPatterns in doctor-runtime-checks.ts) + * must stay synchronized with this list. + * * With external state (symlink), these are a no-op in most cases, * but retained for backwards compatibility during migration. 
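+ * The glob entries (.gsd/gsd.db*, .gsd/completed-units*.json) each fold several older + * literals into one pattern: the db glob covers the SQLite -wal/-shm sidecars, and the + * completed-units glob covers archived per-milestone completed-units-{MID}.json files.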
*/ @@ -26,13 +32,13 @@ const GSD_RUNTIME_PATTERNS = [ ".gsd/parallel/", ".gsd/auto.lock", ".gsd/metrics.json", - ".gsd/completed-units.json", + ".gsd/completed-units*.json", // covers completed-units.json and archived completed-units-{MID}.json + ".gsd/state-manifest.json", ".gsd/STATE.md", - ".gsd/gsd.db", - ".gsd/gsd.db-shm", // SQLite WAL sidecar — always created alongside gsd.db (#2296) - ".gsd/gsd.db-wal", // SQLite WAL sidecar — always created alongside gsd.db (#2296) - ".gsd/journal/", // daily-rotated JSONL event journal (#2296) - ".gsd/doctor-history.jsonl", // doctor run history (#2296) + ".gsd/gsd.db*", + ".gsd/journal/", + ".gsd/doctor-history.jsonl", + ".gsd/event-log.jsonl", ".gsd/DISCUSSION-MANIFEST.json", ".gsd/milestones/**/*-CONTINUE.md", ".gsd/milestones/**/continue.md", diff --git a/src/resources/extensions/gsd/gsd-db.ts b/src/resources/extensions/gsd/gsd-db.ts index e440bdb44..f6d379048 100644 --- a/src/resources/extensions/gsd/gsd-db.ts +++ b/src/resources/extensions/gsd/gsd-db.ts @@ -10,6 +10,7 @@ import { existsSync, copyFileSync, mkdirSync, realpathSync } from "node:fs"; import { dirname } from "node:path"; import type { Decision, Requirement, GateRow, GateId, GateScope, GateStatus, GateVerdict } from "./types.js"; import { GSDError, GSD_STALE_STATE } from "./errors.js"; +import { getGateIdsForTurn, type OwnerTurn } from "./gate-registry.js"; import { logError, logWarning } from "./workflow-logger.js"; const _require = createRequire(import.meta.url); @@ -162,13 +163,36 @@ function openRawDb(path: string): unknown { const SCHEMA_VERSION = 14; +function indexExists(db: DbAdapter, name: string): boolean { + return !!db.prepare( + "SELECT 1 as present FROM sqlite_master WHERE type = 'index' AND name = ?", + ).get(name); +} + +function dedupeVerificationEvidenceRows(db: DbAdapter): void { + db.exec(` + DELETE FROM verification_evidence + WHERE rowid NOT IN ( + SELECT MIN(rowid) + FROM verification_evidence + GROUP BY task_id, slice_id, milestone_id, command, verdict + ) + `); +} + +function ensureVerificationEvidenceDedupIndex(db: DbAdapter): void { + if (indexExists(db, "idx_verification_evidence_dedup")) return; + dedupeVerificationEvidenceRows(db); + db.exec("CREATE UNIQUE INDEX IF NOT EXISTS idx_verification_evidence_dedup ON verification_evidence(task_id, slice_id, milestone_id, command, verdict)"); +} + function initSchema(db: DbAdapter, fileBacked: boolean): void { if (fileBacked) db.exec("PRAGMA journal_mode=WAL"); if (fileBacked) db.exec("PRAGMA busy_timeout = 5000"); if (fileBacked) db.exec("PRAGMA synchronous = NORMAL"); if (fileBacked) db.exec("PRAGMA auto_vacuum = INCREMENTAL"); if (fileBacked) db.exec("PRAGMA cache_size = -8000"); // 8 MB page cache - if (fileBacked) db.exec("PRAGMA mmap_size = 67108864"); // 64 MB mmap + if (fileBacked && process.platform !== "darwin") db.exec("PRAGMA mmap_size = 67108864"); // 64 MB mmap db.exec("PRAGMA temp_store = MEMORY"); db.exec("PRAGMA foreign_keys = ON"); @@ -409,7 +433,7 @@ function initSchema(db: DbAdapter, fileBacked: boolean): void { db.exec("CREATE INDEX IF NOT EXISTS idx_milestones_status ON milestones(status)"); db.exec("CREATE INDEX IF NOT EXISTS idx_quality_gates_pending ON quality_gates(milestone_id, slice_id, status)"); db.exec("CREATE INDEX IF NOT EXISTS idx_verification_evidence_task ON verification_evidence(milestone_id, slice_id, task_id)"); - db.exec("CREATE UNIQUE INDEX IF NOT EXISTS idx_verification_evidence_dedup ON verification_evidence(task_id, slice_id, milestone_id, command, verdict)"); + 
ensureVerificationEvidenceDedupIndex(db); // v14 index — slice dependency lookups db.exec("CREATE INDEX IF NOT EXISTS idx_slice_deps_target ON slice_dependencies(milestone_id, depends_on_slice_id)"); @@ -742,7 +766,7 @@ function migrateSchema(db: DbAdapter): void { db.exec("CREATE INDEX IF NOT EXISTS idx_milestones_status ON milestones(status)"); db.exec("CREATE INDEX IF NOT EXISTS idx_quality_gates_pending ON quality_gates(milestone_id, slice_id, status)"); db.exec("CREATE INDEX IF NOT EXISTS idx_verification_evidence_task ON verification_evidence(milestone_id, slice_id, task_id)"); - db.exec("CREATE UNIQUE INDEX IF NOT EXISTS idx_verification_evidence_dedup ON verification_evidence(task_id, slice_id, milestone_id, command, verdict)"); + ensureVerificationEvidenceDedupIndex(db); db.prepare("INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)").run({ ":version": 13, ":applied_at": new Date().toISOString(), @@ -856,6 +880,7 @@ export function closeDatabase(): void { currentDb = null; currentPath = null; currentPid = 0; + _dbOpenAttempted = false; } } @@ -1540,6 +1565,30 @@ export interface TaskRow { } function rowToTask(row: Record): TaskRow { + const parseTaskArray = (value: unknown): string[] => { + if (Array.isArray(value)) { + return value.filter((entry): entry is string => typeof entry === "string"); + } + if (typeof value !== "string") return []; + + const trimmed = value.trim(); + if (!trimmed) return []; + + try { + const parsed = JSON.parse(trimmed); + if (Array.isArray(parsed)) { + return parsed.filter((entry): entry is string => typeof entry === "string"); + } + if (typeof parsed === "string" && parsed.trim()) { + return [parsed.trim()]; + } + } catch { + // Older/corrupt DB rows may contain raw comma-separated paths instead of JSON arrays. + } + + return trimmed.split(",").map((entry) => entry.trim()).filter(Boolean); + }; + return { milestone_id: row["milestone_id"] as string, slice_id: row["slice_id"] as string, @@ -1559,10 +1608,10 @@ function rowToTask(row: Record): TaskRow { full_summary_md: row["full_summary_md"] as string, description: (row["description"] as string) ?? "", estimate: (row["estimate"] as string) ?? "", - files: JSON.parse((row["files"] as string) || "[]"), + files: parseTaskArray(row["files"]), verify: (row["verify"] as string) ?? "", - inputs: JSON.parse((row["inputs"] as string) || "[]"), - expected_output: JSON.parse((row["expected_output"] as string) || "[]"), + inputs: parseTaskArray(row["inputs"]), + expected_output: parseTaskArray(row["expected_output"]), observability_impact: (row["observability_impact"] as string) ?? "", full_plan_md: (row["full_plan_md"] as string) ?? "", sequence: (row["sequence"] as number) ?? 0, @@ -2302,3 +2351,53 @@ export function getPendingSliceGateCount(milestoneId: string, sliceId: string): ).get({ ":mid": milestoneId, ":sid": sliceId }); return row ? (row["cnt"] as number) : 0; } + +/** + * Return pending gate rows owned by a specific workflow turn. + * + * Unlike `getPendingGates(..., scope)`, this filters by the registry's + * `ownerTurn` metadata so callers can distinguish Q3/Q4 (owned by + * gate-evaluate) from Q8 (owned by complete-slice) even though both are + * scope:"slice". Pass `taskId` to narrow task-scoped results to one task. 
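+ * + * Example (illustrative): with every slice-scoped gate for M01/S02 pending, passing + * turn "gate-evaluate" returns the Q3/Q4 rows while "complete-slice" returns only the + * Q8 row, even though all three gates are scope:"slice".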
+ */ +export function getPendingGatesForTurn( + milestoneId: string, + sliceId: string, + turn: OwnerTurn, + taskId?: string, +): GateRow[] { + if (!currentDb) return []; + const ids = getGateIdsForTurn(turn); + if (ids.size === 0) return []; + const idList = [...ids]; + const placeholders = idList.map((_, i) => `:gid${i}`).join(","); + const params: Record<string, string> = { + ":mid": milestoneId, + ":sid": sliceId, + }; + idList.forEach((id, i) => { + params[`:gid${i}`] = id; + }); + let sql = + `SELECT * FROM quality_gates + WHERE milestone_id = :mid AND slice_id = :sid + AND status = 'pending' + AND gate_id IN (${placeholders})`; + if (taskId !== undefined) { + sql += ` AND task_id = :tid`; + params[":tid"] = taskId; + } + return currentDb.prepare(sql).all(params).map(rowToGate); +} + +/** + * Count pending gates for a turn. Convenience wrapper used by state + * derivation to decide whether a phase transition should pause. + */ +export function getPendingGateCountForTurn( + milestoneId: string, + sliceId: string, + turn: OwnerTurn, +): number { + return getPendingGatesForTurn(milestoneId, sliceId, turn).length; +} diff --git a/src/resources/extensions/gsd/guided-flow.ts b/src/resources/extensions/gsd/guided-flow.ts index b73ad122d..8892564a6 100644 --- a/src/resources/extensions/gsd/guided-flow.ts +++ b/src/resources/extensions/gsd/guided-flow.ts @@ -15,7 +15,7 @@ import { loadPrompt, inlineTemplate } from "./prompt-loader.js"; import { buildSkillActivationBlock } from "./auto-prompts.js"; import { deriveState } from "./state.js"; import { invalidateAllCaches } from "./cache.js"; -import { startAuto } from "./auto.js"; +import { startAutoDetached } from "./auto.js"; import { clearLock } from "./crash-recovery.js"; import { assessInterruptedSession, @@ -48,30 +48,14 @@ import { DISCUSS_TOOLS_ALLOWLIST } from "./constants.js"; import { getWorkflowTransportSupportError, getRequiredWorkflowToolsForGuidedUnit, + supportsStructuredQuestions, } from "./workflow-mcp.js"; import { runPreparation, formatCodebaseBrief, formatPriorContextBrief, - formatEcosystemBrief, - type PreparationResult, } from "./preparation.js"; -// ─── Preparation result storage ───────────────────────────────────────────── -// Stores the most recent preparation result for injection into discuss prompts. -// S02 will consume this when building the prepared discussion prompt. -let lastPreparationResult: PreparationResult | null = null; - -/** Get the most recent preparation result (for S02 prompt building). */ -export function getLastPreparationResult(): PreparationResult | null { - return lastPreparationResult; -} - -/** Clear the preparation result (called after discussion completes). 
*/ -export function clearPreparationResult(): void { - lastPreparationResult = null; -} - // ─── Re-exports (preserve public API for existing importers) ──────────────── export { MILESTONE_ID_RE, generateMilestoneSuffix, nextMilestoneId, @@ -83,7 +67,6 @@ export { showQueue, handleQueueReorder, showQueueAdd, buildExistingMilestonesContext, } from "./guided-flow-queue.js"; -import { getErrorMessage } from "./error-utils.js"; import { logWarning } from "./workflow-logger.js"; // ─── ID Generation with Reservation ───────────────────────────────────────── @@ -260,11 +243,7 @@ export function checkAutoStartAfterDiscuss(): boolean { pendingAutoStartMap.delete(basePath); ctx.ui.notify(`Milestone ${milestoneId} ready.`, "info"); - startAuto(ctx, pi, basePath, false, { step }).catch((err) => { - ctx.ui.notify(`Auto-start failed: ${getErrorMessage(err)}`, "error"); - logWarning("guided", `auto start error: ${getErrorMessage(err)}`); - debugLog("auto-start-failed", { error: getErrorMessage(err) }); - }); + startAutoDetached(ctx, pi, basePath, false, { step }); return true; } @@ -311,6 +290,7 @@ async function dispatchWorkflow( const result = await selectAndApplyModel( ctx, pi, unitType, /* unitId */ "", /* basePath */ process.cwd(), prefs, /* verbose */ false, /* autoModeStartModel */ null, + /* retryContext */ undefined, /* isAutoMode */ false, ); if (result.appliedModel) { debugLog("guided-flow-model-applied", { @@ -384,6 +364,20 @@ async function dispatchWorkflow( } } +function getStructuredQuestionsAvailability( + pi: ExtensionAPI, + ctx: ExtensionContext | undefined, +): "true" | "false" { + if (!ctx) return "false"; + + const provider = ctx.model?.provider; + const authMode = provider ? ctx.modelRegistry.getProviderAuthMode(provider) : undefined; + return supportsStructuredQuestions(pi.getActiveTools(), { + authMode, + baseUrl: ctx.model?.baseUrl, + }) ? "true" : "false"; +} + /** * Resolve a model ID string to a model object from available models. * Handles "provider/model" and bare ID formats. @@ -427,8 +421,9 @@ function resolveAvailableModel( * Build the discuss-and-plan prompt for a new milestone. * Used by all three "new milestone" paths (first ever, no active, all complete). */ -function buildDiscussPrompt(nextId: string, preamble: string, _basePath: string): string { +function buildDiscussPrompt(nextId: string, preamble: string, _basePath: string, pi: ExtensionAPI, ctx: ExtensionCommandContext, preparationContext?: string): string { const milestoneRel = `.gsd/milestones/${nextId}`; + const structuredQuestionsAvailable = getStructuredQuestionsAvailability(pi, ctx); const inlinedTemplates = [ inlineTemplate("project", "Project"), inlineTemplate("requirements", "Requirements"), @@ -439,6 +434,8 @@ function buildDiscussPrompt(nextId: string, preamble: string, _basePath: string) return loadPrompt("discuss", { milestoneId: nextId, preamble, + preparationContext: preparationContext ?? "", + structuredQuestionsAvailable, contextPath: `${milestoneRel}/${nextId}-CONTEXT.md`, roadmapPath: `${milestoneRel}/${nextId}-ROADMAP.md`, inlinedTemplates, @@ -471,59 +468,12 @@ function buildHeadlessDiscussPrompt(nextId: string, seedContext: string, _basePa }); } -/** - * Build the prepared discuss prompt with brief injection. - * Uses the discuss-prepared template which encodes the 4-layer discussion protocol. 
- * - * @param nextId - The milestone ID being discussed - * @param preamble - Preamble text for the discuss prompt - * @param _basePath - Root directory of the project (unused, kept for signature consistency) - * @param prepResult - Preparation result containing briefs to inject - * @returns The prepared discuss prompt string - */ -function buildPreparedPrompt( - nextId: string, - preamble: string, - _basePath: string, - prepResult: PreparationResult, -): string { - const milestoneRel = `.gsd/milestones/${nextId}`; - - // Use context-enhanced instead of context for prepared discussions - const inlinedTemplates = [ - inlineTemplate("project", "Project"), - inlineTemplate("requirements", "Requirements"), - inlineTemplate("context-enhanced", "Context Enhanced"), - inlineTemplate("roadmap", "Roadmap"), - inlineTemplate("decisions", "Decisions"), - ].join("\n\n---\n\n"); - - // Format the briefs from the preparation result - const codebaseBrief = prepResult.codebaseBrief || formatCodebaseBrief(prepResult.codebase); - const priorContextBrief = prepResult.priorContextBrief || formatPriorContextBrief(prepResult.priorContext); - const ecosystemBrief = prepResult.ecosystemBrief || formatEcosystemBrief(prepResult.ecosystem); - - return loadPrompt("discuss-prepared", { - milestoneId: nextId, - preamble, - codebaseBrief, - priorContextBrief, - ecosystemBrief, - contextPath: `${milestoneRel}/${nextId}-CONTEXT.md`, - roadmapPath: `${milestoneRel}/${nextId}-ROADMAP.md`, - inlinedTemplates, - commitInstruction: buildDocsCommitInstruction(`docs(${nextId}): context, requirements, and roadmap`), - multiMilestoneCommitInstruction: buildDocsCommitInstruction("docs: project plan — N milestones"), - }); -} - /** * Run preparation phase if enabled, then build the discuss prompt. - * This is the main entry point for new milestone discussions with preparation. - * Stores the preparation result for S02 to inject into the discuss prompt. - * - * When preparation succeeds, uses the discuss-prepared template with brief injection. - * Falls back to the standard discuss template when preparation is disabled or fails. + * Preparation analyzes the codebase and prior context, injecting the results + * as supplementary context into the standard discuss template. The discuss + * template drives the conversation (asks "What's the vision?" first), while + * the preparation briefs give the agent grounding in the existing codebase. * * @param ctx - Extension command context with UI for progress notifications * @param nextId - The milestone ID being discussed @@ -533,18 +483,18 @@ function buildPreparedPrompt( */ async function prepareAndBuildDiscussPrompt( ctx: ExtensionCommandContext, + pi: ExtensionAPI, nextId: string, preamble: string, basePath: string, ): Promise { - // Clear stale preparation result immediately to prevent cross-session/project - // state leaks. This ensures data from a prior milestone/project never leaks - // into subsequent discussions (adversarial review fix #3602). - lastPreparationResult = null; - const prefs = loadEffectiveGSDPreferences()?.preferences ?? {}; - // Run preparation if enabled (default: true) + // Run preparation if enabled (default: true) — results are injected as + // supplementary context into the standard discuss prompt, NOT as a + // replacement template. The discuss prompt always leads with "What's the + // vision?" so the user defines the scope, not the codebase analysis. 
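+ // Illustrative outcome (assuming both briefs come back non-empty): preparationContext + // becomes a "## Preparation Context" block with "### Codebase Brief" and + // "### Prior Context Brief" sections, which buildDiscussPrompt receives through its + // preparationContext parameter.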
+ let preparationContext = ""; if (prefs.discuss_preparation !== false) { try { const prepResult = await runPreparation(basePath, ctx.ui, { @@ -552,21 +502,23 @@ async function prepareAndBuildDiscussPrompt( discuss_web_research: prefs.discuss_web_research, discuss_depth: prefs.discuss_depth, }); - lastPreparationResult = prepResult; - // Use prepared prompt if preparation was enabled and produced results if (prepResult.enabled) { - return buildPreparedPrompt(nextId, preamble, basePath, prepResult); + const codebaseBrief = prepResult.codebaseBrief || formatCodebaseBrief(prepResult.codebase); + const priorContextBrief = prepResult.priorContextBrief || formatPriorContextBrief(prepResult.priorContext); + const parts: string[] = []; + if (codebaseBrief) parts.push(`### Codebase Brief\n\n${codebaseBrief}`); + if (priorContextBrief) parts.push(`### Prior Context Brief\n\n${priorContextBrief}`); + if (parts.length > 0) { + preparationContext = `\n\n## Preparation Context\n\nThe system analyzed the codebase before this discussion. Use these findings as background context — they describe what already exists, NOT what the user wants to build. Always ask the user what they want to build first.\n\n${parts.join("\n\n")}`; + } } - } catch { - // If preparation throws, ensure stale data doesn't persist - lastPreparationResult = null; + } catch (err) { + logWarning("guided", `preparation failed, proceeding without context: ${(err as Error).message}`); } } - // Fall back to standard discuss prompt for backward compatibility - // lastPreparationResult is already null (cleared at entry or on error) - return buildDiscussPrompt(nextId, preamble, basePath); + return buildDiscussPrompt(nextId, preamble, basePath, pi, ctx, preparationContext); } /** @@ -801,7 +753,7 @@ export async function showDiscuss( if (choice === "discuss_draft") { const discussMilestoneTemplates = inlineTemplate("context", "Context"); - const structuredQuestionsAvailable = pi.getActiveTools().includes("ask_user_questions") ? "true" : "false"; + const structuredQuestionsAvailable = getStructuredQuestionsAvailability(pi, ctx); const basePrompt = loadPrompt("guided-discuss-milestone", { milestoneId: mid, milestoneTitle, inlinedTemplates: discussMilestoneTemplates, structuredQuestionsAvailable, commitInstruction: buildDocsCommitInstruction(`docs(${mid}): milestone context from discuss`), @@ -814,7 +766,7 @@ export async function showDiscuss( await dispatchWorkflow(pi, seed, "gsd-discuss", ctx, "discuss-milestone"); } else if (choice === "discuss_fresh") { const discussMilestoneTemplates = inlineTemplate("context", "Context"); - const structuredQuestionsAvailable = pi.getActiveTools().includes("ask_user_questions") ? 
"true" : "false"; + const structuredQuestionsAvailable = getStructuredQuestionsAvailability(pi, ctx); pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: mid, step: false, createdAt: Date.now() }); await dispatchWorkflow(pi, loadPrompt("guided-discuss-milestone", { milestoneId: mid, milestoneTitle, inlinedTemplates: discussMilestoneTemplates, structuredQuestionsAvailable, @@ -826,7 +778,7 @@ export async function showDiscuss( const uniqueMilestoneIds = !!loadEffectiveGSDPreferences()?.preferences?.unique_milestone_ids; const nextId = nextMilestoneIdReserved(milestoneIds, uniqueMilestoneIds); pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, step: false, createdAt: Date.now() }); - await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, nextId, `New milestone ${nextId}.`, basePath), "gsd-run", ctx, "discuss-milestone"); + await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, pi, nextId, `New milestone ${nextId}.`, basePath), "gsd-run", ctx, "discuss-milestone"); } return; } @@ -972,7 +924,7 @@ export async function showDiscuss( if (confirm !== "rediscuss") continue; } - const sqAvail = pi.getActiveTools().includes("ask_user_questions") ? "true" : "false"; + const sqAvail = getStructuredQuestionsAvailability(pi, ctx); const prompt = await buildDiscussSlicePrompt(mid, chosen.id, chosen.title, basePath, { rediscuss: isRediscuss, structuredQuestionsAvailable: sqAvail }); await dispatchWorkflow(pi, prompt, "gsd-discuss", ctx, "discuss-slice"); @@ -1082,7 +1034,7 @@ async function dispatchDiscussForMilestone( ].join("\n") : ""; const discussMilestoneTemplates = inlineTemplate("context", "Context"); - const structuredQuestionsAvailable = pi.getActiveTools().includes("ask_user_questions") ? "true" : "false"; + const structuredQuestionsAvailable = getStructuredQuestionsAvailability(pi, ctx); const basePrompt = loadPrompt("guided-discuss-milestone", { milestoneId: mid, milestoneTitle, @@ -1231,7 +1183,7 @@ async function handleMilestoneActions( const uniqueMilestoneIds = !!loadEffectiveGSDPreferences()?.preferences?.unique_milestone_ids; const nextId = nextMilestoneIdReserved(milestoneIds, uniqueMilestoneIds); pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, step: stepMode, createdAt: Date.now() }); - await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, nextId, + await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, pi, nextId, `New milestone ${nextId}.`, basePath ), "gsd-run", ctx, "discuss-milestone"); @@ -1348,7 +1300,7 @@ export async function showSmartEntry( ], }); if (resume === "resume") { - await startAuto(ctx, pi, basePath, false, { + startAutoDetached(ctx, pi, basePath, false, { interrupted, step: interrupted.pausedSession?.stepMode ?? false, }); @@ -1421,7 +1373,7 @@ export async function showSmartEntry( if (isFirst) { // First ever — skip wizard, just ask directly pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, step: stepMode, createdAt: Date.now() }); - await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, nextId, + await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, pi, nextId, `New project, milestone ${nextId}. 
Do NOT read or explore .gsd/ — it's empty scaffolding.`, basePath ), "gsd-run", ctx, "discuss-milestone"); @@ -1442,7 +1394,7 @@ export async function showSmartEntry( if (choice === "new_milestone") { pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, step: stepMode, createdAt: Date.now() }); - await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, nextId, + await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, pi, nextId, `New milestone ${nextId}.`, basePath ), "gsd-run", ctx, "discuss-milestone"); @@ -1481,7 +1433,7 @@ export async function showSmartEntry( const nextId = nextMilestoneIdReserved(milestoneIds, uniqueMilestoneIds); pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, step: stepMode, createdAt: Date.now() }); - await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, nextId, + await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, pi, nextId, `New milestone ${nextId}.`, basePath ), "gsd-run", ctx, "discuss-milestone"); @@ -1523,7 +1475,7 @@ export async function showSmartEntry( if (choice === "discuss_draft") { const discussMilestoneTemplates = inlineTemplate("context", "Context"); - const structuredQuestionsAvailable = pi.getActiveTools().includes("ask_user_questions") ? "true" : "false"; + const structuredQuestionsAvailable = getStructuredQuestionsAvailability(pi, ctx); const basePrompt = loadPrompt("guided-discuss-milestone", { milestoneId, milestoneTitle, inlinedTemplates: discussMilestoneTemplates, structuredQuestionsAvailable, commitInstruction: buildDocsCommitInstruction(`docs(${milestoneId}): milestone context from discuss`), @@ -1536,7 +1488,7 @@ export async function showSmartEntry( await dispatchWorkflow(pi, seed, "gsd-discuss", ctx, "discuss-milestone"); } else if (choice === "discuss_fresh") { const discussMilestoneTemplates = inlineTemplate("context", "Context"); - const structuredQuestionsAvailable = pi.getActiveTools().includes("ask_user_questions") ? "true" : "false"; + const structuredQuestionsAvailable = getStructuredQuestionsAvailability(pi, ctx); pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId, step: stepMode, createdAt: Date.now() }); await dispatchWorkflow(pi, loadPrompt("guided-discuss-milestone", { milestoneId, milestoneTitle, inlinedTemplates: discussMilestoneTemplates, structuredQuestionsAvailable, @@ -1548,7 +1500,7 @@ export async function showSmartEntry( const uniqueMilestoneIds = !!loadEffectiveGSDPreferences()?.preferences?.unique_milestone_ids; const nextId = nextMilestoneIdReserved(milestoneIds, uniqueMilestoneIds); pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, step: stepMode, createdAt: Date.now() }); - await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, nextId, + await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, pi, nextId, `New milestone ${nextId}.`, basePath ), "gsd-run", ctx, "discuss-milestone"); @@ -1634,7 +1586,7 @@ export async function showSmartEntry( }), "gsd-run", ctx, "plan-milestone"); } else if (choice === "discuss") { const discussMilestoneTemplates = inlineTemplate("context", "Context"); - const structuredQuestionsAvailable = pi.getActiveTools().includes("ask_user_questions") ? 
"true" : "false"; + const structuredQuestionsAvailable = getStructuredQuestionsAvailability(pi, ctx); await dispatchWorkflow(pi, loadPrompt("guided-discuss-milestone", { milestoneId, milestoneTitle, inlinedTemplates: discussMilestoneTemplates, structuredQuestionsAvailable, commitInstruction: buildDocsCommitInstruction(`docs(${milestoneId}): milestone context from discuss`), @@ -1645,7 +1597,7 @@ export async function showSmartEntry( const uniqueMilestoneIds = !!loadEffectiveGSDPreferences()?.preferences?.unique_milestone_ids; const nextId = nextMilestoneIdReserved(milestoneIds, uniqueMilestoneIds); pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, step: stepMode, createdAt: Date.now() }); - await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, nextId, + await dispatchWorkflow(pi, await prepareAndBuildDiscussPrompt(ctx, pi, nextId, `New milestone ${nextId}.`, basePath ), "gsd-run", ctx, "discuss-milestone"); @@ -1690,7 +1642,7 @@ export async function showSmartEntry( }); if (choice === "auto") { - await startAuto(ctx, pi, basePath, false); + startAutoDetached(ctx, pi, basePath, false); } else if (choice === "status") { const { fireStatusViaCommand } = await import("./commands.js"); await fireStatusViaCommand(ctx); @@ -1774,7 +1726,7 @@ export async function showSmartEntry( }), }), "gsd-run", ctx, "plan-slice"); } else if (choice === "discuss") { - const sqAvail = pi.getActiveTools().includes("ask_user_questions") ? "true" : "false"; + const sqAvail = getStructuredQuestionsAvailability(pi, ctx); await dispatchWorkflow(pi, await buildDiscussSlicePrompt(milestoneId, sliceId, sliceTitle, basePath, { rediscuss: hasContext, structuredQuestionsAvailable: sqAvail }), "gsd-run", ctx, "discuss-slice"); } else if (choice === "research") { const researchTemplates = inlineTemplate("research", "Research"); @@ -1902,7 +1854,7 @@ export async function showSmartEntry( }); if (choice === "auto") { - await startAuto(ctx, pi, basePath, false); + startAutoDetached(ctx, pi, basePath, false); return; } diff --git a/src/resources/extensions/gsd/init-wizard.ts b/src/resources/extensions/gsd/init-wizard.ts index 40f3e5b64..b7251471e 100644 --- a/src/resources/extensions/gsd/init-wizard.ts +++ b/src/resources/extensions/gsd/init-wizard.ts @@ -274,19 +274,9 @@ export async function showProjectInit( // Non-fatal — STATE.md will be regenerated on next /gsd invocation } - if (ctx.model?.provider === "claude-code") { - try { - const { ensureProjectWorkflowMcpConfig } = await import("./mcp-project-config.js"); - const result = ensureProjectWorkflowMcpConfig(basePath); - if (result.status !== "unchanged") { - ctx.ui.notify(`Claude Code MCP prepared at ${result.configPath}`, "info"); - } - } catch (err) { - ctx.ui.notify( - `Claude Code MCP prep failed: ${err instanceof Error ? err.message : String(err)}`, - "warning", - ); - } + { + const { prepareWorkflowMcpForProject } = await import("./workflow-mcp-auto-prep.js"); + prepareWorkflowMcpForProject(ctx, basePath); } ctx.ui.notify("GSD initialized. 
Starting your first milestone...", "info"); diff --git a/src/resources/extensions/gsd/interrupted-session.ts b/src/resources/extensions/gsd/interrupted-session.ts index 8c6274a05..b0ca579d3 100644 --- a/src/resources/extensions/gsd/interrupted-session.ts +++ b/src/resources/extensions/gsd/interrupted-session.ts @@ -34,6 +34,7 @@ export interface PausedSessionMetadata { activeEngineId?: string; activeRunDir?: string | null; autoStartTime?: number; + milestoneLock?: string | null; } export interface InterruptedSessionAssessment { diff --git a/src/resources/extensions/gsd/key-manager.ts b/src/resources/extensions/gsd/key-manager.ts index 17bd3cb31..a4699202b 100644 --- a/src/resources/extensions/gsd/key-manager.ts +++ b/src/resources/extensions/gsd/key-manager.ts @@ -49,6 +49,8 @@ export const PROVIDER_REGISTRY: ProviderInfo[] = [ { id: "custom-openai", label: "Custom (OpenAI-compat)", category: "llm", envVar: "CUSTOM_OPENAI_API_KEY" }, { id: "cerebras", label: "Cerebras", category: "llm", envVar: "CEREBRAS_API_KEY" }, { id: "azure-openai-responses", label: "Azure OpenAI", category: "llm", envVar: "AZURE_OPENAI_API_KEY" }, + { id: "alibaba-coding-plan", label: "Alibaba Coding Plan", category: "llm", envVar: "ALIBABA_API_KEY", dashboardUrl: "bailian.console.aliyun.com" }, + { id: "alibaba-dashscope", label: "Alibaba DashScope", category: "llm", envVar: "DASHSCOPE_API_KEY", dashboardUrl: "dashscope.console.aliyun.com" }, // Tool Keys { id: "context7", label: "Context7 Docs", category: "tool", envVar: "CONTEXT7_API_KEY", dashboardUrl: "context7.com/dashboard" }, diff --git a/src/resources/extensions/gsd/metrics.ts b/src/resources/extensions/gsd/metrics.ts index a29d4f39d..85f3484bb 100644 --- a/src/resources/extensions/gsd/metrics.ts +++ b/src/resources/extensions/gsd/metrics.ts @@ -41,6 +41,7 @@ export interface UnitMetrics { model: string; // model ID used startedAt: number; // ms timestamp finishedAt: number; // ms timestamp + autoSessionKey?: string; // identifies one auto-mode run across pause/resume tokens: TokenCounts; cost: number; // total USD cost toolCalls: number; @@ -133,7 +134,16 @@ export function snapshotUnitMetrics( unitId: string, startedAt: number, model: string, - opts?: { tier?: string; modelDowngraded?: boolean; contextWindowTokens?: number; truncationSections?: number; continueHereFired?: boolean; promptCharCount?: number; baselineCharCount?: number }, + opts?: { + tier?: string; + modelDowngraded?: boolean; + contextWindowTokens?: number; + truncationSections?: number; + continueHereFired?: boolean; + promptCharCount?: number; + baselineCharCount?: number; + autoSessionKey?: string; + }, ): UnitMetrics | null { if (!ledger) return null; @@ -181,6 +191,7 @@ export function snapshotUnitMetrics( model, startedAt, finishedAt: Date.now(), + ...(opts?.autoSessionKey ? 
{ autoSessionKey: opts.autoSessionKey } : {}), tokens, cost, toolCalls, diff --git a/src/resources/extensions/gsd/milestone-actions.ts b/src/resources/extensions/gsd/milestone-actions.ts index 49102dc25..06562a893 100644 --- a/src/resources/extensions/gsd/milestone-actions.ts +++ b/src/resources/extensions/gsd/milestone-actions.ts @@ -20,7 +20,7 @@ import { } from "./paths.js"; import { invalidateAllCaches } from "./cache.js"; import { loadQueueOrder, saveQueueOrder } from "./queue-order.js"; -import { isDbAvailable, updateMilestoneStatus } from "./gsd-db.js"; +import { getMilestone, isDbAvailable, updateMilestoneStatus } from "./gsd-db.js"; import { logWarning } from "./workflow-logger.js"; // ─── Park ────────────────────────────────────────────────────────────────── @@ -77,9 +77,16 @@ export function unparkMilestone(basePath: string, milestoneId: string): boolean if (!mDir || !existsSync(mDir)) return false; const parkedPath = join(mDir, buildMilestoneFileName(milestoneId, "PARKED")); - if (!existsSync(parkedPath)) return false; // not parked + const hadParkedFile = existsSync(parkedPath); + const dbThinksParked = isDbAvailable() && getMilestone(milestoneId)?.status === "parked"; - unlinkSync(parkedPath); + // Recover the reverse desync too: DB can still say "parked" even when the + // PARKED marker was lost on disk, and /gsd unpark should repair that state. + if (!hadParkedFile && !dbThinksParked) return false; + + if (hadParkedFile) { + unlinkSync(parkedPath); + } // Sync DB status so deriveStateFromDb picks up the unparked milestone (#2694) if (isDbAvailable()) { try { diff --git a/src/resources/extensions/gsd/milestone-validation-gates.ts b/src/resources/extensions/gsd/milestone-validation-gates.ts index 4dcd522b6..994870c37 100644 --- a/src/resources/extensions/gsd/milestone-validation-gates.ts +++ b/src/resources/extensions/gsd/milestone-validation-gates.ts @@ -6,19 +6,13 @@ * records in the DB. This module inserts milestone-level validation gates * that correspond to the validation checks performed. * - * Gate IDs for milestone validation: - * MV01 — Success criteria checklist - * MV02 — Slice delivery audit - * MV03 — Cross-slice integration - * MV04 — Requirement coverage - * - * These use the existing quality_gates table with scope "milestone". + * Gate IDs for milestone validation (MV01–MV04) are sourced from the + * gate registry so the definitions stay in lockstep with prompt builders, + * dispatch rules, and state derivation. See gate-registry.ts. */ import { _getAdapter } from "./gsd-db.js"; - -/** Milestone validation gate IDs. */ -const MILESTONE_GATE_IDS = ["MV01", "MV02", "MV03", "MV04"] as const; +import { getGatesForTurn } from "./gate-registry.js"; /** * Insert milestone-level quality_gates records for a validation run. @@ -27,6 +21,9 @@ const MILESTONE_GATE_IDS = ["MV01", "MV02", "MV03", "MV04"] as const; * from the overall milestone validation verdict. Individual gate-level * verdicts are not available (the handler receives a single verdict), * so all gates share the overall verdict. + * + * Gate IDs come from the registry — adding/removing an MV-scoped gate + * in gate-registry.ts automatically flows through here. */ export function insertMilestoneValidationGates( milestoneId: string, @@ -38,8 +35,9 @@ export function insertMilestoneValidationGates( if (!db) return; const gateVerdict = verdict === "pass" ? 
"pass" : "flag"; + const milestoneGates = getGatesForTurn("validate-milestone"); - for (const gateId of MILESTONE_GATE_IDS) { + for (const def of milestoneGates) { db.prepare( `INSERT OR REPLACE INTO quality_gates (milestone_id, slice_id, gate_id, scope, task_id, status, verdict, rationale, findings, evaluated_at) @@ -47,9 +45,9 @@ export function insertMilestoneValidationGates( ).run({ ":mid": milestoneId, ":sid": sliceId, - ":gid": gateId, + ":gid": def.id, ":verdict": gateVerdict, - ":rationale": `Milestone validation verdict: ${verdict}`, + ":rationale": `${def.promptSection} — milestone validation verdict: ${verdict}`, ":evaluated_at": evaluatedAt, }); } diff --git a/src/resources/extensions/gsd/model-router.ts b/src/resources/extensions/gsd/model-router.ts index 0efbbf9b6..cc915877a 100644 --- a/src/resources/extensions/gsd/model-router.ts +++ b/src/resources/extensions/gsd/model-router.ts @@ -5,6 +5,9 @@ import type { ComplexityTier, ClassificationResult, TaskMetadata } from "./complexity-classifier.js"; import { tierOrdinal } from "./complexity-classifier.js"; import type { ResolvedModelConfig } from "./preferences.js"; +import { getProviderCapabilities, type ProviderCapabilities } from "@gsd/pi-ai"; +import { getToolCompatibility, getAllToolCompatibility } from "@gsd/pi-coding-agent"; +import type { ToolCompatibility } from "@gsd/pi-coding-agent"; // ─── Types ─────────────────────────────────────────────────────────────────── @@ -37,6 +40,8 @@ export interface RoutingDecision { selectionMethod: "tier-only" | "capability-scored"; /** Capability scores per eligible model (capability-scored path only) */ capabilityScores?: Record; + /** Tools filtered out due to provider incompatibility (ADR-005) */ + filteredTools?: string[]; /** Task requirement vector used for scoring */ taskRequirements?: Partial>; } @@ -58,7 +63,7 @@ export interface ModelCapabilities { // Maps known model IDs to their capability tier. Used when tier_models is not // explicitly configured to pick the best available model for each tier. -const MODEL_CAPABILITY_TIER: Record = { +export const MODEL_CAPABILITY_TIER: Record = { // Light-tier models (cheapest) "claude-haiku-4-5": "light", "claude-3-5-haiku-latest": "light", @@ -139,15 +144,49 @@ const MODEL_COST_PER_1K_INPUT: Record = { // model selection within an eligible tier set. 
@@ -139,15 +144,49 @@ const MODEL_COST_PER_1K_INPUT: Record<string, number> = {
 // model selection within an eligible tier set.
 export const MODEL_CAPABILITY_PROFILES: Record<string, ModelCapabilities> = {
-  "claude-opus-4-6": { coding: 95, debugging: 90, research: 85, reasoning: 95, speed: 30, longContext: 80, instruction: 90 },
-  "claude-sonnet-4-6": { coding: 85, debugging: 80, research: 75, reasoning: 80, speed: 60, longContext: 75, instruction: 85 },
-  "claude-haiku-4-5": { coding: 60, debugging: 50, research: 45, reasoning: 50, speed: 95, longContext: 50, instruction: 75 },
-  "gpt-4o": { coding: 80, debugging: 75, research: 70, reasoning: 75, speed: 65, longContext: 70, instruction: 80 },
-  "gpt-4o-mini": { coding: 55, debugging: 45, research: 40, reasoning: 45, speed: 90, longContext: 45, instruction: 70 },
-  "gemini-2.5-pro": { coding: 75, debugging: 70, research: 85, reasoning: 75, speed: 55, longContext: 90, instruction: 75 },
-  "gemini-2.0-flash": { coding: 50, debugging: 40, research: 50, reasoning: 40, speed: 95, longContext: 60, instruction: 65 },
-  "deepseek-chat": { coding: 75, debugging: 65, research: 55, reasoning: 70, speed: 70, longContext: 55, instruction: 65 },
-  "o3": { coding: 80, debugging: 85, research: 80, reasoning: 92, speed: 25, longContext: 70, instruction: 85 },
+  // ── Anthropic ──────────────────────────────────────────────────────────────
+  "claude-opus-4-6": { coding: 95, debugging: 90, research: 85, reasoning: 95, speed: 30, longContext: 80, instruction: 90 },
+  "claude-sonnet-4-6": { coding: 85, debugging: 80, research: 75, reasoning: 80, speed: 60, longContext: 75, instruction: 85 },
+  "claude-sonnet-4-5-20250514": { coding: 85, debugging: 80, research: 75, reasoning: 80, speed: 60, longContext: 75, instruction: 85 },
+  "claude-3-5-sonnet-latest": { coding: 82, debugging: 78, research: 72, reasoning: 78, speed: 62, longContext: 70, instruction: 82 },
+  "claude-haiku-4-5": { coding: 60, debugging: 50, research: 45, reasoning: 50, speed: 95, longContext: 50, instruction: 75 },
+  "claude-3-5-haiku-latest": { coding: 60, debugging: 50, research: 45, reasoning: 50, speed: 95, longContext: 50, instruction: 75 },
+  "claude-3-haiku-20240307": { coding: 50, debugging: 40, research: 35, reasoning: 40, speed: 95, longContext: 40, instruction: 65 },
+  "claude-3-opus-latest": { coding: 90, debugging: 85, research: 82, reasoning: 90, speed: 35, longContext: 75, instruction: 88 },
+
+  // ── OpenAI GPT ─────────────────────────────────────────────────────────────
+  "gpt-4o": { coding: 80, debugging: 75, research: 70, reasoning: 75, speed: 65, longContext: 70, instruction: 80 },
+  "gpt-4o-mini": { coding: 55, debugging: 45, research: 40, reasoning: 45, speed: 90, longContext: 45, instruction: 70 },
+  "gpt-4-turbo": { coding: 78, debugging: 72, research: 68, reasoning: 72, speed: 50, longContext: 65, instruction: 78 },
+  "gpt-4.1": { coding: 82, debugging: 78, research: 72, reasoning: 78, speed: 62, longContext: 72, instruction: 82 },
+  "gpt-4.1-mini": { coding: 58, debugging: 48, research: 42, reasoning: 48, speed: 88, longContext: 48, instruction: 72 },
+  "gpt-4.1-nano": { coding: 40, debugging: 30, research: 25, reasoning: 30, speed: 95, longContext: 30, instruction: 60 },
+  "gpt-5": { coding: 92, debugging: 88, research: 85, reasoning: 92, speed: 40, longContext: 85, instruction: 90 },
+  "gpt-5-mini": { coding: 62, debugging: 52, research: 48, reasoning: 52, speed: 88, longContext: 52, instruction: 74 },
+  "gpt-5-nano": { coding: 42, debugging: 32, research: 28, reasoning: 32, speed: 95, longContext: 32, instruction: 62 },
+  "gpt-5-pro": { coding: 94, debugging: 90, research: 88, reasoning: 94, speed: 35,
longContext: 88, instruction: 92 }, + "gpt-5.1": { coding: 93, debugging: 89, research: 86, reasoning: 93, speed: 42, longContext: 86, instruction: 91 }, + "gpt-5.1-codex-max": { coding: 90, debugging: 85, research: 70, reasoning: 85, speed: 55, longContext: 75, instruction: 85 }, + "gpt-5.1-codex-mini": { coding: 65, debugging: 55, research: 40, reasoning: 50, speed: 88, longContext: 48, instruction: 72 }, + "gpt-5.2": { coding: 93, debugging: 90, research: 87, reasoning: 93, speed: 42, longContext: 87, instruction: 91 }, + "gpt-5.2-codex": { coding: 93, debugging: 90, research: 72, reasoning: 88, speed: 50, longContext: 78, instruction: 88 }, + "gpt-5.3-codex": { coding: 94, debugging: 91, research: 74, reasoning: 89, speed: 50, longContext: 80, instruction: 89 }, + "gpt-5.3-codex-spark": { coding: 68, debugging: 58, research: 42, reasoning: 52, speed: 90, longContext: 50, instruction: 74 }, + "gpt-5.4": { coding: 95, debugging: 92, research: 88, reasoning: 94, speed: 42, longContext: 88, instruction: 92 }, + + // ── OpenAI o-series (reasoning-first) ────────────────────────────────────── + "o1": { coding: 78, debugging: 82, research: 78, reasoning: 90, speed: 20, longContext: 65, instruction: 82 }, + "o3": { coding: 80, debugging: 85, research: 80, reasoning: 92, speed: 25, longContext: 70, instruction: 85 }, + "o4-mini": { coding: 75, debugging: 80, research: 72, reasoning: 88, speed: 60, longContext: 65, instruction: 80 }, + "o4-mini-deep-research": { coding: 75, debugging: 80, research: 85, reasoning: 88, speed: 30, longContext: 80, instruction: 80 }, + + // ── Google ───────────────────────────────────────────────────────────────── + "gemini-2.5-pro": { coding: 75, debugging: 70, research: 85, reasoning: 75, speed: 55, longContext: 90, instruction: 75 }, + "gemini-2.0-flash": { coding: 50, debugging: 40, research: 50, reasoning: 40, speed: 95, longContext: 60, instruction: 65 }, + "gemini-flash-2.0": { coding: 50, debugging: 40, research: 50, reasoning: 40, speed: 95, longContext: 60, instruction: 65 }, + + // ── DeepSeek ─────────────────────────────────────────────────────────────── + "deepseek-chat": { coding: 75, debugging: 65, research: 55, reasoning: 70, speed: 70, longContext: 55, instruction: 65 }, }; // ─── Base Task Requirements Data Table ─────────────────────────────────────── @@ -502,3 +541,71 @@ function getModelCost(modelId: string): number { // Unknown cost — assume expensive to avoid routing to unknown cheap models return 999; } + +// ─── Tool Compatibility Filter (ADR-005 Phase 3) ─────────────────────────── + +/** + * Check if a tool is compatible with a provider's capabilities. + * Returns true if the tool can be used with the provider. + */ +export function isToolCompatibleWithProvider( + toolName: string, + providerCaps: ProviderCapabilities, +): boolean { + const compat = getToolCompatibility(toolName); + if (!compat) return true; // no metadata = always compatible + + // Hard filter: provider doesn't support image tool results + if (compat.producesImages && !providerCaps.imageToolResults) return false; + + // Hard filter: tool uses schema features provider doesn't support + if (compat.schemaFeatures?.some(f => providerCaps.unsupportedSchemaFeatures.includes(f))) { + return false; + } + + return true; +} + +/** + * Filter a list of tool names to only those compatible with a provider. + * Used by the routing pipeline to adjust tool sets when switching providers. 
+ */ +export function filterToolsForProvider( + toolNames: string[], + providerApi: string, +): { compatible: string[]; filtered: string[] } { + const providerCaps = getProviderCapabilities(providerApi); + + // Provider doesn't support tool calling at all + if (!providerCaps.toolCalling) { + return { compatible: [], filtered: toolNames }; + } + + const compatible: string[] = []; + const filtered: string[] = []; + + for (const name of toolNames) { + if (isToolCompatibleWithProvider(name, providerCaps)) { + compatible.push(name); + } else { + filtered.push(name); + } + } + + return { compatible, filtered }; +} + +/** + * Adjust the active tool set for a selected model's provider capabilities. + * Returns tool names that should be active — removes incompatible tools. + * + * This is a hard filter only — it removes tools that would fail at the + * provider level. It does NOT remove tools based on soft heuristics. + */ +export function adjustToolSet( + activeToolNames: string[], + selectedModelApi: string, +): { toolNames: string[]; removedTools: string[] } { + const { compatible, filtered } = filterToolsForProvider(activeToolNames, selectedModelApi); + return { toolNames: compatible, removedTools: filtered }; +} diff --git a/src/resources/extensions/gsd/notification-overlay.ts b/src/resources/extensions/gsd/notification-overlay.ts index 1b5e3bec5..98d34785a 100644 --- a/src/resources/extensions/gsd/notification-overlay.ts +++ b/src/resources/extensions/gsd/notification-overlay.ts @@ -1,6 +1,6 @@ // GSD Extension — Notification History Overlay // Scrollable panel showing all persisted notifications with severity filtering. -// Toggled with Ctrl+Alt+N (⌃⌥N on macOS) or opened from /gsd notifications. +// Toggled with Ctrl+Alt+N (⌃⌥N on macOS), Ctrl+Shift+N fallback, or /gsd notifications. import type { Theme } from "@gsd/pi-coding-agent"; import { truncateToWidth, visibleWidth, matchesKey, Key } from "@gsd/pi-tui"; @@ -9,11 +9,12 @@ import { readNotifications, markAllRead, clearNotifications, - getUnreadCount, + onNotificationStoreChange, type NotificationEntry, type NotifySeverity, } from "./notification-store.js"; -import { padRight, centerLine, joinColumns, formatDuration } from "../shared/mod.js"; +import { formattedShortcutPair } from "./shortcut-defs.js"; +import { padRight, joinColumns } from "../shared/mod.js"; type FilterMode = "all" | "error" | "warning" | "info"; const FILTER_CYCLE: FilterMode[] = ["all", "error", "warning", "info"]; @@ -63,6 +64,12 @@ function formatTimestamp(ts: string): string { } } +function notificationSignature(entries: readonly NotificationEntry[]): string { + return entries + .map((entry) => `${entry.ts}|${entry.severity}|${entry.read ? 
1 : 0}|${entry.message}`)
+    .join("\n");
+}
+
 export class GSDNotificationOverlay {
   private tui: { requestRender: () => void };
   private theme: Theme;
@@ -72,9 +79,11 @@
   private scrollOffset = 0;
   private filterIndex = 0;
   private entries: NotificationEntry[] = [];
+  private entriesSignature = "";
   private refreshTimer: ReturnType<typeof setInterval>;
   private disposed = false;
   private resizeHandler: (() => void) | null = null;
+  private unsubscribeStore: (() => void) | null = null;
 
   constructor(
     tui: { requestRender: () => void },
@@ -88,6 +97,7 @@
     // Mark all as read on open
     markAllRead();
     this.entries = readNotifications();
+    this.entriesSignature = notificationSignature(this.entries);
 
     // Resize handler
     this.resizeHandler = () => {
@@ -97,17 +107,17 @@
     };
     process.stdout.on("resize", this.resizeHandler);
 
-    // Refresh every 3s for new notifications
+    // Subscribe to store mutations for immediate updates
+    this.unsubscribeStore = onNotificationStoreChange(() => {
+      if (this.disposed) return;
+      this._refreshFromDisk();
+    });
+
+    // 30s safety-net for cross-process edits (web subprocess, parallel workers)
     this.refreshTimer = setInterval(() => {
       if (this.disposed) return;
-      const fresh = readNotifications();
-      if (fresh.length !== this.entries.length) {
-        this.entries = fresh;
-        markAllRead();
-        this.invalidate();
-        this.tui.requestRender();
-      }
-    }, 3000);
+      this._refreshFromDisk();
+    }, 30_000);
   }
 
   private get filter(): FilterMode {
@@ -120,7 +130,12 @@
   }
 
   handleInput(data: string): void {
-    if (matchesKey(data, Key.escape) || matchesKey(data, Key.ctrl("c")) || matchesKey(data, Key.ctrlAlt("n"))) {
+    if (
+      matchesKey(data, Key.escape) ||
+      matchesKey(data, Key.ctrl("c")) ||
+      matchesKey(data, Key.ctrlAlt("n")) ||
+      matchesKey(data, Key.ctrlShift("n"))
+    ) {
       this.dispose();
       this.onClose();
       return;
@@ -165,6 +180,7 @@
     if (data === "c") {
       clearNotifications();
       this.entries = [];
+      this.entriesSignature = notificationSignature(this.entries);
       this.scrollOffset = 0;
       this.invalidate();
       this.tui.requestRender();
@@ -199,12 +215,28 @@
   dispose(): void {
     this.disposed = true;
     clearInterval(this.refreshTimer);
+    if (this.unsubscribeStore) {
+      this.unsubscribeStore();
+      this.unsubscribeStore = null;
+    }
     if (this.resizeHandler) {
       process.stdout.removeListener("resize", this.resizeHandler);
       this.resizeHandler = null;
     }
   }
 
+  private _refreshFromDisk(): void {
+    const fresh = readNotifications();
+    const signature = notificationSignature(fresh);
+    if (signature !== this.entriesSignature) {
+      markAllRead();
+      this.entries = readNotifications();
+      this.entriesSignature = notificationSignature(this.entries);
+      this.invalidate();
+      this.tui.requestRender();
+    }
+  }
+
   private wrapInBox(inner: string[], width: number): string[] {
     const th = this.theme;
     const border = (s: string) => th.fg("borderAccent", s);
@@ -250,7 +282,8 @@
     lines.push(hr());
 
     // Controls
-    lines.push(row(th.fg("dim", "↑/↓ scroll f filter c clear Esc close")));
+    const closeShortcut = formattedShortcutPair("notifications");
+    lines.push(row(th.fg("dim", `↑/↓ scroll f filter c clear Esc close (${closeShortcut})`)));
     lines.push(blank());
 
     // Entries
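The overlay now refreshes on push rather than polling every 3 seconds. A minimal sketch of the same consumer pattern, using only the store APIs that appear in this diff:

```ts
import { onNotificationStoreChange, readNotifications } from "./notification-store.js";

// Re-read the store only when a same-process mutation is broadcast,
// mirroring what the overlay's _refreshFromDisk() does in its callback.
const unsubscribe = onNotificationStoreChange(() => {
  const entries = readNotifications();
  console.log(`notification store changed: ${entries.length} entries`);
});

// On teardown (the overlay does this in dispose()):
unsubscribe();
```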
diff --git a/src/resources/extensions/gsd/notification-store.ts b/src/resources/extensions/gsd/notification-store.ts
index d79d4a33c..36e6c6a7f 100644
--- a/src/resources/extensions/gsd/notification-store.ts
+++ b/src/resources/extensions/gsd/notification-store.ts
@@ -26,12 +26,16 @@ export interface NotificationEntry {
 const MAX_ENTRIES = 500;
 const FILENAME = "notifications.jsonl";
 const LOCKFILE = "notifications.lock";
+const DEDUP_WINDOW_MS = 30_000;
+const DEDUP_PRUNE_THRESHOLD = 200;
 
 // ─── Module State ─────────────────────────────────────────────────────
 
 let _basePath: string | null = null;
 let _lineCount = 0; // Hint for rotation — not authoritative for public API
 let _suppressCount = 0;
+let _recentMessageTimestamps = new Map<string, number>();
+const _changeListeners = new Set<() => void>();
 
 // ─── Public API ───────────────────────────────────────────────────────
 
@@ -40,6 +44,9 @@
 /**
  * Initialize the notification store for a project. Must be called with the
  * project root. Seeds in-memory counters from the existing file on disk.
  */
 export function initNotificationStore(basePath: string): void {
+  if (_basePath !== basePath) {
+    _recentMessageTimestamps.clear();
+  }
   _basePath = basePath;
   // Seed line count hint for rotation — public counters read from disk
   _lineCount = _readEntriesFromDisk(basePath).length;
@@ -56,12 +63,23 @@ export function appendNotification(
 ): void {
   if (!_basePath) return;
   if (_suppressCount > 0) return;
+  const persistedMessage = message.length > 500 ? message.slice(0, 500) + "…" : message;
+  const dedupKey = `${_basePath}:${severity}:${source}:${persistedMessage}`;
+  const now = Date.now();
+  const lastSeen = _recentMessageTimestamps.get(dedupKey);
+  if (lastSeen !== undefined && now - lastSeen < DEDUP_WINDOW_MS) return;
+  _recentMessageTimestamps.set(dedupKey, now);
+  if (_recentMessageTimestamps.size > DEDUP_PRUNE_THRESHOLD) {
+    for (const [key, ts] of _recentMessageTimestamps) {
+      if (now - ts > DEDUP_WINDOW_MS) _recentMessageTimestamps.delete(key);
+    }
+  }
   const entry: NotificationEntry = {
     id: randomUUID(),
     ts: new Date().toISOString(),
     severity,
-    message: message.length > 500 ? message.slice(0, 500) + "…" : message,
+    message: persistedMessage,
     source,
     read: false,
   };
@@ -76,6 +94,7 @@
     if (_lineCount > MAX_ENTRIES) {
       _rotate();
     }
+    _emitChange();
   } catch {
     // Non-fatal — never let persistence break the caller
   }
@@ -104,6 +123,7 @@
   const hasUnread = entries.some((e) => !e.read);
   if (!hasUnread) return;
 
+  let changed = false;
   try {
     _withLock(bp, () => {
       // Re-read inside lock to get freshest state
@@ -111,10 +131,12 @@
       if (fresh.length === 0 || !fresh.some((e) => !e.read)) return;
       const lines = fresh.map((e) => JSON.stringify({ ...e, read: true }));
       _atomicWrite(bp, lines.join("\n") + "\n");
+      changed = true;
     });
   } catch {
     // Non-fatal
   }
+  if (changed) _emitChange();
 }
 
 /**
@@ -128,6 +150,8 @@
     _withLock(bp, () => {
       _atomicWrite(bp, "");
     });
+    _lineCount = 0;
+    _emitChange();
   } catch {
     // Non-fatal
   }
@@ -172,6 +196,17 @@ export function unsuppressPersistence(): void {
   _suppressCount = Math.max(0, _suppressCount - 1);
 }
 
+/**
+ * Subscribe to notification-store mutations (append, mark-read, clear).
+ * Returns an unsubscribe function.
+ */
+export function onNotificationStoreChange(listener: () => void): () => void {
+  _changeListeners.add(listener);
+  return () => {
+    _changeListeners.delete(listener);
+  };
+}
+
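Behaviorally, the dedup window means a second append of an identical message inside 30 seconds is dropped. A standalone sketch of the key logic as this hunk defines it; the function name `shouldPersist` is illustrative, not part of the module:

```ts
const DEDUP_WINDOW_MS = 30_000;
const recent = new Map<string, number>();

// Same key shape as appendNotification: basePath + severity + source + message.
function shouldPersist(basePath: string, severity: string, source: string, message: string): boolean {
  const key = `${basePath}:${severity}:${source}:${message}`;
  const now = Date.now();
  const last = recent.get(key);
  if (last !== undefined && now - last < DEDUP_WINDOW_MS) return false; // duplicate in window
  recent.set(key, now);
  return true;
}

// shouldPersist("/p", "error", "engine", "disk full") → true  (first sighting)
// shouldPersist("/p", "error", "engine", "disk full") → false (repeat within 30s)
```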
 // ─── Test Helpers ───────────────────────────────────────────────────────
 
 /**
@@ -181,6 +216,8 @@ export function _resetNotificationStore(): void {
   _basePath = null;
   _lineCount = 0;
   _suppressCount = 0;
+  _recentMessageTimestamps = new Map<string, number>();
+  _changeListeners.clear();
 }
 
 // ─── Internal ─────────────────────────────────────────────────────────
 
@@ -216,12 +253,23 @@
       const trimmed = entries.slice(entries.length - MAX_ENTRIES);
       const lines = trimmed.map((e) => JSON.stringify(e));
       _atomicWrite(_basePath!, lines.join("\n") + "\n");
+      _lineCount = trimmed.length;
     });
   } catch {
     // Non-fatal
   }
 }
 
+function _emitChange(): void {
+  for (const listener of _changeListeners) {
+    try {
+      listener();
+    } catch {
+      // Non-fatal
+    }
+  }
+}
+
 /**
  * Atomic file rewrite via temp-file + rename. Prevents partial reads
  * by other processes (web API subprocess, parallel workers).
@@ -275,10 +323,11 @@ function _withLock<T>(basePath: string, fn: () => T): T {
     }
   }
 
-  // Only run the mutation if we actually own the lock
-  const ownsLock = fd !== null;
+  // Best-effort: mutation runs regardless of lock status (idempotent overwrites).
+  // createdLock gates cleanup only — never skip fn() on lock failure.
+  const createdLock = fd !== null;
   try {
-    if (ownsLock && fd !== null) {
+    if (createdLock && fd !== null) {
       // Write our PID timestamp into the lock for stale detection
       writeFileSync(lockPath, String(Date.now()), "utf-8");
       closeSync(fd);
@@ -286,7 +335,7 @@
     return fn();
   } finally {
     // Only delete the lock if we created it — never remove another process's lock
-    if (ownsLock) {
+    if (createdLock) {
       try { unlinkSync(lockPath); } catch { /* best-effort cleanup */ }
     }
   }
diff --git a/src/resources/extensions/gsd/notification-widget.ts b/src/resources/extensions/gsd/notification-widget.ts
index 8a963be5e..a4ad968a6 100644
--- a/src/resources/extensions/gsd/notification-widget.ts
+++ b/src/resources/extensions/gsd/notification-widget.ts
@@ -5,8 +5,8 @@
 import type { ExtensionContext } from "@gsd/pi-coding-agent";
 
-import { getUnreadCount, readNotifications } from "./notification-store.js";
-import { formatShortcut } from "./files.js";
+import { getUnreadCount, onNotificationStoreChange } from "./notification-store.js";
+import { formattedShortcutPair } from "./shortcut-defs.js";
 
 // ─── Pure rendering ──────────────────────────────────────────────────────
 
@@ -14,18 +14,7 @@ export function buildNotificationWidgetLines(): string[] {
   const unread = getUnreadCount();
   if (unread === 0) return [];
 
-  const entries = readNotifications();
-  const latest = entries[0]; // newest-first
-  if (!latest) return [];
-
-  const icon = latest.severity === "error" ? "✗" : latest.severity === "warning" ? "⚠" : "●";
-  const badge = `${unread} unread`;
-  const msgMax = 80;
-  const truncated = latest.message.length > msgMax
-    ?
latest.message.slice(0, msgMax - 1) + "…" - : latest.message; - - return [` ${icon} [${badge}] ${truncated} (${formatShortcut("Ctrl+Alt+N")} to view)`]; + return [` 🔔 Notifications: ${unread} unread (${formattedShortcutPair("notifications")})`]; } // ─── Widget init ──────────────────────────────────────────────────────── @@ -51,6 +40,7 @@ export function initNotificationWidget(ctx: ExtensionContext): void { _tui.requestRender(); }; + const unsubscribe = onNotificationStoreChange(refresh); const refreshTimer = setInterval(refresh, REFRESH_INTERVAL_MS); return { @@ -62,6 +52,7 @@ export function initNotificationWidget(ctx: ExtensionContext): void { cachedLines = undefined; }, dispose(): void { + unsubscribe(); clearInterval(refreshTimer); }, }; diff --git a/src/resources/extensions/gsd/parallel-monitor-overlay.ts b/src/resources/extensions/gsd/parallel-monitor-overlay.ts index d56623621..4d49872b2 100644 --- a/src/resources/extensions/gsd/parallel-monitor-overlay.ts +++ b/src/resources/extensions/gsd/parallel-monitor-overlay.ts @@ -2,7 +2,8 @@ * GSD Parallel Monitor Overlay * * Full-screen TUI overlay showing real-time parallel worker progress. - * Opened via `/gsd parallel watch` or Ctrl+Alt+P (⌃⌥P on macOS). + * Opened via `/gsd parallel watch`, Ctrl+Alt+P (⌃⌥P on macOS), + * or Ctrl+Shift+P fallback. * Reads the same data sources as `scripts/parallel-monitor.mjs` but * renders as a native pi-tui overlay with theme integration. */ @@ -15,6 +16,7 @@ import type { Theme } from "@gsd/pi-coding-agent"; import { truncateToWidth, visibleWidth, matchesKey, Key } from "@gsd/pi-tui"; import { formatDuration, STATUS_GLYPH, STATUS_COLOR } from "../shared/mod.js"; +import { formattedShortcutPair } from "./shortcut-defs.js"; // ─── Types ──────────────────────────────────────────────────────────────── @@ -347,7 +349,12 @@ export class ParallelMonitorOverlay { } handleInput(data: string): void { - if (matchesKey(data, Key.escape) || data === "q") { + if ( + matchesKey(data, Key.escape) || + matchesKey(data, Key.ctrlAlt("p")) || + matchesKey(data, Key.ctrlShift("p")) || + data === "q" + ) { this.dispose(); this.onClose(); return; @@ -486,7 +493,7 @@ export class ParallelMonitorOverlay { } lines.push(` ${t.bold("Total: $" + this.workers.reduce((s, wk) => s + wk.cost, 0).toFixed(2))}`); } - lines.push(t.fg("muted", " ESC/q to close │ ↑↓ scroll")); + lines.push(t.fg("muted", ` ESC/q/${formattedShortcutPair("parallel")} close │ ↑↓ scroll`)); // Apply scroll — use terminal rows as height estimate const termHeight = process.stdout.rows || 40; diff --git a/src/resources/extensions/gsd/pre-execution-checks.ts b/src/resources/extensions/gsd/pre-execution-checks.ts index ed10ba50b..5e77bca85 100644 --- a/src/resources/extensions/gsd/pre-execution-checks.ts +++ b/src/resources/extensions/gsd/pre-execution-checks.ts @@ -20,6 +20,8 @@ import { resolve } from "node:path"; import type { TaskRow } from "./gsd-db.ts"; import type { PreExecutionCheckJSON } from "./verification-evidence.ts"; +const NPM_COMMAND = process.platform === "win32" ? 
"npm.cmd" : "npm"; + // ─── Result Types ──────────────────────────────────────────────────────────── export interface PreExecutionResult { @@ -126,9 +128,10 @@ async function checkPackageOnNpm( timeoutMs = 5000 ): Promise<{ exists: boolean; error?: string }> { return new Promise((resolve) => { - const child = spawn("npm", ["view", packageName, "name"], { + const child = spawn(NPM_COMMAND, ["view", packageName, "name"], { stdio: ["ignore", "pipe", "pipe"], timeout: timeoutMs, + shell: process.platform === "win32", }); let stdout = ""; @@ -263,9 +266,9 @@ function extractPathFromAnnotation(raw: string): string { const trimmed = raw.trim(); if (!trimmed) return trimmed; - const backtickMatch = trimmed.match(/^`([^`]+)`(?:\s+[—–-]\s+.*)?$/); + const backtickMatch = trimmed.match(/^(`+)([^`]+)\1(?:(?:\s+[—–-]\s+.+)|(?:\s+\([^()]+\)))?$/); if (backtickMatch) { - return backtickMatch[1].trim(); + return backtickMatch[2].trim(); } const annotatedMatch = trimmed.match(/^(.+?)\s+[—–-]\s+.+$/); @@ -277,6 +280,38 @@ function extractPathFromAnnotation(raw: string): string { return trimmed.replace(/`/g, ""); } +/** + * Planning units sometimes use task.inputs for prose like "Current enum shape" + * instead of concrete file paths. Those entries should not fail path checks. + * Keep validation for anything that still looks like a real file reference: + * explicit backticks, globs, separators, dot-paths, or single-token basenames + * like Dockerfile. + */ +function shouldValidateInputAsPath(raw: string): boolean { + const trimmed = raw.trim(); + if (!trimmed) return false; + + if (/^`+[^`]+`+/.test(trimmed)) { + return true; + } + + const candidate = extractPathFromAnnotation(trimmed); + if (!candidate) return false; + + if (!/\s/.test(candidate)) { + return true; + } + + return ( + candidate.startsWith("/") || + candidate.startsWith("./") || + candidate.startsWith("../") || + candidate.startsWith("~/") || + /[\\/]/.test(candidate) || + /[*?[\]{}]/.test(candidate) + ); +} + /** * Build a set of files that will be created by tasks up to (but not including) taskIndex. * All paths are normalized for consistent comparison. 
@@ -315,6 +350,7 @@ export function checkFilePathConsistency(
   for (const file of filesToCheck) {
     // Skip empty strings
     if (!file.trim()) continue;
+    if (!shouldValidateInputAsPath(file)) continue;
 
     // Normalize path for consistent comparison
     const normalizedFile = normalizeFilePath(file);
@@ -351,7 +387,7 @@
  */
 export function checkTaskOrdering(
   tasks: TaskRow[],
-  _basePath: string
+  basePath: string
 ): PreExecutionCheckJSON[] {
   const results: PreExecutionCheckJSON[] = [];
 
@@ -375,9 +411,13 @@
     const filesToCheck = [...task.inputs];
 
     for (const file of filesToCheck) {
+      if (!shouldValidateInputAsPath(file)) continue;
+
       const normalizedFile = normalizeFilePath(file);
       const creator = fileCreators.get(normalizedFile);
-      if (creator && creator.index > i) {
+      const absolutePath = resolve(basePath, normalizedFile);
+      const existsOnDisk = existsSync(absolutePath);
+      if (creator && creator.index > i && !existsOnDisk) {
         // Task reads file that is created later — impossible ordering
         results.push({
           category: "file",
diff --git a/src/resources/extensions/gsd/preferences-skills.ts b/src/resources/extensions/gsd/preferences-skills.ts
index d930ba0b4..30b286d4c 100644
--- a/src/resources/extensions/gsd/preferences-skills.ts
+++ b/src/resources/extensions/gsd/preferences-skills.ts
@@ -17,7 +17,6 @@ import type {
   SkillResolutionReport,
 } from "./preferences-types.js";
 import { validatePreferences } from "./preferences-validation.js";
-import { loadEffectiveGSDPreferences } from "./preferences.js";
 
 // Re-export types so existing consumers of ./preferences-skills.js keep working
 export type { GSDSkillRule, SkillDiscoveryMode, SkillResolution, SkillResolutionReport } from "./preferences-types.js";
@@ -143,38 +142,5 @@ export function resolveAllSkillReferences(preferences: GSDPreferences, cwd: stri
   return { resolutions, warnings };
 }
 
-/**
- * Format a skill reference for the system prompt.
- * If resolved, shows the path so the agent knows exactly where to read.
- * If unresolved, marks it clearly.
- */
-export function formatSkillRef(ref: string, resolutions: Map<string, SkillResolution>): string {
-  const resolution = resolutions.get(ref);
-  if (!resolution || resolution.method === "unresolved") {
-    return `${ref} (⚠ not found — check skill name or path)`;
-  }
-  // For absolute paths where SKILL.md is just appended, don't clutter the output
-  if (resolution.method === "absolute-path" || resolution.method === "absolute-dir") {
-    return ref;
-  }
-  // For bare names resolved from skill directories, show the resolved path
-  return `${ref} → \`${resolution.resolvedPath}\``;
-}
-
-/**
- * Resolve the skill discovery mode from effective preferences.
- * Defaults to "suggest" -- skills are identified during research but not installed automatically.
- */
-export function resolveSkillDiscoveryMode(): SkillDiscoveryMode {
-  const prefs = loadEffectiveGSDPreferences();
-  return prefs?.preferences.skill_discovery ?? "suggest";
-}
-
-/**
- * Resolve the skill staleness threshold in days.
- * Returns 0 if disabled, default 60 if not configured.
- */
-export function resolveSkillStalenessDays(): number {
-  const prefs = loadEffectiveGSDPreferences();
-  return prefs?.preferences.skill_staleness_days ?? 60;
-}
+// resolveSkillDiscoveryMode and resolveSkillStalenessDays moved to
+// preferences.ts to break circular dependency (they need loadEffectiveGSDPreferences).
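The relocated wrappers keep their documented fallbacks. A quick sanity sketch, assuming no preferences file is configured:

```ts
import {
  resolveSkillDiscoveryMode,
  resolveSkillStalenessDays,
} from "./preferences.js";

// With loadEffectiveGSDPreferences() returning nothing, the defaults apply:
console.log(resolveSkillDiscoveryMode()); // "suggest"
console.log(resolveSkillStalenessDays()); // 60
```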
diff --git a/src/resources/extensions/gsd/preferences-types.ts b/src/resources/extensions/gsd/preferences-types.ts
index 58b847cc9..75aac4a0c 100644
--- a/src/resources/extensions/gsd/preferences-types.ts
+++ b/src/resources/extensions/gsd/preferences-types.ts
@@ -20,7 +20,7 @@ import type {
   ReactiveExecutionConfig,
   GateEvaluationConfig,
 } from "./types.js";
-import type { DynamicRoutingConfig } from "./model-router.js";
+import type { DynamicRoutingConfig, ModelCapabilities } from "./model-router.js";
 
 export interface ContextManagementConfig {
   observation_masking?: boolean; // default: true
@@ -255,6 +255,8 @@ export interface GSDPreferences {
   post_unit_hooks?: PostUnitHookConfig[];
   pre_dispatch_hooks?: PreDispatchHookConfig[];
   dynamic_routing?: DynamicRoutingConfig;
+  /** Per-model capability overrides. Deep-merged with built-in profiles for capability-aware routing (ADR-004). */
+  modelOverrides?: Record<string, { capabilities?: Partial<ModelCapabilities> }>;
   context_management?: ContextManagementConfig;
   token_profile?: TokenProfile;
   phases?: PhaseSkipPreferences;
@@ -382,3 +384,19 @@ export interface SkillResolutionReport {
   /** References that could not be resolved. */
   warnings: string[];
 }
+
+/**
+ * Format a skill reference for the system prompt.
+ * If resolved, shows the path so the agent knows exactly where to read.
+ * If unresolved, marks it clearly.
+ */
+export function formatSkillRef(ref: string, resolutions: Map<string, SkillResolution>): string {
+  const resolution = resolutions.get(ref);
+  if (!resolution || resolution.method === "unresolved") {
+    return `${ref} (⚠ not found — check skill name or path)`;
+  }
+  if (resolution.method === "absolute-path" || resolution.method === "absolute-dir") {
+    return ref;
+  }
+  return `${ref} → \`${resolution.resolvedPath}\``;
+}
diff --git a/src/resources/extensions/gsd/preferences.ts b/src/resources/extensions/gsd/preferences.ts
index a2c86fdbd..7a7ac6751 100644
--- a/src/resources/extensions/gsd/preferences.ts
+++ b/src/resources/extensions/gsd/preferences.ts
@@ -29,9 +29,10 @@ import {
   type GSDPreferences,
   type LoadedGSDPreferences,
   type SkillResolution,
+  type SkillDiscoveryMode,
+  formatSkillRef,
 } from "./preferences-types.js";
 import { validatePreferences } from "./preferences-validation.js";
-import { formatSkillRef } from "./preferences-skills.js";
 
 // ─── Re-exports: types ──────────────────────────────────────────────────────
 // Every type/interface that was previously exported from this file is
@@ -60,11 +61,20 @@ export type {
 export { validatePreferences } from "./preferences-validation.js";
 
 // ─── Re-exports: skills ─────────────────────────────────────────────────────
-export {
-  resolveAllSkillReferences,
-  resolveSkillDiscoveryMode,
-  resolveSkillStalenessDays,
-} from "./preferences-skills.js";
+export { resolveAllSkillReferences } from "./preferences-skills.js";
+
+// These lived in preferences-skills.ts but imported loadEffectiveGSDPreferences
+// back from this file, creating a circular dependency. Moved here since they
+// are trivial wrappers over loadEffectiveGSDPreferences.
+export function resolveSkillDiscoveryMode(): SkillDiscoveryMode {
+  const prefs = loadEffectiveGSDPreferences();
+  return prefs?.preferences.skill_discovery ?? "suggest";
+}
+
+export function resolveSkillStalenessDays(): number {
+  const prefs = loadEffectiveGSDPreferences();
+  return prefs?.preferences.skill_staleness_days ??
60; +} // ─── Re-exports: models ───────────────────────────────────────────────────── export { @@ -389,6 +399,9 @@ function mergePreferences(base: GSDPreferences, override: GSDPreferences): GSDPr github: (base.github || override.github) ? { ...(base.github ?? {}), ...(override.github ?? {}) } as import("../github-sync/types.js").GitHubSyncConfig : undefined, + experimental: (base.experimental || override.experimental) + ? { ...(base.experimental ?? {}), ...(override.experimental ?? {}) } + : undefined, service_tier: override.service_tier ?? base.service_tier, forensics_dedup: override.forensics_dedup ?? base.forensics_dedup, show_token_cost: override.show_token_cost ?? base.show_token_cost, diff --git a/src/resources/extensions/gsd/prompt-loader.ts b/src/resources/extensions/gsd/prompt-loader.ts index aa01d583a..f3f75b76d 100644 --- a/src/resources/extensions/gsd/prompt-loader.ts +++ b/src/resources/extensions/gsd/prompt-loader.ts @@ -143,10 +143,15 @@ export function loadPrompt(name: string, vars: Record = {}): str } for (const [key, value] of Object.entries(effectiveVars)) { + const safeValue = + key === "workingDirectory" && typeof value === "string" + ? value.replaceAll("\\", "/") + : value; + // Use split/join instead of replaceAll to avoid JavaScript's special // replacement patterns ($', $`, $&) being interpreted in the value. // See: https://github.com/gsd-build/gsd-2/issues/2968 - content = content.split(`{{${key}}}`).join(value); + content = content.split(`{{${key}}}`).join(safeValue); } return content.trim(); diff --git a/src/resources/extensions/gsd/prompt-validation.ts b/src/resources/extensions/gsd/prompt-validation.ts index df2463a98..0163b88a6 100644 --- a/src/resources/extensions/gsd/prompt-validation.ts +++ b/src/resources/extensions/gsd/prompt-validation.ts @@ -1,10 +1,16 @@ /** - * GSD Prompt Validation — Validates enhanced context output before writing. + * GSD Prompt Validation — Validates enhanced context and turn output + * artifacts before writing. * - * Implements R109 validation requirement: CONTEXT.md must have required sections - * before being written to disk. + * Implements R109 validation requirement: CONTEXT.md must have required + * sections before being written to disk. Additionally, per-turn validators + * check that artifacts produced by gate-owning turns contain the gate + * sections declared in gate-registry.ts, so a malformed summary/validation + * markdown file cannot silently drop a quality gate. */ +import { getGatesForTurn, type OwnerTurn } from "./gate-registry.js"; + /** * Result of validating enhanced context output. */ @@ -86,3 +92,66 @@ export function validateEnhancedContext(content: string): ValidationResult { missing, }; } + +// ─── Per-Turn Gate Section Validators ───────────────────────────────────── +// +// Each validator checks that the artifact written by a turn contains a +// heading for every gate owned by that turn. The registry is the source +// of truth for which sections must exist; adding a new gate automatically +// flows through via `getGatesForTurn(turn)`. + +/** + * Escape a string so it can be embedded safely inside a regular expression. + */ +function escapeRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +/** + * Validate that an artifact contains an `## H2` heading for every gate the + * named turn owns. Returns the list of missing gate section headers. 
+ * + * Soft rule: a section counts as "present" if it is declared (H2 heading + * exists) — empty-body sections are allowed and handled by the tool + * handler, which will record such gates as `omitted`. + */ +export function validateGateSections( + content: string, + turn: OwnerTurn, +): ValidationResult { + const missing: string[] = []; + for (const def of getGatesForTurn(turn)) { + const pattern = new RegExp(`^##\\s+${escapeRegExp(def.promptSection)}\\b`, "m"); + if (!pattern.test(content)) { + missing.push(`${def.id} (## ${def.promptSection})`); + } + } + return { valid: missing.length === 0, missing }; +} + +/** + * Validate a SUMMARY.md produced by the complete-slice turn. Requires + * an H2 heading for every gate owned by complete-slice (e.g. Q8 → + * "## Operational Readiness"). Intended for use in the tool handler's + * pre-write checks or in the post-unit validation sweep. + */ +export function validateSliceSummaryOutput(content: string): ValidationResult { + return validateGateSections(content, "complete-slice"); +} + +/** + * Validate a task SUMMARY.md produced by the execute-task turn. Only + * flags gates that are still pending for the task; skips the check + * when no rows are seeded (simple task). + */ +export function validateTaskSummaryOutput(content: string): ValidationResult { + return validateGateSections(content, "execute-task"); +} + +/** + * Validate a VALIDATION.md produced by the validate-milestone turn. + * Requires an H2 heading for every MV gate declared in the registry. + */ +export function validateMilestoneValidationOutput(content: string): ValidationResult { + return validateGateSections(content, "validate-milestone"); +} diff --git a/src/resources/extensions/gsd/prompts/complete-slice.md b/src/resources/extensions/gsd/prompts/complete-slice.md index 86c271298..100f8efd7 100644 --- a/src/resources/extensions/gsd/prompts/complete-slice.md +++ b/src/resources/extensions/gsd/prompts/complete-slice.md @@ -16,14 +16,16 @@ All relevant context has been preloaded below — the slice plan, all task summa {{inlinedContext}} +{{gatesToClose}} + **Match effort to complexity.** A simple slice with 1-2 tasks needs a brief summary and lightweight verification. A complex slice with 5 tasks across multiple subsystems needs thorough verification and a detailed summary. Scale the work below accordingly. Then: 1. Use the **Slice Summary** and **UAT** output templates from the inlined context above 2. {{skillActivation}} -3. Run all slice-level verification checks defined in the slice plan. All must pass before marking the slice done. If any fail, fix them first. +3. Run all slice-level verification checks defined in the slice plan. All must pass before marking the slice done. If any fail, fix them first. Task artifacts use a **flat file layout** directly inside `tasks/` (for example `T01-SUMMARY.md`, `T02-SUMMARY.md`) rather than per-task subdirectories. If you need to count or re-read task summaries during verification, use `find .gsd/milestones/{{milestoneId}}/slices/{{sliceId}}/tasks -name "*-SUMMARY.md"` or `ls .gsd/milestones/{{milestoneId}}/slices/{{sliceId}}/tasks/*-SUMMARY.md`. Never use `tasks/*/SUMMARY.md` — that glob expects subdirectories that do not exist. 4. If the slice plan includes observability/diagnostic surfaces, confirm they work. Skip this for simple slices that don't have observability sections. -5. 
If the slice involved runtime behavior, fill the **Operational Readiness** section (Q8) in the slice summary: health signal, failure signal, recovery procedure, and monitoring gaps. Omit entirely for simple slices with no runtime concerns. +5. Address every gate listed in the **Gates to Close** section above — each gate maps to a specific slice-summary section the handler inspects (for example, Q8 maps to **Operational Readiness**: health signal, failure signal, recovery procedure, and monitoring gaps). Leaving a section empty records the gate as `omitted`. 6. If this slice produced evidence that a requirement changed status (Active → Validated, Active → Deferred, etc.), call `gsd_requirement_update` with the requirement ID, updated `status`, and `validation` evidence. Do NOT write `.gsd/REQUIREMENTS.md` directly — the engine renders it from the database. 7. Prepare the slice completion content you will pass to `gsd_complete_slice` using the camelCase fields `milestoneId`, `sliceId`, `sliceTitle`, `oneLiner`, `narrative`, `verification`, and `uatContent`. Do **not** manually write `{{sliceSummaryPath}}`. Do **not** manually write `{{sliceUatPath}}` — the DB-backed tool is the canonical write path for both artifacts. 8. Draft the UAT content you will pass as `uatContent` — a concrete UAT script with real test cases derived from the slice plan and task summaries. Include preconditions, numbered steps with expected outcomes, and edge cases. This must NOT be a placeholder or generic template — tailor every test case to what this slice actually built. @@ -35,7 +37,7 @@ Then: **Autonomous execution:** Do not call `ask_user_questions` or `secure_env_collect`. You are running in auto-mode — there is no human available to answer questions. Make reasonable assumptions and document them in the slice summary. If a decision genuinely requires human input, note it in the summary and proceed with the best available option. -**File system safety:** Task summaries are preloaded in the inlined context above. If you need to re-read any of them, use `find .gsd/milestones/{{milestoneId}}/slices/{{sliceId}}/tasks -name "*-SUMMARY.md"` to list file paths first — never pass `{{slicePath}}` or any other directory path directly to the `read` tool. The `read` tool only accepts file paths, not directories. +**File system safety:** Task summaries are preloaded in the inlined context above. Task artifacts use a **flat file layout** — files such as `T01-SUMMARY.md` and `T02-SUMMARY.md` live directly inside the `tasks/` directory, not inside per-task subdirectories like `tasks/T01/SUMMARY.md`. If you need to re-read any of them, use `find .gsd/milestones/{{milestoneId}}/slices/{{sliceId}}/tasks -name "*-SUMMARY.md"` to list file paths first. Never use `tasks/*/SUMMARY.md`, and never pass `{{slicePath}}` or any other directory path directly to the `read` tool. The `read` tool only accepts file paths, not directories. **You MUST call `gsd_complete_slice` with the slice summary and UAT content before finishing. 
The tool persists to both DB and disk and renders `{{sliceSummaryPath}}` and `{{sliceUatPath}}` automatically.** diff --git a/src/resources/extensions/gsd/prompts/discuss-prepared.md b/src/resources/extensions/gsd/prompts/discuss-prepared.md deleted file mode 100644 index 92a232f7b..000000000 --- a/src/resources/extensions/gsd/prompts/discuss-prepared.md +++ /dev/null @@ -1,424 +0,0 @@ -{{preamble}} - -You are conducting a **prepared discussion** — the system has already analyzed the codebase, gathered prior context, and researched the ecosystem. Your job is to present these findings, make recommendations, and gather the user's input through a structured 4-layer protocol. - -## Preparation Briefs - -The following briefs were generated during the preparation phase. Use them to ground your recommendations. - -### Codebase Brief - -{{codebaseBrief}} - -### Prior Context Brief - -{{priorContextBrief}} - -### Ecosystem Brief - -{{ecosystemBrief}} - ---- - -## 4-Layer Discussion Protocol - -This discussion proceeds through four mandatory layers. At each layer: -1. **Present findings** — share what the preparation revealed -2. **Make a recommendation** — take a position based on the evidence -3. **Ask clarifying questions** — fill gaps the preparation couldn't answer -4. **Gate** — use `ask_user_questions` to get explicit sign-off before advancing - -**Do NOT skip layers.** Each layer builds on the previous. The user must explicitly approve each layer before you proceed. - ---- - -## Depth Adaptation - -The depth of questioning at each layer should match THIS milestone's work type. Do not apply a fixed checklist — reason from first principles about what matters for this specific work. - -**Work-type reasoning:** -- **API/service work** — Focus Layer 2 questions on contracts, versioning, backwards compatibility, authentication boundaries. Layer 3 must cover rate limiting, timeout cascades, and partial failure states. -- **CLI/developer tools** — Focus Layer 1 on user mental model and command grammar. Layer 4 needs shell compatibility, error message clarity, and exit code semantics. -- **ML/data pipelines** — Focus Layer 2 on data flow, reproducibility, and intermediate state. Layer 3 must cover data corruption, training divergence, and checkpoint recovery. -- **UI/frontend work** — Focus Layer 2 on component boundaries and state management. Layer 3 needs loading states, optimistic updates, and offline behavior. Layer 4 must include visual regression criteria. -- **Infrastructure/platform** — Focus Layer 2 on deployment topology and failure domains. Layer 3 must cover cascading failures, resource exhaustion, and rollback paths. -- **Refactoring/migration** — Focus Layer 1 on what changes vs what must stay identical. Layer 4 needs behavioral equivalence tests, not just code coverage. - -**Adaptation principle:** Ask "What would cause this milestone to fail silently or succeed incorrectly?" The answer shapes which questions deserve deep exploration vs quick confirmation. - ---- - -## Layer 1 — Scope (What are we building?) - -### Identify Work Type - -**Before presenting findings, identify the primary work type and state it explicitly:** - -"Based on [user's request and codebase analysis], this milestone is primarily **[work type]** work (e.g., API/backend, UI/frontend, CLI tool, data pipeline, simulation, infrastructure)." - -This classification determines the depth and focus of questioning at each layer. 
If the work type spans multiple categories, state the dominant type and note the secondary types. The user can correct this classification. - -### Present Findings - -Start by presenting what you learned from the preparation: - -1. **From the Codebase Brief:** Summarize the technology stack, key modules, and established patterns. Call out anything that constrains or enables the proposed work. - -2. **From the Prior Context Brief:** Surface existing decisions, requirements, and knowledge that are relevant. Note any prior commitments or constraints. - -3. **Scope implications:** Based on the above, explain what scope makes sense and what would conflict with the existing codebase. - -### Make a Recommendation - -Take a clear position: "Based on [specific findings], I recommend the milestone scope as [concrete description]." - -Include: -- What the milestone will deliver (user-visible outcome) -- What it explicitly excludes (to prevent scope creep) -- Rough size estimate (number of slices, complexity) - -### Resolve Scope — Mandatory Rounds - -After presenting your recommendation, you MUST complete these rounds in order. Each round uses `ask_user_questions` or direct questions. Do NOT skip rounds. Do NOT combine rounds. Do NOT jump to the Layer 1 Gate until all rounds are complete. - -**Complexity calibration:** If the milestone is simple (1-2 slices, well-understood patterns, no ambiguity), you may compress rounds — but you must still explicitly address each round's topic, even if briefly. You may NOT skip rounds entirely. For complex milestones (3+ slices, novel architecture, significant ambiguity), give each round full treatment. - -**Round 1 — Feature boundaries:** -For each feature in your recommendation, state what it includes and excludes. Ask the user to confirm or adjust each boundary. Example: "Signup — I'm including email/password registration. I'm excluding OAuth, email verification, and phone number signup. Correct?" - -**Round 2 — Ambiguity resolution:** -Identify every term or concept in the scope that could be interpreted multiple ways. For each one, state the two most likely interpretations and ask which the user intends. Example: "'User authentication' — do you mean just login/signup, or also session management, token refresh, and logout?" - -**Round 3 — Dependencies and constraints:** -Ask about external dependencies (APIs, services, databases), existing code that will be affected, and constraints the user hasn't mentioned. Reference specific findings from the codebase brief. Example: "Your db.ts already has a getUser() function — should signup create users compatible with this existing model?" - -**Round 4 — Priority and ordering:** -If the scope has multiple features, ask the user to rank them by priority. Ask what's the minimum viable version if the milestone needs to be cut short. Example: "If we had to ship with only 2 of the 3 slices, which two matter most?" - -After completing all 4 rounds, proceed to the Layer 1 Gate. - -### Layer 1 Gate - -Before advancing, use `ask_user_questions` with question ID containing `layer1_scope_gate`: - -``` -Header: "Scope Gate" -Question: "Does this scope capture what you want to build?" -Options: - - "Yes, scope is correct (Recommended)" — proceed to Layer 2 - - "Needs adjustment" — user will clarify, then re-present scope -``` - -**CRITICAL — Non-bypassable gate:** Do NOT proceed to Layer 2 until the user explicitly approves the scope. 
If `ask_user_questions` fails, errors, returns no response, or the user's response does not match a provided option, you MUST re-ask — never rationalize past the block. "Tool not responding, I'll proceed," "auth issues," or "I'll use my recommended scope" are all **forbidden**. The gate exists to protect the user's work; treat a block as an instruction to wait, not an obstacle to work around. - ---- - -## Ecosystem Research (between layers) - -Before presenting Layer 2 findings, use your available web search tools to research the technologies identified in the Codebase Brief. For each major technology (framework, ORM, key library): - -1. Search for "[technology] [version] best practices [current year]" -2. Search for "[technology] [version] known issues" - -Summarize findings concisely. If search tools fail or are unavailable, note this and proceed using your training knowledge — but do NOT use a search failure as justification to skip any gate. - -Present ecosystem findings at the start of Layer 2 alongside your architecture recommendation. - ---- - -## Layer 2 — Architecture (How will it work?) - -### Present Findings - -Now present architectural recommendations grounded in evidence: - -1. **From the Ecosystem Brief:** Share relevant best practices, known issues, library recommendations, and integration patterns discovered during research. - -2. **From the Codebase Brief:** Identify existing architectural patterns that should be followed or deliberately broken from. - -3. **Synthesis:** Explain how the ecosystem research applies to this specific codebase context. - -### Make a Recommendation - -Take a clear position: "I'd suggest [approach] because [evidence-based rationale]." - -Cover: -- Overall architectural approach (new module? extend existing? separate service?) -- Key technical decisions (which libraries, patterns, data flow) -- Integration points with existing code -- What you'd avoid and why - -### Resolve Architecture — Mandatory Rounds - -After presenting your recommendation, you MUST complete these rounds in order. Do NOT skip rounds. Do NOT jump to the Layer 2 Gate until all rounds are complete. - -**Complexity calibration:** If the milestone is simple (1-2 slices, well-understood patterns, no ambiguity), you may compress rounds — but you must still explicitly address each round's topic, even if briefly. You may NOT skip rounds entirely. For complex milestones (3+ slices, novel architecture, significant ambiguity), give each round full treatment. - -**Round 1 — Per-slice technical decisions:** -For each slice in your decomposition, state the specific technical approach. Ask the user to confirm or adjust. Don't just say "build the signup endpoint" — state which library handles password hashing, where the route file lives, what the request/response schema looks like. - -**Round 2 — Inter-slice contracts:** -For each dependency between slices, state explicitly what the upstream slice produces and what the downstream slice expects. Ask the user to confirm the interface. Example: "S01 produces a User model with {id, email, hashedPassword}. S02's login endpoint will query by email and compare password. Does this contract work?" - -**Round 3 — Library and pattern decisions:** -For each library or pattern choice, present at least one alternative with tradeoffs. Ask the user to confirm. Example: "bcrypt vs argon2 for password hashing — bcrypt is more common in Node, argon2 is newer and more resistant to GPU attacks. I recommend bcrypt for simplicity. Agree?" 
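The contract and library rounds above translate directly into code. A minimal TypeScript sketch of the Round 2 contract example, using the bcrypt choice from Round 3; the type and function names are illustrative, not part of the prompt:

```typescript
import bcrypt from "bcrypt"; // Round 3 example choice: bcrypt over argon2, for simplicity

// Round 2 contract: S01 produces this model; S02's login consumes it.
interface User {
  id: string;
  email: string;
  hashedPassword: string;
}

// S01: signup stores a bcrypt hash (10 salt rounds is a common default).
async function hashPassword(plain: string): Promise<string> {
  return bcrypt.hash(plain, 10);
}

// S02: login queries by email, then compares against the stored hash.
async function verifyPassword(plain: string, user: User): Promise<boolean> {
  return bcrypt.compare(plain, user.hashedPassword);
}
```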
- -**Round 4 — Integration with existing code:** -Walk through how the new code connects to existing files and patterns. Ask about anything that might conflict. Reference specific files from the codebase brief. Example: "The new auth routes will mount at /api/auth alongside your existing /api router in routes.ts. Should they share the same router file or get their own auth-routes.ts?" - -After completing all 4 rounds, proceed to the Layer 2 Gate. - -### Layer 2 Gate - -Before advancing, use `ask_user_questions` with question ID containing `layer2_architecture_gate`: - -``` -Header: "Architecture Gate" -Question: "Ready to move to error handling, or want to adjust the architecture?" -Options: - - "Architecture looks good (Recommended)" — proceed to Layer 3 - - "Want to adjust" — user will clarify, then re-present architecture -``` - -**CRITICAL — Non-bypassable gate:** Do NOT proceed to Layer 3 until the user explicitly approves the architecture. If `ask_user_questions` fails, errors, returns no response, or the user's response does not match a provided option, you MUST re-ask — never rationalize past the block. The gate exists to protect the user's work; treat a block as an instruction to wait, not an obstacle to work around. - ---- - -## Layer 3 — Error States (What can go wrong?) - -### Present Findings - -Identify failure modes based on the scope and architecture: - -1. **From the Ecosystem Brief:** Known issues, common pitfalls, edge cases that trip up similar implementations. - -2. **From the Architecture:** Failure points at integration boundaries, async operations, external dependencies, user input handling. - -3. **From the Codebase Brief:** How existing code handles errors — patterns to follow, gaps to fill. - -### Make a Recommendation - -Take a clear position: "The critical error paths are [X, Y, Z]. I recommend handling them by [approach]." - -Cover: -- **Must-handle errors:** Failures that would break the user experience or corrupt data -- **Should-handle errors:** Degraded experiences that are acceptable with good messaging -- **Edge cases:** Boundary conditions, malformed input, timing issues -- **Recovery strategy:** Retry logic, fallback behavior, user notification - -### Resolve Error Handling — Mandatory Rounds - -After presenting your recommendation, ask the user: - -**"Do you want to go deep on error handling, or accept the defaults I recommended?"** - -Use `ask_user_questions` with options: "Go deep" / "Accept defaults" - -If they accept defaults, record your recommendations as decisions and proceed to the Layer 3 Gate. - -If they want to go deep, complete these rounds: - -**Complexity calibration:** If the milestone is simple, you may compress rounds — but you must still explicitly address each round's topic. You may NOT skip rounds entirely. - -**Round 1 — Input validation:** -For each endpoint or entry point, state what input validation happens and what error the user sees for invalid input. Ask the user to confirm. Example: "Signup with missing email returns 400 with {error: 'Email is required'}. Signup with invalid email format returns 400 with {error: 'Invalid email format'}. Right?" - -**Round 2 — Authentication/authorization failures:** -For each protected operation, state what happens when auth fails. Ask the user to confirm. Example: "Expired JWT returns 401. Missing JWT returns 401. Malformed JWT returns 401. All three use the same generic message to avoid information leakage. Correct?" 
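Round 2's uniform-401 policy is small enough to sketch. This assumes an Express-style stack with `jsonwebtoken`; the prompt names neither, so both are illustrative assumptions:

```typescript
import type { NextFunction, Request, Response } from "express";
import jwt from "jsonwebtoken";

// Expired, missing, and malformed tokens all get the same generic 401;
// distinguishing the cases would leak information to an attacker.
function requireAuth(secret: string) {
  return (req: Request, res: Response, next: NextFunction) => {
    const token = req.headers.authorization?.replace(/^Bearer /, "");
    try {
      if (!token) throw new Error("missing token");
      jwt.verify(token, secret); // throws on expired or malformed tokens
      next();
    } catch {
      res.status(401).json({ error: "Unauthorized" });
    }
  };
}
```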
- -**Round 3 — System failures:** -For each external dependency (database, API, file system), state what happens when it's unavailable. Ask the user to confirm. Example: "If Prisma can't connect to the database, all endpoints return 500 with a generic message. We log the real error server-side but never expose it to the client." - -After completing all rounds (or accepting defaults), proceed to the Layer 3 Gate. - -### Layer 3 Gate - -Before advancing, use `ask_user_questions` with question ID containing `layer3_error_gate`: - -``` -Header: "Error Handling Gate" -Question: "Error handling strategy captured. Ready to define the quality bar?" -Options: - - "Yes, move to quality bar (Recommended)" — proceed to Layer 4 - - "Want to adjust error handling" — user will clarify, then re-present errors -``` - -**CRITICAL — Non-bypassable gate:** Do NOT proceed to Layer 4 until the user explicitly approves error handling. If `ask_user_questions` fails, errors, returns no response, or the user's response does not match a provided option, you MUST re-ask — never rationalize past the block. The gate exists to protect the user's work; treat a block as an instruction to wait, not an obstacle to work around. - ---- - -## Layer 4 — Quality Bar (What does done mean?) - -### Present Findings - -Define what "done" looks like based on everything discussed: - -1. **Testing requirements:** What must be tested? Unit tests, integration tests, E2E tests? Based on the architecture's complexity and risk profile. - -2. **Acceptance criteria:** Concrete, observable outcomes that prove the milestone is complete. Derived from the scope discussion. - -3. **Performance/quality constraints:** Based on ecosystem research and codebase patterns — response times, error rates, accessibility requirements. - -### Make a Recommendation - -Take a clear position: "For this scope, I'd suggest these acceptance criteria: [list]." - -Include: -- **Definition of done:** What conditions must be true for the milestone to be complete? -- **Test coverage expectations:** What must be tested vs nice-to-have? -- **Quality gates:** What would block shipping? - -### Resolve Quality — Mandatory Rounds - -After presenting your recommendation, you MUST complete these rounds in order. Do NOT skip rounds. - -**Complexity calibration:** If the milestone is simple, you may compress rounds — but you must still explicitly address each round's topic, even if briefly. You may NOT skip rounds entirely. - -**Round 1 — Per-slice acceptance criteria:** -For each slice, state 3-5 specific, testable acceptance criteria. Ask the user to confirm each slice's criteria. These must be concrete enough that the planner can use them directly. "Tests pass" is NOT an acceptance criterion. "POST /api/auth/signup with {email, password} returns 201 with {id, email}" IS an acceptance criterion. - -**Round 2 — Test strategy:** -For each slice, state what type of tests are needed (unit, integration, e2e) and what specifically gets tested. Ask the user to confirm. Example: "S01 needs: unit test for password hashing, integration test for signup endpoint with valid and invalid inputs. No e2e needed for this slice." - -**Round 3 — Definition of done:** -State the end-to-end scenario that proves the milestone works. Ask the user to confirm. Example: "Done means: a new user can sign up, log in, receive a JWT, and use that JWT to access a protected endpoint — all verified by running the sequence manually or via integration test." 
- -After completing all 3 rounds, proceed to the Layer 4 Gate. - -### Layer 4 Gate - -Before advancing, use `ask_user_questions` with question ID containing `layer4_quality_gate`: - -``` -Header: "Quality Gate" -Question: "Quality bar defined. Ready to write context and roadmap?" -Options: - - "Yes, write the artifacts (Recommended)" — proceed to Output Phase - - "Want to adjust the quality bar" — user will clarify, then re-present quality -``` - -**CRITICAL — Non-bypassable gate:** Do NOT proceed to Output Phase until the user explicitly approves the quality bar. If `ask_user_questions` fails, errors, returns no response, or the user's response does not match a provided option, you MUST re-ask — never rationalize past the block. The gate exists to protect the user's work; treat a block as an instruction to wait, not an obstacle to work around. - ---- - -## Output Phase - -Once all four layers are complete, you have gathered: -- Confirmed scope (Layer 1) -- Approved architecture (Layer 2) -- Error handling strategy (Layer 3) -- Quality bar and acceptance criteria (Layer 4) - -### Capability Contract - -Before writing a roadmap, produce or update `.gsd/REQUIREMENTS.md`. - -Use it as the project's explicit capability contract. Requirements discovered during the 4-layer discussion should be captured here with source `user` or `inferred` as appropriate. - -**Print the requirements in chat before writing the roadmap.** Print a markdown table with columns: ID, Title, Status, Owner, Source. Group by status (Active, Deferred, Out of Scope). After the table, ask: "Confirm, adjust, or add?" **Non-bypassable:** If the user does not respond or gives an ambiguous answer, you MUST re-ask — never proceed to roadmap creation without explicit requirement confirmation. - -### Roadmap Preview - -Before writing any files, **print the planned roadmap in chat** so the user can see and approve it. Print a markdown table with columns: Slice, Title, Risk, Depends, Demo. One row per slice. Below the table, print the milestone definition of done as a bullet list. - -If the user raises a substantive objection, adjust the roadmap. Otherwise, present the roadmap and ask: "Ready to write, or want to adjust?" — one gate, not two. **Non-bypassable:** If the user does not respond or gives an ambiguous answer, you MUST re-ask — never write files without explicit approval. A missing response is not a "yes." - -### Naming Convention - -Directories use bare IDs. Files use ID-SUFFIX format. Titles live inside file content, not in names. -- Milestone dir: `.gsd/milestones/{{milestoneId}}/` -- Milestone files: `{{milestoneId}}-CONTEXT.md`, `{{milestoneId}}-ROADMAP.md` -- Slice dirs: `S01/`, `S02/`, etc. - -### Single Milestone - -Once the user is satisfied, in a single pass: -1. `mkdir -p .gsd/milestones/{{milestoneId}}/slices` -2. Write or update `.gsd/PROJECT.md` — use the **Project** output template below. Describe what the project is, its current state, and list the milestone sequence. -3. Write or update `.gsd/REQUIREMENTS.md` — use the **Requirements** output template below. Confirm requirement states, ownership, and traceability before roadmap creation. - -**Depth-Preservation Guidance for context.md:** -When writing context.md, preserve the user's exact terminology, emphasis, and specific framing from the discussion. Do not paraphrase user nuance into generic summaries. If the user said "craft feel," write "craft feel" — not "high-quality user experience." 
If they emphasized a specific constraint or negative requirement, carry that emphasis through verbatim. The context file is downstream agents' only window into this conversation — flattening specifics into generics loses the signal that shaped every decision. - -**Enhanced Context Requirement:** Because this is a prepared discussion, use the `context-enhanced` template which includes sections for Codebase Brief, Architectural Decisions, Interface Contracts, Error Handling Strategy, Testing Requirements, Acceptance Criteria, and Ecosystem Notes. Populate these from the 4-layer discussion: -- Codebase Brief: from Layer 1 presentation -- Architectural Decisions: from Layer 2 — each decision with rationale, evidence, alternatives -- Error Handling Strategy: from Layer 3 -- Testing Requirements and Acceptance Criteria: from Layer 4 -- Ecosystem Notes: key findings from the ecosystem brief - -4. Write `{{contextPath}}` — use the **Context Enhanced** output template below. Preserve key risks, unknowns, existing codebase constraints, integration points, and relevant requirements surfaced during discussion. -5. Call `gsd_plan_milestone` to create the roadmap. Decompose into demoable vertical slices with risk, depends, demo sentences, proof strategy, verification classes, milestone definition of done, requirement coverage, and a boundary map. If the milestone crosses multiple runtime boundaries, include an explicit final integration slice that proves the assembled system works end-to-end in a real environment. Use the **Roadmap** output template below to structure the tool call parameters. -6. For each architectural or pattern decision made during discussion, call `gsd_decision_save` — the tool auto-assigns IDs and regenerates `.gsd/DECISIONS.md` automatically. -7. {{commitInstruction}} - -After writing the files, say exactly: "Milestone {{milestoneId}} ready." — nothing else. Auto-mode will start automatically. - -### Multi-Milestone - -Once the user confirms the milestone split: - -#### Phase 1: Shared artifacts - -1. For each milestone, call `gsd_milestone_generate_id` to get its ID — never invent milestone IDs manually. Then `mkdir -p .gsd/milestones//slices`. -2. Write `.gsd/PROJECT.md` — use the **Project** output template below. -3. Write `.gsd/REQUIREMENTS.md` — use the **Requirements** output template below. Capture Active, Deferred, Out of Scope, and any already Validated requirements. Later milestones may have provisional ownership where slice plans do not exist yet. -4. For any architectural or pattern decisions made during discussion, call `gsd_decision_save` — the tool auto-assigns IDs and regenerates `.gsd/DECISIONS.md` automatically. - -#### Phase 2: Primary milestone - -5. Write a full enhanced `CONTEXT.md` for the primary milestone (the one discussed in depth). Use the `context-enhanced` template. -6. Call `gsd_plan_milestone` for **only the primary milestone** — detail-planning later milestones now is waste because the codebase will change. Include requirement coverage and a milestone definition of done. - -#### MANDATORY: depends_on Frontmatter in CONTEXT.md - -Every CONTEXT.md for a milestone that depends on other milestones MUST have YAML frontmatter with `depends_on`. The auto-mode state machine reads this field to determine execution order — without it, milestones may execute out of order or in parallel when they shouldn't. - -```yaml ---- -depends_on: [M001, M002] ---- - -# M003: Title -``` - -If a milestone has no dependencies, omit the frontmatter. 
The dependency chain from the milestone confirmation gate MUST be reflected in each CONTEXT.md frontmatter. Do NOT rely on QUEUE.md or PROJECT.md for dependency tracking — the state machine only reads CONTEXT.md frontmatter. - -#### Phase 3: Sequential readiness gate for remaining milestones - -For each remaining milestone **one at a time, in sequence**, decide the most likely readiness mode from the evidence you already have, then use `ask_user_questions` to let the user correct that recommendation. Present three options: - -- **"Discuss now"** — The user wants to conduct a focused discussion for this milestone in the current session, while the context from the broader discussion is still fresh. Proceed with a focused discussion for this milestone (Layer 1-4 protocol). When the discussion concludes, write a full enhanced `CONTEXT.md`. Then move to the gate for the next milestone. -- **"Write draft for later"** — This milestone has seed material from the current conversation but needs its own dedicated discussion in a future session. Write a `CONTEXT-DRAFT.md` capturing the seed material (what was discussed, key ideas, provisional scope, open questions). Mark it clearly as a draft, not a finalized context. **What happens downstream:** When auto-mode reaches this milestone, it pauses and notifies the user: "M00x has draft context — needs discussion. Run /gsd." The `/gsd` wizard shows a "Discuss from draft" option that seeds the new discussion with this draft, so nothing from the current conversation is lost. After the dedicated discussion produces a full CONTEXT.md, the draft file is automatically deleted. -- **"Just queue it"** — This milestone is identified but intentionally left without context. No context file is written — the directory already exists from Phase 1. **What happens downstream:** When auto-mode reaches this milestone, it pauses and notifies the user to run /gsd. The wizard starts a full discussion from scratch. - -**When "Discuss now" is chosen:** Run the full 4-layer protocol for that milestone using fresh preparation briefs scoped to that milestone. - -#### Milestone Gate Tracking (MANDATORY for multi-milestone) - -After EVERY Phase 3 gate decision, immediately write or update `.gsd/DISCUSSION-MANIFEST.json` with the cumulative state. This file is mechanically validated by the system before auto-mode starts — if gates are incomplete, auto-mode will NOT start. - -```json -{ - "primary": "M001", - "milestones": { - "M001": { "gate": "discussed", "context": "full" }, - "M002": { "gate": "discussed", "context": "full" }, - "M003": { "gate": "queued", "context": "none" } - }, - "total": 3, - "gates_completed": 3 -} -``` - -Write this file AFTER each gate decision, not just at the end. Update `gates_completed` incrementally. The system reads this file and BLOCKS auto-start if `gates_completed < total`. - -For single-milestone projects, do NOT write this file — it is only for multi-milestone discussions. - -#### Phase 4: Finalize - -7. {{multiMilestoneCommitInstruction}} - -After writing the files, say exactly: "Milestone M001 ready." — nothing else. Auto-mode will start automatically. 
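The DISCUSSION-MANIFEST contract above is mechanical enough to sketch in TypeScript. The file shape follows the JSON example; the helper name and the `drafted` gate value are assumptions, and the sketch assumes entries are added only as gates are decided:

```typescript
import { readFileSync, writeFileSync } from "node:fs";

interface GateState {
  gate: "discussed" | "drafted" | "queued"; // "drafted" is an assumed value for the draft path
  context: "full" | "draft" | "none";
}

interface DiscussionManifest {
  primary: string;
  milestones: Record<string, GateState>;
  total: number;
  gates_completed: number;
}

// Run after EVERY Phase 3 gate decision: auto-mode stays blocked while
// gates_completed < total, so the count must grow as decisions land.
function recordGateDecision(path: string, milestoneId: string, state: GateState): void {
  const manifest: DiscussionManifest = JSON.parse(readFileSync(path, "utf8"));
  manifest.milestones[milestoneId] = state;
  manifest.gates_completed = Object.keys(manifest.milestones).length;
  writeFileSync(path, JSON.stringify(manifest, null, 2) + "\n");
}
```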
- -{{inlinedTemplates}} diff --git a/src/resources/extensions/gsd/prompts/discuss.md b/src/resources/extensions/gsd/prompts/discuss.md index 4061bc054..160d99f91 100644 --- a/src/resources/extensions/gsd/prompts/discuss.md +++ b/src/resources/extensions/gsd/prompts/discuss.md @@ -28,6 +28,8 @@ After reflection is confirmed, decide the approach based on the actual scope — **Anti-reduction rule:** If the user describes a big vision, plan the big vision. Do not ask "what's the minimum viable version?" or try to reduce scope unless the user explicitly asks for an MVP or minimal version. When something is complex or risky, phase it into a later milestone — do not cut it. The user's ambition is the target, and your job is to sequence it intelligently, not shrink it. +{{preparationContext}} + ## Mandatory Investigation Before First Question Round Before asking your first question, do a mandatory investigation pass. This is not optional. @@ -47,10 +49,133 @@ This happens ONCE, before the first round. The goal: your first questions should For subsequent rounds, continue investigating between rounds — check docs, search, or scout as needed to make each round's questions smarter. But the first-round investigation is mandatory and explicit. Distribute searches across turns rather than clustering them in one turn. +## Layered Question Rounds + +Questions are organized into four layers. Each layer targets a specific depth dimension. At each layer: ask 1-3 open questions per round, investigate between rounds as needed, and gate before advancing. + +**Default to open questions.** Use `ask_user_questions` only when there are 2-3 genuinely distinct paths with clear tradeoffs (e.g., "REST vs GraphQL" or "Postgres vs SQLite"). For nuanced design questions, ask in plain text and let the user explain. + +**If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` for binary/ternary choices. Keep option labels short (3-5 words). Always include a freeform "Other / let me explain" option. When the user picks that option or writes a long freeform answer, switch to plain text follow-up for that thread before resuming structured questions. **IMPORTANT: Call `ask_user_questions` exactly once per turn. Never make multiple calls with the same or overlapping questions — wait for the user's response before asking the next round.** + +**If `{{structuredQuestionsAvailable}}` is `false`:** ask questions in plain text. Keep each round to 1-3 focused questions. Wait for answers before asking the next round. + +**Incremental persistence:** After every 2 question rounds (across any layer), silently save a `{{milestoneId}}-CONTEXT-DRAFT.md` using `gsd_summary_save` with `artifact_type: "CONTEXT-DRAFT"` and `milestone_id: "{{milestoneId}}"`. This protects confirmed work against session crashes. Do NOT mention this save to the user. + +### Identify Work Type + +Before starting Layer 1, identify the primary work type and state it: + +"Based on your description and the codebase, this is primarily **[work type]** work." + +Work types include: API/backend, UI/frontend, CLI/developer tool, data pipeline, ML/AI, infrastructure/platform, refactoring/migration, or a combination. The user can correct this. The classification shapes which questions deserve deep exploration at each layer. + +### Layer 1 — Scope + +Resolve what's in and what's out. 
Ask about: +- Feature boundaries — what exactly ships in this milestone vs later +- Ambiguities in the user's description — anything you're unsure about, ask +- Dependencies — what does this work depend on, what depends on it +- Priority — if scope needs trimming, what matters most + +Adapt depth to work type: +- **CLI work:** Focus on user mental model, command grammar, what existing commands do +- **Refactoring:** Focus on what changes vs what must stay identical + +**Depth-matching:** Simple, well-defined scope may need 1 round. Ambiguous or large scope may need 3-4 rounds. Don't pad rounds to hit a number. + +#### Layer 1 Gate + +Summarize scope decisions in the user's own terminology: +- What's included, what's excluded, what's deferred +- Key boundaries and constraints + +Then ask: **"Does this capture the scope? Adjust anything before we move on."** + +If the user adjusts, reflect the updated understanding and ask again. Do not advance until the user explicitly confirms. If the user says "looks good, let's move faster" at any gate, respect that and advance. + +--- + +### Layer 2 — Architecture + +Resolve how it's built. Ask about: +- Per-slice technical decisions — what approach for each major piece +- Inter-slice contracts — how do the pieces connect +- Library/framework choices — with evidence from investigation, not assumptions +- Integration with existing code — what patterns to follow, what to change + +Adapt depth to work type: +- **API work:** Contracts, versioning, backwards compatibility, auth boundaries +- **UI work:** Component boundaries, state management, data flow +- **Infrastructure:** Deployment topology, failure domains, rollback + +Between rounds, use your available web search tools to research technologies from the Codebase Brief. Search for "[technology] [version] best practices [current year]" and "[technology] [version] known issues". Present findings alongside your questions. + +#### Layer 2 Gate + +Summarize architecture decisions, each with: +- The decision and rationale +- Evidence source (codebase patterns, library docs, web research) +- Alternatives considered + +Then ask: **"Does this capture the architecture? Adjust anything before we move on."** + +Same gate rules: reflect adjustments, wait for confirmation. + +--- + +### Layer 3 — Error States + +Resolve what happens when things fail. Present this layer with an option: + +"We can go deep on error handling and failure modes, or I can apply sensible defaults based on the architecture decisions above. Which do you prefer?" + +If the user chooses defaults, summarize what the defaults are and gate. If the user chooses to go deep, ask about: +- Failure modes for each major component +- Error propagation between layers (API → frontend, service → database) +- Timeout, retry, and circuit-breaker strategies +- What the user sees when something fails + +Adapt depth to work type: +- **API work:** Rate limiting, timeout cascades, partial failure, status codes +- **UI work:** Loading states, optimistic updates, offline behavior, error boundaries +- **Data pipelines:** Data corruption, checkpoint recovery, idempotency + +#### Layer 3 Gate + +Summarize error handling strategy. Then ask: **"Does this capture how errors should be handled? Adjust anything before we move on."** + +--- + +### Layer 4 — Quality Bar + +Resolve what "done" means concretely. 
Ask about: +- Per-slice acceptance criteria — specific enough for automated verification +- Test strategy — what types of tests, what coverage expectations +- Definition of done — what must be true before the milestone ships +- Non-functional requirements — performance, accessibility, security if relevant + +Adapt depth to work type: +- **CLI work:** Shell compatibility, error message clarity, exit code semantics +- **Refactoring:** Behavioral equivalence tests, not just code coverage +- **UI work:** Visual regression criteria, responsive breakpoints + +#### Layer 4 Gate + +Summarize quality bar: acceptance criteria, test strategy, definition of done. Then ask: **"Does this capture the quality bar? Adjust anything before we move on to requirements and roadmap."** + +--- + +### Layer cadence + +- Do not count the reflection step as a question round. Rounds start at Layer 1 after reflection is confirmed. +- When all four layer gates have been confirmed (or skipped by the user), move to the Depth Verification step below. Do not ask a separate "ready to wrap up?" gate — the depth verification confirms the full picture. ## Questioning Philosophy You are a thinking partner, not an interviewer. +**Turn-taking contract (non-bypassable).** Never fabricate, simulate, or role-play user responses. Never generate fake transcript markers like `[User]`, `[Human]`, or `User:` to invent input. Prior conversation context may be provided to you inside `` with `` / `` XML tags — treat those as read-only context and never emit those tags in your response. Ask one question round (1-3 questions) per turn, then stop and wait for the user's actual response before continuing. If you use `ask_user_questions`, call it at most once per turn and treat its returned response as the only valid structured user input for that round. + **Start open, follow energy.** Let the user's enthusiasm guide where you dig deeper. If they light up about a particular aspect, explore it. If they're vague about something, that's where you probe. **Challenge vagueness, make abstract concrete.** When the user says something abstract ("it should be smart" / "it needs to handle edge cases" / "good UX"), push for specifics. What does "smart" mean in practice? Which edge cases? What does good UX look like for this specific interaction? @@ -92,29 +217,27 @@ Do NOT offer to proceed until ALL of the following are satisfied. Track these in Before offering to proceed, demonstrate absorption: reference specific things the user emphasized, specific terminology they used, specific nuance they sharpened — and show how those shaped your understanding. Synthesize, don't recite. "Your emphasis on X led me to prioritize Y over Z" is good. "You said X, you said Y, you said Z" is not. The user should feel heard in the specifics, not just acknowledged in the abstract. -**Questioning depth should match scope.** Simple, well-defined work needs fewer rounds — maybe 1-2. Large, ambiguous visions need more — maybe 4+. Don't pad rounds to hit a number. Stop when the depth checklist is satisfied and you genuinely understand the work. - -Do not count the reflection step as a question round. Rounds start after reflection is confirmed. - ## Depth Verification Before moving to the wrap-up gate, present a structured depth summary as a checkpoint. **Print the summary as normal chat text first** — this is where the formatting renders properly. Structure the summary across the depth checklist dimensions using the user's own terminology and framing.
Cover: what you understood them to be building, what shaped your understanding most (their emphasis, constraints, concerns), and any areas where you're least confident in your understanding. -**Then** use `ask_user_questions` with a short confirmation question — NOT the summary itself. The question field is designed for single sentences, not multi-paragraph summaries. +**Then confirm:** -**Convention:** The question ID must contain `depth_verification` (e.g., `depth_verification_confirm`). This naming convention enables downstream mechanical detection of this step. +**If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` with: +- header: "Depth Check" +- question: "Did I capture the depth right?" +- options: "Yes, you got it (Recommended)", "Not quite — let me clarify" +- **The question ID must contain `depth_verification`** (e.g., `depth_verification_confirm`) — this naming convention enables downstream mechanical detection and the write-gate. -Example flow: -1. Print in chat: the full depth summary with markdown formatting (headers, bold, bullets) -2. Call `ask_user_questions` with: header "Depth Check", question "Did I capture the depth right?", options "Yes, you got it (Recommended)" and "Not quite — let me clarify" +**If `{{structuredQuestionsAvailable}}` is `false`:** ask in plain text: "Did I capture that correctly? If not, tell me what I missed." Wait for explicit confirmation before proceeding. **The same non-bypassable gate applies to the plain-text path** — if the user does not respond, gives an ambiguous answer, or does not explicitly confirm, you MUST re-ask. Never rationalize past a missing confirmation. If they clarify, absorb the correction and re-verify. The depth verification is the required write-gate. Do **not** add another meta "ready to proceed?" checkpoint immediately after it unless there is still material ambiguity. -**CRITICAL — Non-bypassable gate:** The system mechanically blocks CONTEXT.md writes until the user selects the "(Recommended)" option. If the user declines, cancels, or the tool fails, you MUST re-ask — never rationalize past the block ("tool not responding, I'll proceed" is forbidden). The gate exists to protect the user's work; treat a block as an instruction, not an obstacle to work around. +**CRITICAL — Non-bypassable gate:** The system mechanically blocks CONTEXT.md writes until the user selects the "(Recommended)" option (structured path) or explicitly confirms (plain-text path). If the user declines, cancels, does not respond, or the tool fails, you MUST re-ask — never rationalize past the block ("tool not responding, I'll proceed" is forbidden). The gate exists to protect the user's work; treat a block as an instruction, not an obstacle to work around. ## Wrap-up Gate @@ -203,6 +326,14 @@ Once the user is satisfied, in a single pass: **Depth-Preservation Guidance for context.md:** When writing context.md, preserve the user's exact terminology, emphasis, and specific framing from the discussion. Do not paraphrase user nuance into generic summaries. If the user said "craft feel," write "craft feel" — not "high-quality user experience." If they emphasized a specific constraint or negative requirement, carry that emphasis through verbatim. The context file is downstream agents' only window into this conversation — flattening specifics into generics loses the signal that shaped every decision. 
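For the structured depth-verification path described above, the prescribed pieces compose into one call. A sketch of the payload: the id convention, header, question, and options are fixed by the prompt, while the surrounding field layout is an assumption, since `ask_user_questions` is not schema-documented here:

```typescript
// The full depth summary has already been printed as normal chat text;
// this is only the short confirmation gate.
const depthCheck = {
  questions: [
    {
      id: "depth_verification_confirm", // must contain "depth_verification" to unlock the write-gate
      header: "Depth Check",
      question: "Did I capture the depth right?",
      options: ["Yes, you got it (Recommended)", "Not quite — let me clarify"],
    },
  ],
};
// ask_user_questions(depthCheck): at most once per turn; re-ask on failure,
// cancellation, or any response that matches neither option.
```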
+**Structured sections from discussion layers:** +When writing CONTEXT.md, include structured sections that map to the discussion layers: +- **Scope** — what's in, what's out, what's deferred (from Layer 1 gate summary) +- **Architectural Decisions** — each with rationale, evidence source, alternatives considered (from Layer 2 gate summary) +- **Error Handling Strategy** — failure modes, propagation, user-facing error behavior (from Layer 3 gate summary) +- **Acceptance Criteria** — per-slice criteria specific enough for the planner to use directly (from Layer 4 gate summary) +These sections are in addition to whatever other context the discussion surfaced. + 4. Write `{{contextPath}}` — use the **Context** output template below. Preserve key risks, unknowns, existing codebase constraints, integration points, and relevant requirements surfaced during discussion. 5. Call `gsd_plan_milestone` to create the roadmap. Decompose into demoable vertical slices with risk, depends, demo sentences, proof strategy, verification classes, milestone definition of done, requirement coverage, and a boundary map. If the milestone crosses multiple runtime boundaries, include an explicit final integration slice that proves the assembled system works end-to-end in a real environment. Use the **Roadmap** output template below to structure the tool call parameters. 6. For each architectural or pattern decision made during discussion, call `gsd_decision_save` — the tool auto-assigns IDs and regenerates `.gsd/DECISIONS.md` automatically. @@ -242,7 +373,7 @@ If a milestone has no dependencies, omit the frontmatter. The dependency chain f #### Phase 3: Sequential readiness gate for remaining milestones -For each remaining milestone **one at a time, in sequence**, decide the most likely readiness mode from the evidence you already have, then use `ask_user_questions` to let the user correct that recommendation. **Non-bypassable:** If `ask_user_questions` fails, errors, returns no response, or the user's response does not match a provided option, you MUST re-ask — never rationalize past the block or auto-select a readiness mode. Present three options: +For each remaining milestone **one at a time, in sequence**, decide the most likely readiness mode from the evidence you already have, then present the three options below to the user. **If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions`. **If `{{structuredQuestionsAvailable}}` is `false`:** present the options as a plain-text numbered list and ask the user to type their choice. **Non-bypassable:** If the user does not respond, gives an ambiguous answer, or the tool fails, you MUST re-ask — never rationalize past the block or auto-select a readiness mode. Present three options: - **"Discuss now"** — The user wants to conduct a focused discussion for this milestone in the current session, while the context from the broader discussion is still fresh. Proceed with a focused discussion for this milestone (reflection → investigation → questioning → depth verification). When the discussion concludes, write a full `CONTEXT.md`. Then move to the gate for the next milestone. - **"Write draft for later"** — This milestone has seed material from the current conversation but needs its own dedicated discussion in a future session. Write a `CONTEXT-DRAFT.md` capturing the seed material (what was discussed, key ideas, provisional scope, open questions). Mark it clearly as a draft, not a finalized context. 
**What happens downstream:** When auto-mode reaches this milestone, it pauses and notifies the user: "M00x has draft context — needs discussion. Run /gsd." The `/gsd` wizard shows a "Discuss from draft" option that seeds the new discussion with this draft, so nothing from the current conversation is lost. After the dedicated discussion produces a full CONTEXT.md, the draft file is automatically deleted. @@ -254,9 +385,9 @@ Before writing each milestone's CONTEXT.md (whether primary or secondary), you M 1. **Read the actual code** for every file or module you reference. Confirm APIs exist, check what functions actually do, identify phantom capabilities (code that exists but isn't wired up). 2. **Check for stale assumptions** — the codebase changes. Verify referenced modules still work as described. -3. **Present findings** — use `ask_user_questions` with a question ID containing BOTH `depth_verification` AND the milestone ID (e.g., `depth_verification_M002`). Present: what you're about to write, key technical findings from investigation, risks the code review surfaced. +3. **Present findings** — **If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` with a question ID containing BOTH `depth_verification` AND the milestone ID (e.g., `depth_verification_M002`). Present: what you're about to write, key technical findings from investigation, risks the code review surfaced. **If `{{structuredQuestionsAvailable}}` is `false`:** present the same findings in plain text and ask for explicit confirmation before proceeding. -**The system mechanically blocks CONTEXT.md writes until the per-milestone depth verification passes.** Each milestone needs its own verification — one global verification does not unlock all milestones. +**The system mechanically blocks CONTEXT.md writes until the per-milestone depth verification passes** (structured path: user selects "(Recommended)" option; plain-text path: user explicitly confirms). Each milestone needs its own verification — one global verification does not unlock all milestones. **Why sequential, not batch:** After writing the primary milestone's context and roadmap, the agent still has context window capacity. Asking one milestone at a time lets the user decide per-milestone whether to invest that remaining capacity in a focused discussion now, or defer to a future session. A batch question ("Ready/Draft/Queue for M002, M003, M004?") forces the user to decide everything upfront without knowing how much session capacity remains. diff --git a/src/resources/extensions/gsd/prompts/execute-task.md b/src/resources/extensions/gsd/prompts/execute-task.md index ddf3fa1d7..5fc9dc835 100644 --- a/src/resources/extensions/gsd/prompts/execute-task.md +++ b/src/resources/extensions/gsd/prompts/execute-task.md @@ -22,6 +22,8 @@ A researcher explored the codebase and a planner decomposed the work — you are {{slicePlanExcerpt}} +{{gatesToClose}} + ## Backing Source Artifacts - Slice plan: `{{planPath}}` - Task plan source: `{{taskPlanPath}}` @@ -32,29 +34,30 @@ Then: 0. Narrate step transitions, key implementation decisions, and verification outcomes as you work. Keep it terse — one line between tool-call clusters, not between every call — but write complete sentences in user-facing prose, not shorthand notes or scratchpad fragments. 1. {{skillActivation}} Follow any activated skills before writing code. If no skills match this task, skip this step. 2. 
Execute the steps in the inlined task plan, adapting minor local mismatches when the surrounding code differs from the planner's snapshot -3. Build the real thing. If the task plan says "create login endpoint", build an endpoint that actually authenticates against a real store, not one that returns a hardcoded success response. If the task plan says "create dashboard page", build a page that renders real data from the API, not a component with hardcoded props. Stubs and mocks are for tests, not for the shipped feature. -4. Write or update tests as part of execution — tests are verification, not an afterthought. If the slice plan defines test files in its Verification section and this is the first task, create them (they should initially fail). -5. When implementing non-trivial runtime behavior (async flows, API boundaries, background processes, error paths), add or preserve agent-usable observability. Skip this for simple changes where it doesn't apply. +3. Before any `Write` that creates an artifact or output file, check whether that path already exists. If it does, read it first and decide whether the work is already done, should be extended, or truly needs replacement. "Create" in the plan does **not** mean the file is missing — a prior session may already have started it. +4. Build the real thing. If the task plan says "create login endpoint", build an endpoint that actually authenticates against a real store, not one that returns a hardcoded success response. If the task plan says "create dashboard page", build a page that renders real data from the API, not a component with hardcoded props. Stubs and mocks are for tests, not for the shipped feature. +5. Write or update tests as part of execution — tests are verification, not an afterthought. If the slice plan defines test files in its Verification section and this is the first task, create them (they should initially fail). +6. When implementing non-trivial runtime behavior (async flows, API boundaries, background processes, error paths), add or preserve agent-usable observability. Skip this for simple changes where it doesn't apply. **Background process rule:** Never use bare `command &` to run background processes. The shell's `&` operator leaves stdout/stderr attached to the parent, which causes the Bash tool to hang indefinitely waiting for those streams to close. Always redirect output before backgrounding: - Correct: `command > /dev/null 2>&1 &` or `nohup command > /dev/null 2>&1 &` - Example: `python -m http.server 8080 > /dev/null 2>&1 &` (NOT `python -m http.server 8080 &`) - Preferred: use the `bg_shell` tool if available — it manages process lifecycle correctly without stream-inheritance issues -6. If the task plan includes a **Failure Modes** section (Q5), implement the error/timeout/malformed handling specified. Verify each dependency's failure path is handled. Skip if the section is absent. -7. If the task plan includes a **Load Profile** section (Q6), implement protections for the identified 10x breakpoint (connection pooling, rate limiting, pagination, etc.). Skip if absent. -8. If the task plan includes a **Negative Tests** section (Q7), write the specified negative test cases alongside the happy-path tests — malformed inputs, error paths, and boundary conditions. Skip if absent. -9. Verify must-haves are met by running concrete checks (tests, commands, observable behaviors) -10. Run the slice-level verification checks defined in the slice plan's Verification section. Track which pass. 
On the final task of the slice, all must pass before marking done. On intermediate tasks, partial passes are expected — note which ones pass in the summary. -11. After the verification gate runs (you'll see gate results in stderr/notify output), populate the `## Verification Evidence` table in your task summary with the check results. Use the `formatEvidenceTable` format: one row per check with command, exit code, verdict (✅ pass / ❌ fail), and duration. If no verification commands were discovered, note that in the section. -12. If the task touches UI, browser flows, DOM behavior, or user-visible web state: +7. If the task plan includes a **Failure Modes** section (Q5), implement the error/timeout/malformed handling specified. Verify each dependency's failure path is handled. Skip if the section is absent. +8. If the task plan includes a **Load Profile** section (Q6), implement protections for the identified 10x breakpoint (connection pooling, rate limiting, pagination, etc.). Skip if absent. +9. If the task plan includes a **Negative Tests** section (Q7), write the specified negative test cases alongside the happy-path tests — malformed inputs, error paths, and boundary conditions. Skip if absent. +10. Verify must-haves are met by running concrete checks (tests, commands, observable behaviors) +11. Run the slice-level verification checks defined in the slice plan's Verification section. Track which pass. On the final task of the slice, all must pass before marking done. On intermediate tasks, partial passes are expected — note which ones pass in the summary. +12. After the verification gate runs (you'll see gate results in stderr/notify output), populate the `## Verification Evidence` table in your task summary with the check results. Use the `formatEvidenceTable` format: one row per check with command, exit code, verdict (✅ pass / ❌ fail), and duration. If no verification commands were discovered, note that in the section. +13. If the task touches UI, browser flows, DOM behavior, or user-visible web state: - exercise the real flow in the browser - prefer `browser_batch` when the next few actions are obvious and sequential - prefer `browser_assert` for explicit pass/fail verification of the intended outcome - use `browser_diff` when an action's effect is ambiguous - use console/network/dialog diagnostics when validating async, stateful, or failure-prone UI - record verification in terms of explicit checks passed/failed, not only prose interpretation -13. If the task plan includes an Observability Impact section, verify those signals directly. Skip this step if the task plan omits the section. -14. **If execution is running long or verification fails:** +14. If the task plan includes an Observability Impact section, verify those signals directly. Skip this step if the task plan omits the section. +15. **If execution is running long or verification fails:** **Context budget:** You have approximately **{{verificationBudget}}** reserved for verification context. If you've used most of your context and haven't finished all steps, stop implementing and prioritize writing the task summary with clear notes on what's done and what remains. A partial summary that enables clean resumption is more valuable than one more half-finished step with no documentation. Never sacrifice summary quality for one more implementation step. @@ -65,13 +68,13 @@ Then: - Distinguish "I know" from "I assume." Observable facts (the error says X) are strong evidence. 
Assumptions (this library should work this way) need verification. - Know when to stop. If you've tried 3+ fixes without progress, your mental model is probably wrong. Stop. List what you know for certain. List what you've ruled out. Form fresh hypotheses from there. - Don't fix symptoms. Understand *why* something fails before changing code. A test that passes after a change you don't understand is luck, not a fix. -15. **Blocker discovery:** If execution reveals that the remaining slice plan is fundamentally invalid — not just a bug or minor deviation, but a plan-invalidating finding like a wrong API, missing capability, or architectural mismatch — set `blocker_discovered: true` in the task summary frontmatter and describe the blocker clearly in the summary narrative. Do NOT set `blocker_discovered: true` for ordinary debugging, minor deviations, or issues that can be fixed within the current task or the remaining plan. This flag triggers an automatic replan of the slice. -16. If you made an architectural, pattern, library, or observability decision during this task that downstream work should know about, append it to `.gsd/DECISIONS.md` (read the template at `~/.gsd/agent/extensions/gsd/templates/decisions.md` if the file doesn't exist yet). Not every task produces decisions — only append when a meaningful choice was made. -17. If you discover a non-obvious rule, recurring gotcha, or useful pattern during execution, append it to `.gsd/KNOWLEDGE.md`. Only add entries that would save future agents from repeating your investigation. Don't add obvious things. -18. Read the template at `~/.gsd/agent/extensions/gsd/templates/task-summary.md` -19. Use that template to prepare the completion content you will pass to `gsd_complete_task` using the camelCase fields `milestoneId`, `sliceId`, `taskId`, `oneLiner`, `narrative`, `verification`, and `verificationEvidence`. Do **not** manually write `{{taskSummaryPath}}` — the DB-backed tool is the canonical write path and renders the summary file for you. -20. Call `gsd_complete_task` with milestoneId, sliceId, taskId, and the completion fields derived from the template. This is your final required step — do NOT manually edit PLAN.md checkboxes. The tool marks the task complete, updates the DB, renders `{{taskSummaryPath}}`, and updates PLAN.md automatically. -21. Do not run git commands — the system reads your task summary after completion and creates a meaningful commit from it (type inferred from title, message from your one-liner, key files from frontmatter). Write a clear, specific one-liner in the summary — it becomes the commit message. +16. **Blocker discovery:** If execution reveals that the remaining slice plan is fundamentally invalid — not just a bug or minor deviation, but a plan-invalidating finding like a wrong API, missing capability, or architectural mismatch — set `blocker_discovered: true` in the task summary frontmatter and describe the blocker clearly in the summary narrative. Do NOT set `blocker_discovered: true` for ordinary debugging, minor deviations, or issues that can be fixed within the current task or the remaining plan. This flag triggers an automatic replan of the slice. +17. If you made an architectural, pattern, library, or observability decision during this task that downstream work should know about, append it to `.gsd/DECISIONS.md` (read the template at `~/.gsd/agent/extensions/gsd/templates/decisions.md` if the file doesn't exist yet). 
Not every task produces decisions — only append when a meaningful choice was made. +18. If you discover a non-obvious rule, recurring gotcha, or useful pattern during execution, append it to `.gsd/KNOWLEDGE.md`. Only add entries that would save future agents from repeating your investigation. Don't add obvious things. +19. Read the template at `~/.gsd/agent/extensions/gsd/templates/task-summary.md` +20. Use that template to prepare the completion content you will pass to `gsd_complete_task` using the camelCase fields `milestoneId`, `sliceId`, `taskId`, `oneLiner`, `narrative`, `verification`, and `verificationEvidence`. Do **not** manually write `{{taskSummaryPath}}` — the DB-backed tool is the canonical write path and renders the summary file for you. +21. Call `gsd_complete_task` with milestoneId, sliceId, taskId, and the completion fields derived from the template. This is your final required step — do NOT manually edit PLAN.md checkboxes. The tool marks the task complete, updates the DB, renders `{{taskSummaryPath}}`, and updates PLAN.md automatically. +22. Do not run git commands — the system reads your task summary after completion and creates a meaningful commit from it (type inferred from title, message from your one-liner, key files from frontmatter). Write a clear, specific one-liner in the summary — it becomes the commit message. All work stays in your working directory: `{{workingDirectory}}`. diff --git a/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md b/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md index efa3cda62..6fa33ddc3 100644 --- a/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md +++ b/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md @@ -32,6 +32,8 @@ Ask **1–3 questions per round**. Keep each question focused on one of: - **The biggest technical unknowns / risks** — what could fail, what hasn't been proven - **What external systems/services this touches** — APIs, databases, third-party services +**Never fabricate or simulate user input.** Never generate fake transcript markers like `[User]`, `[Human]`, or `User:`. Ask one question round, then wait for the user's actual response before continuing. + **If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` for each round. 1–3 questions per call, each as a separate question object. Keep option labels short (3–5 words). Always include a freeform "Other / let me explain" option. When the user picks that option or writes a long freeform answer, switch to plain text follow-up for that thread before resuming structured questions. **IMPORTANT: Call `ask_user_questions` exactly once per turn. Never make multiple calls with the same or overlapping questions — wait for the user's response before asking the next round.** **If `{{structuredQuestionsAvailable}}` is `false`:** ask questions in plain text. Keep each round to 1–3 focused questions. Wait for answers before asking the next round. diff --git a/src/resources/extensions/gsd/prompts/guided-discuss-slice.md b/src/resources/extensions/gsd/prompts/guided-discuss-slice.md index e182bc417..871d304f3 100644 --- a/src/resources/extensions/gsd/prompts/guided-discuss-slice.md +++ b/src/resources/extensions/gsd/prompts/guided-discuss-slice.md @@ -22,6 +22,8 @@ Do **not** go deep — just enough that your questions reflect what's actually t ### Question rounds +**Never fabricate or simulate user input.** Never generate fake transcript markers like `[User]`, `[Human]`, or `User:`. 
Ask one question round, then wait for the user's actual response before continuing. + **If `{{structuredQuestionsAvailable}}` is `true`:** Ask **1–3 questions per round** using `ask_user_questions`. **Call `ask_user_questions` exactly once per turn — never make multiple calls with the same or overlapping questions. Wait for the user's response before asking the next round.** **If `{{structuredQuestionsAvailable}}` is `false`:** Ask **1–3 questions per round** in plain text. Number them and wait for the user's response before asking the next round. Keep each question focused on one of: diff --git a/src/resources/extensions/gsd/prompts/guided-resume-task.md b/src/resources/extensions/gsd/prompts/guided-resume-task.md index 3b15c0cad..71cbea2e5 100644 --- a/src/resources/extensions/gsd/prompts/guided-resume-task.md +++ b/src/resources/extensions/gsd/prompts/guided-resume-task.md @@ -1 +1 @@ -Resume interrupted work. Find the continue file (`{{sliceId}}-CONTINUE.md` or `continue.md`) in slice {{sliceId}} of milestone {{milestoneId}}, read it, and use it as the recovery contract for where to pick up. Do **not** delete the continue file immediately. Keep it until the task is successfully completed or you have written a newer summary/continue artifact that clearly supersedes it. If the resumed attempt fails again, update or replace the continue file so no recovery context is lost. {{skillActivation}} +Resume interrupted work. Find the continue file (`{{sliceId}}-CONTINUE.md` or `continue.md`) in slice {{sliceId}} of milestone {{milestoneId}}, read it, and use it as the recovery contract for where to pick up. Before you create any expected artifact or output file, check whether it already exists and read it first — a prior session may already have started or completed that work. Do **not** delete the continue file immediately. Keep it until the task is successfully completed or you have written a newer summary/continue artifact that clearly supersedes it. If the resumed attempt fails again, update or replace the continue file so no recovery context is lost. {{skillActivation}} diff --git a/src/resources/extensions/gsd/prompts/queue.md b/src/resources/extensions/gsd/prompts/queue.md index 5bbdd7b2a..8b8807c2a 100644 --- a/src/resources/extensions/gsd/prompts/queue.md +++ b/src/resources/extensions/gsd/prompts/queue.md @@ -18,6 +18,7 @@ Say exactly: "What do you want to add?" — nothing else. Wait for the user's an ## Discussion Phase After they describe it, your job is to understand the new work deeply enough to create context files that a future planning session can use. +Never fabricate or simulate user input during this discussion. Never generate fake transcript markers like `[User]`, `[Human]`, or `User:`. Ask one question round, then wait for the user's actual response before continuing. **If the user provides a file path or pastes a large document** (spec, design doc, product plan, chat export), read it fully before asking questions. Use it as the starting point — don't ask them to re-explain what's already in the document. Your questions should fill gaps and resolve ambiguities the document doesn't cover. 
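The read-before-create rule that now appears in both `guided-resume-task.md` and `execute-task.md` reduces to a small guard. A minimal sketch assuming Node's `fs` API; the helper name is invented:

```typescript
import { existsSync, readFileSync } from "node:fs";

// "Create" in a plan does not guarantee the file is missing: a prior session
// may already have started or finished it. Read before any fresh Write.
function contentBeforeCreate(path: string): string | null {
  if (!existsSync(path)) return null; // genuinely new, safe to create from scratch
  return readFileSync(path, "utf8"); // inspect: already done, extend, or replace?
}
```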
@@ -36,11 +37,11 @@ Don't go deep — just enough that your next question reflects what's actually t - How the new work relates to existing milestones — overlap, dependencies, prerequisites - If `.gsd/REQUIREMENTS.md` exists: which unmet Active or Deferred requirements this queued work advances -**Then use ask_user_questions** to dig into gray areas — scope boundaries, proof expectations, integration choices, tech preferences when they materially matter, and what's in vs out. 1-3 questions per round. +**Then use ask_user_questions** to dig into gray areas — scope boundaries, proof expectations, integration choices, tech preferences when they materially matter, and what's in vs out. Ask 1-3 questions per round, then wait for the user's response before asking the next round. If a `GSD Skill Preferences` block is present in system context, use it to decide which skills to load and follow during discuss/planning work, but do not let it override the required discuss flow or artifact requirements. -**Self-regulate:** Do **not** ask a meta "ready to queue?" question after every round. Keep going until you have enough depth to write the context well, then use a single wrap-up prompt if needed. If the user clearly keeps adding detail instead of objecting, treat that as permission to continue. +**Self-regulate:** Do **not** ask a meta "ready to queue?" question after every round. Keep going until you have enough depth to write the context well, then use a single wrap-up prompt if needed. Do not infer permission to continue from silence or from partial prior answers — each new round requires an actual user response. ## Existing Milestone Awareness diff --git a/src/resources/extensions/gsd/prompts/system.md b/src/resources/extensions/gsd/prompts/system.md index 45998c36e..d74f7a3a7 100644 --- a/src/resources/extensions/gsd/prompts/system.md +++ b/src/resources/extensions/gsd/prompts/system.md @@ -35,6 +35,7 @@ GSD ships with bundled skills. Load the relevant skill file with the `read` tool - Read before edit. - Reproduce before fix when possible. - Work is not done until the relevant verification has passed. +- **Never fabricate, simulate, or role-play user responses.** Never generate markers like `[User]`, `[Human]`, `User:`, or similar to represent user input inside your own output. Prior conversation context may be provided to you inside an XML-wrapped context block containing per-turn user/assistant tags — treat those as read-only context and never emit those tags in your response. Ask one question round (1-3 questions), then stop and wait for the user's actual response before continuing. If `ask_user_questions` is available, treat its returned response as the only valid structured user input for that round. - Never print, echo, log, or restate secrets or credentials. Report only key names and applied/skipped status. - Never ask the user to edit `.env` files or set secrets manually. Use `secure_env_collect`. - In enduring files, write current state only unless the file is explicitly historical. 
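For illustration, a compliant structured round under the rule above is a single tool call followed by a hard stop until the user answers (a minimal sketch: the `ask_user_questions` schema is not shown in this diff, so the field names below are assumptions):

```ts
// Hypothetical payload shape, not the real schema; it only illustrates the
// "one call per turn, one freeform escape hatch" rule from system.md.
ask_user_questions({
  questions: [
    {
      prompt: "Which storage backend should this milestone target?",
      options: ["SQLite (current)", "Postgres", "Other / let me explain"],
    },
  ],
});
// Then stop. No second call, no fabricated `[User]` reply; wait for the
// actual response before asking the next round.
```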
diff --git a/src/resources/extensions/gsd/prompts/validate-milestone.md b/src/resources/extensions/gsd/prompts/validate-milestone.md index 0b7046b7f..f5a200602 100644 --- a/src/resources/extensions/gsd/prompts/validate-milestone.md +++ b/src/resources/extensions/gsd/prompts/validate-milestone.md @@ -18,6 +18,8 @@ All relevant context has been preloaded below — the roadmap, all slice summari {{inlinedContext}} +{{gatesToEvaluate}} + ## Execution Protocol ### Step 1 — Dispatch Parallel Reviewers @@ -31,7 +33,7 @@ Prompt: "Review milestone {{milestoneId}} requirements coverage. Working directo Prompt: "Review milestone {{milestoneId}} cross-slice integration. Working directory: {{workingDirectory}}. Read `{{roadmapPath}}` and find the boundary map (produces/consumes contracts). For each boundary, check that the producing slice's SUMMARY confirms it produced the artifact, and the consuming slice's SUMMARY confirms it consumed it. Output a markdown table: Boundary | Producer Summary | Consumer Summary | Status. End with a one-line verdict: PASS if all boundaries honored, NEEDS-ATTENTION if any gaps." **Reviewer C — Assessment & Acceptance Criteria** -Prompt: "Review milestone {{milestoneId}} assessment evidence and acceptance criteria. Working directory: {{workingDirectory}}. Read `.gsd/{{milestoneId}}/CONTEXT.md` for acceptance criteria. Check for ASSESSMENT files in each slice directory. Verify each acceptance criterion maps to either a passing assessment result or clear SUMMARY evidence. Output a checklist: [ ] Criterion | Evidence. End with a one-line verdict: PASS if all criteria met, NEEDS-ATTENTION if gaps exist." +Prompt: "Review milestone {{milestoneId}} assessment evidence and acceptance criteria. Working directory: {{workingDirectory}}. Read `.gsd/{{milestoneId}}/CONTEXT.md` for acceptance criteria. Check for ASSESSMENT files in each slice directory. Verify each acceptance criterion maps to either a passing assessment result or clear SUMMARY evidence. Then review the inlined milestone verification classes from planning. For each non-empty planned class, output a markdown table: Class | Planned Check | Evidence | Verdict. Use the exact class names `Contract`, `Integration`, `Operational`, and `UAT` whenever those classes are present. If no verification classes were planned, say that explicitly. Output two sections: `Acceptance Criteria` with a checklist `[ ] Criterion | Evidence`, and `Verification Classes` with the table. End with a one-line verdict: PASS if all criteria and verification classes are covered, NEEDS-ATTENTION if gaps exist." ### Step 2 — Synthesize Findings @@ -70,6 +72,7 @@ reviewers: 3 ``` Call `gsd_validate_milestone` with the camelCase fields `milestoneId`, `verdict`, `remediationRound`, `successCriteriaChecklist`, `sliceDeliveryAudit`, `crossSliceIntegration`, `requirementCoverage`, `verdictRationale`, and `remediationPlan` when needed. If you include verification-class analysis, pass it in `verificationClasses`. +Extract the `Verification Classes` subsection from Reviewer C and pass it verbatim in `verificationClasses` so the persisted validation output uses the canonical class names `Contract`, `Integration`, `Operational`, and `UAT`. **DB access safety:** Do NOT query `.gsd/gsd.db` directly via `sqlite3` or `node -e require('better-sqlite3')` — the engine owns the WAL connection. Use `gsd_milestone_status` to read milestone and slice state. All data you need is already inlined in the context above or accessible via the `gsd_*` tools. 
Direct DB access corrupts the WAL and bypasses tool-level validation. diff --git a/src/resources/extensions/gsd/rtk-status.ts deleted file mode 100644 index f3f519cdf..000000000 --- a/src/resources/extensions/gsd/rtk-status.ts +++ /dev/null @@ -1,53 +0,0 @@ -import type { ExtensionContext } from "@gsd/pi-coding-agent"; -import { - ensureRtkSessionBaseline, - formatRtkSavingsLabel, - getRtkSessionSavings, -} from "../shared/rtk-session-stats.js"; -import { loadEffectiveGSDPreferences } from "./preferences.js"; - -const STATUS_KEY = "gsd-rtk"; -const REFRESH_INTERVAL_MS = 30_000; - -let refreshTimer: ReturnType<typeof setInterval> | null = null; - -function clearTimer(): void { - if (refreshTimer) { - clearInterval(refreshTimer); - refreshTimer = null; - } -} - -function isRtkEnabledInPrefs(): boolean { - return loadEffectiveGSDPreferences()?.preferences.experimental?.rtk === true; -} - -function updateStatus(ctx: ExtensionContext): void { - if (!ctx.hasUI) return; - if (!isRtkEnabledInPrefs()) return; - - const basePath = ctx.cwd; - const sessionId = ctx.sessionManager.getSessionId(); - ensureRtkSessionBaseline(basePath, sessionId); - const savings = getRtkSessionSavings(basePath, sessionId); - ctx.ui.setStatus(STATUS_KEY, formatRtkSavingsLabel(savings) ?? undefined); -} - -export function startRtkStatusUpdates(ctx: ExtensionContext): void { - clearTimer(); - if (!isRtkEnabledInPrefs()) { - // Ensure any previously set status is cleared (e.g. preference was toggled off) - ctx.ui.setStatus(STATUS_KEY, undefined); - return; - } - updateStatus(ctx); - if (!ctx.hasUI) return; - refreshTimer = setInterval(() => { - updateStatus(ctx); - }, REFRESH_INTERVAL_MS); -} - -export function stopRtkStatusUpdates(ctx?: ExtensionContext): void { - clearTimer(); - ctx?.ui.setStatus(STATUS_KEY, undefined); -} diff --git a/src/resources/extensions/gsd/session-model-override.ts new file mode 100644 index 000000000..3494c4da7 --- /dev/null +++ b/src/resources/extensions/gsd/session-model-override.ts @@ -0,0 +1,36 @@ +export interface SessionModelOverride { + provider: string; + id: string; +} + +const sessionOverrides = new Map<string, SessionModelOverride>(); + +function normalizeSessionId(sessionId: string): string { + return typeof sessionId === "string" ? sessionId.trim() : ""; +} + +export function setSessionModelOverride( + sessionId: string, + override: SessionModelOverride, +): void { + const key = normalizeSessionId(sessionId); + if (!key) return; + sessionOverrides.set(key, { + provider: override.provider, + id: override.id, + }); +} + +export function getSessionModelOverride( + sessionId: string, ): SessionModelOverride | undefined { + const key = normalizeSessionId(sessionId); + if (!key) return undefined; + return sessionOverrides.get(key); +} + +export function clearSessionModelOverride(sessionId: string): void { + const key = normalizeSessionId(sessionId); + if (!key) return; + sessionOverrides.delete(key); +} diff --git a/src/resources/extensions/gsd/shortcut-defs.ts new file mode 100644 index 000000000..79e50e33d --- /dev/null +++ b/src/resources/extensions/gsd/shortcut-defs.ts @@ -0,0 +1,56 @@ +// Canonical GSD shortcut definitions used by registration, help text, and overlays. 
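+// Primary combos are Ctrl+Alt+<key>; a Ctrl+Shift+<key> fallback is advertised
+// only when `hasFallback` is true, since some fallbacks collide with app
+// keybindings (see the `parallel` entry below).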
+ +import { formatShortcut } from "./files.js"; + +export type GSDShortcutId = "dashboard" | "notifications" | "parallel"; + +type GSDShortcutDef = { + key: "g" | "n" | "p"; + action: string; + command: string; + /** Whether the Ctrl+Shift fallback is registered (false when it conflicts with an app keybinding). */ + hasFallback: boolean; +}; + +export const GSD_SHORTCUTS: Record<GSDShortcutId, GSDShortcutDef> = { + dashboard: { + key: "g", + action: "Open GSD dashboard", + command: "/gsd status", + hasFallback: true, + }, + notifications: { + key: "n", + action: "Open notification history", + command: "/gsd notifications", + hasFallback: true, + }, + parallel: { + key: "p", + action: "Open parallel worker monitor", + command: "/gsd parallel watch", + hasFallback: false, // Ctrl+Shift+P conflicts with cycleModelBackward + }, +}; + +function combo(prefix: "Ctrl+Alt+" | "Ctrl+Shift+", key: string): string { + return `${prefix}${key.toUpperCase()}`; +} + +export function primaryShortcutCombo(id: GSDShortcutId): string { + return combo("Ctrl+Alt+", GSD_SHORTCUTS[id].key); +} + +export function fallbackShortcutCombo(id: GSDShortcutId): string { + return combo("Ctrl+Shift+", GSD_SHORTCUTS[id].key); +} + +export function shortcutPair(id: GSDShortcutId, formatter: (combo: string) => string = (combo) => combo): string { + const primary = formatter(primaryShortcutCombo(id)); + if (!GSD_SHORTCUTS[id].hasFallback) return primary; + return `${primary} / ${formatter(fallbackShortcutCombo(id))}`; +} + +export function formattedShortcutPair(id: GSDShortcutId): string { + return shortcutPair(id, formatShortcut); +} diff --git a/src/resources/extensions/gsd/state.ts b/src/resources/extensions/gsd/state.ts index 9dddc53e6..ac34a8b8e 100644 --- a/src/resources/extensions/gsd/state.ts +++ b/src/resources/extensions/gsd/state.ts @@ -57,8 +57,9 @@ import { insertMilestone, insertSlice, insertTask, + updateSliceStatus, updateTaskStatus, - getPendingSliceGateCount, + getPendingGateCountForTurn, type MilestoneRow, type SliceRow, type TaskRow, @@ -322,17 +323,8 @@ const isStatusDone = isClosedStatus; * * Must produce field-identical GSDState to _deriveStateImpl() for the same project. */ -export async function deriveStateFromDb(basePath: string): Promise<GSDState> { - const requirements = parseRequirementCounts(await loadFile(resolveGsdRootFile(basePath, "REQUIREMENTS"))); - +function reconcileDiskToDb(basePath: string): MilestoneRow[] { let allMilestones = getAllMilestones(); - - // Incremental disk→DB sync: milestone directories created outside the DB - // write path (via /gsd queue, manual mkdir, or complete-milestone writing the - // next CONTEXT.md) are never inserted by the initial migration guard in - // auto-start.ts because that guard only runs when gsd.db doesn't exist yet. - // Reconcile here so deriveStateFromDb never silently misses queued milestones. - // insertMilestone uses INSERT OR IGNORE, so this is safe to call every time. const dbIdSet = new Set(allMilestones.map(m => m.id)); const diskIds = findMilestoneIds(basePath); let synced = false; @@ -344,11 +336,6 @@ export async function deriveStateFromDb(basePath: string): Promise<GSDState> { } if (synced) allMilestones = getAllMilestones(); - // Disk→DB slice reconciliation (#2533): slices defined in ROADMAP.md but - // missing from the DB cause permanent "No slice eligible" blocks because - // the dependency resolver only sees DB rows. Parse each milestone's roadmap - // and insert any missing slices, checking SUMMARY files to set correct status. 
- // insertSlice uses INSERT OR IGNORE, so existing rows are never overwritten. for (const mid of diskIds) { if (isGhostMilestone(basePath, mid)) continue; const roadmapPath = resolveMilestoneFile(basePath, mid, "ROADMAP"); @@ -372,94 +359,63 @@ export async function deriveStateFromDb(basePath: string): Promise<GSDState> { depends: s.depends, demo: s.demo, }); } - } - // Reconcile: discover milestones that exist on disk but are missing from - // the DB. This happens when milestones were created before the DB migration - // or were manually added to the filesystem. Without this, disk-only - // milestones are invisible after migration (#2416). - const dbMilestoneIds = new Set(allMilestones.map(m => m.id)); - const diskMilestoneIds = findMilestoneIds(basePath); - for (const diskId of diskMilestoneIds) { - if (!dbMilestoneIds.has(diskId)) { - // Synthesize a minimal MilestoneRow for the disk-only milestone. - // Title and status will be resolved from disk files in the loop below. - allMilestones.push({ - id: diskId, - title: diskId, - status: 'active', - depends_on: [] as string[], - created_at: new Date().toISOString(), - } as MilestoneRow); + // Reconcile stale *existing* slice rows (#3599): a slice row may exist in + // the DB with status "pending" even though disk artifacts (SUMMARY) prove + // completion — the same class of desync that task-level reconciliation + // (further below) already handles. Without this, the dependency resolver + // builds doneSliceIds from stale DB rows and downstream slices stay blocked + // forever with "No slice eligible". + for (const dbSlice of dbSlices) { + if (isStatusDone(dbSlice.status)) continue; + const summaryPath = resolveSliceFile(basePath, mid, dbSlice.id, "SUMMARY"); + if (summaryPath) { + try { + updateSliceStatus(mid, dbSlice.id, "complete"); + logWarning("reconcile", `slice ${mid}/${dbSlice.id} status reconciled from "${dbSlice.status}" to "complete" (#3599)`, { mid, sid: dbSlice.id }); + } catch (e) { + logError("reconcile", `failed to update slice ${dbSlice.id}`, { sid: dbSlice.id, error: (e as Error).message }); + } + } } } - // Re-sort so milestones follow queue order (same as dispatch guard) (#2556) - const customOrder = loadQueueOrder(basePath); - const sortedIds = sortByQueueOrder(allMilestones.map(m => m.id), customOrder); - const byId = new Map(allMilestones.map(m => [m.id, m])); - allMilestones.length = 0; - for (const id of sortedIds) allMilestones.push(byId.get(id)!); + return allMilestones; +} - // Parallel worker isolation: when locked, filter to just the locked milestone - const milestoneLock = process.env.GSD_MILESTONE_LOCK; - const milestones = milestoneLock - ? allMilestones.filter(m => m.id === milestoneLock) - : allMilestones; - - if (milestones.length === 0) { - return { - activeMilestone: null, - activeSlice: null, - activeTask: null, - phase: 'pre-planning', - recentDecisions: [], - blockers: [], - nextAction: 'No milestones found. 
Run /gsd to create one.', - registry: [], requirements, - progress: { milestones: { done: 0, total: 0 } }, - }; - } - - // Phase 1: Build completeness set (which milestones count as "done" for dep resolution) +function buildCompletenessSet(basePath: string, milestones: MilestoneRow[]) { const completeMilestoneIds = new Set<string>(); const parkedMilestoneIds = new Set<string>(); for (const m of milestones) { - // Check disk for PARKED flag (not stored in DB status reliably — disk is truth for flag files) const parkedFile = resolveMilestoneFile(basePath, m.id, "PARKED"); if (parkedFile || m.status === 'parked') { parkedMilestoneIds.add(m.id); continue; } - if (isStatusDone(m.status)) { completeMilestoneIds.add(m.id); continue; } - - // Check if milestone has a summary on disk (terminal artifact per #864) const summaryFile = resolveMilestoneFile(basePath, m.id, "SUMMARY"); if (summaryFile) { completeMilestoneIds.add(m.id); continue; } - - // Milestones with all slices done but no SUMMARY file are in - // validating/completing state — intentionally NOT added to - // completeMilestoneIds. The SUMMARY file (checked above) is the - // terminal artifact that proves completion per #864. } + return { completeMilestoneIds, parkedMilestoneIds }; +} - // Phase 2: Build registry and find active milestone +async function buildRegistryAndFindActive( + basePath: string, + milestones: MilestoneRow[], + completeMilestoneIds: Set<string>, + parkedMilestoneIds: Set<string> +) { const registry: MilestoneRegistryEntry[] = []; let activeMilestone: ActiveRef | null = null; let activeMilestoneSlices: SliceRow[] = []; let activeMilestoneFound = false; let activeMilestoneHasDraft = false; - // Queued shells (DB row, no slices, no content files) are deferred during - // the main loop so they don't eclipse real active milestones (#3470). - // If no real active milestone is found, the first deferred shell is promoted. let firstDeferredQueuedShell: { id: string; title: string; deps: string[] } | null = null; for (const m of milestones) { @@ -468,19 +424,14 @@ export async function deriveStateFromDb(basePath: string): Promise<GSDState> { continue; } - // Ghost milestone check: no slices in DB AND no substantive files on disk. - // Skip queued milestones — they are handled by the deferred-shell logic below (#3470). 
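+ // Ghost check: skip milestones with no slices in the DB and no substantive files on disk; queued shells are handled by the deferred-shell logic below (#3470).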
const slices = getMilestoneSlices(m.id); if (slices.length === 0 && !isStatusDone(m.status) && m.status !== 'queued') { - // Check disk for ghost detection if (isGhostMilestone(basePath, m.id)) continue; } const summaryFile = resolveMilestoneFile(basePath, m.id, "SUMMARY"); - // Determine if this milestone is complete if (completeMilestoneIds.has(m.id) || (summaryFile !== null)) { - // Get title from DB or summary let title = stripMilestonePrefix(m.title) || m.id; if (summaryFile && !m.title) { const summaryContent = await loadFile(summaryFile); @@ -489,14 +440,12 @@ export async function deriveStateFromDb(basePath: string): Promise<GSDState> { } } registry.push({ id: m.id, title, status: 'complete' }); - completeMilestoneIds.add(m.id); // ensure it's in the set + completeMilestoneIds.add(m.id); continue; } - // Not complete — determine if it should be active const allSlicesDone = slices.length > 0 && slices.every(s => isStatusDone(s.status)); - // Get title — prefer DB, fall back to context file extraction let title = stripMilestonePrefix(m.title) || m.id; if (title === m.id) { const contextFile = resolveMilestoneFile(basePath, m.id, "CONTEXT"); @@ -507,7 +456,6 @@ export async function deriveStateFromDb(basePath: string): Promise<GSDState> { } if (!activeMilestoneFound) { - // Check milestone-level dependencies const deps = m.depends_on; const depsUnmet = deps.some(dep => !completeMilestoneIds.has(dep)); @@ -516,11 +464,6 @@ export async function deriveStateFromDb(basePath: string): Promise<GSDState> { continue; } - // Defer queued shell milestones with no substantive content (#3470). - // A queued milestone with no slices and no context/draft file is a - // placeholder that should not block later real active milestones. - // If no real active milestone is found after the loop, the first - // deferred shell is promoted to active (#2921). if (m.status === 'queued' && slices.length === 0) { const contextFile = resolveMilestoneFile(basePath, m.id, "CONTEXT"); const draftFile = resolveMilestoneFile(basePath, m.id, "CONTEXT-DRAFT"); @@ -533,14 +476,12 @@ export async function deriveStateFromDb(basePath: string): Promise<GSDState> { } } - // Handle all-slices-done case (validating/completing) if (allSlicesDone) { const validationFile = resolveMilestoneFile(basePath, m.id, "VALIDATION"); const validationContent = validationFile ? await loadFile(validationFile) : null; const validationTerminal = validationContent ? isValidationTerminal(validationContent) : false; if (!validationTerminal || (validationTerminal && !summaryFile)) { - // Validating or completing — still active activeMilestone = { id: m.id, title }; activeMilestoneSlices = slices; activeMilestoneFound = true; @@ -549,7 +490,6 @@ export async function deriveStateFromDb(basePath: string): Promise<GSDState> { } } - // Check for context draft (needs-discussion phase) const contextFile = resolveMilestoneFile(basePath, m.id, "CONTEXT"); const draftFile = resolveMilestoneFile(basePath, m.id, "CONTEXT-DRAFT"); if (!contextFile && draftFile) activeMilestoneHasDraft = true; @@ -559,13 +499,11 @@ export async function deriveStateFromDb(basePath: string): Promise<GSDState> { activeMilestoneFound = true; registry.push({ id: m.id, title, status: 'active', ...(deps.length > 0 ? { dependsOn: deps } : {}) }); } else { - // After active milestone found — rest are pending const deps = m.depends_on; registry.push({ id: m.id, title, status: 'pending', ...(deps.length > 0 ? { dependsOn: deps } : {}) }); } } - // Promote deferred queued shell if no real active milestone was found (#3470/#2921). 
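+ // Promote the first deferred queued shell when no real active milestone was found (#3470/#2921).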
if (!activeMilestoneFound && firstDeferredQueuedShell) { const shell = firstDeferredQueuedShell; activeMilestone = { id: shell.id, title: shell.title }; @@ -575,74 +513,264 @@ export async function deriveStateFromDb(basePath: string): Promise<GSDState> { if (entry) entry.status = 'active'; } - const milestoneProgress = { - done: registry.filter(e => e.status === 'complete').length, - total: registry.length, - }; + return { registry, activeMilestone, activeMilestoneSlices, activeMilestoneHasDraft }; +} - // ── No active milestone ────────────────────────────────────────────── - if (!activeMilestone) { - const pendingEntries = registry.filter(e => e.status === 'pending'); - const parkedEntries = registry.filter(e => e.status === 'parked'); +function handleNoActiveMilestone( + registry: MilestoneRegistryEntry[], + requirements: any, + milestoneProgress: { done: number, total: number } +): GSDState { + const pendingEntries = registry.filter(e => e.status === 'pending'); + const parkedEntries = registry.filter(e => e.status === 'parked'); - if (pendingEntries.length > 0) { - const blockerDetails = pendingEntries - .filter(e => e.dependsOn && e.dependsOn.length > 0) - .map(e => `${e.id} is waiting on unmet deps: ${e.dependsOn!.join(', ')}`); - return { - activeMilestone: null, activeSlice: null, activeTask: null, - phase: 'blocked', - recentDecisions: [], blockers: blockerDetails.length > 0 - ? blockerDetails - : ['All remaining milestones are dep-blocked but no deps listed — check CONTEXT.md files'], - nextAction: 'Resolve milestone dependencies before proceeding.', - registry, requirements, - progress: { milestones: milestoneProgress }, - }; - } - - if (parkedEntries.length > 0) { - const parkedIds = parkedEntries.map(e => e.id).join(', '); - return { - activeMilestone: null, activeSlice: null, activeTask: null, - phase: 'pre-planning', - recentDecisions: [], blockers: [], - nextAction: `All remaining milestones are parked (${parkedIds}). Run /gsd unpark or create a new milestone.`, - registry, requirements, - progress: { milestones: milestoneProgress }, - }; - } - - if (registry.length === 0) { - return { - activeMilestone: null, activeSlice: null, activeTask: null, - phase: 'pre-planning', - recentDecisions: [], blockers: [], - nextAction: 'No milestones found. Run /gsd to create one.', - registry: [], requirements, - progress: { milestones: { done: 0, total: 0 } }, - }; - } - - // All milestones complete - const lastEntry = registry[registry.length - 1]; - const activeReqs = requirements.active ?? 0; - const completionNote = activeReqs > 0 - ? `All milestones complete. ${activeReqs} active requirement${activeReqs === 1 ? '' : 's'} in REQUIREMENTS.md ${activeReqs === 1 ? 'has' : 'have'} not been mapped to a milestone.` - : 'All milestones complete.'; + if (pendingEntries.length > 0) { + const blockerDetails = pendingEntries + .filter(e => e.dependsOn && e.dependsOn.length > 0) + .map(e => `${e.id} is waiting on unmet deps: ${e.dependsOn!.join(', ')}`); return { - activeMilestone: null, - lastCompletedMilestone: lastEntry ? { id: lastEntry.id, title: lastEntry.title } : null, - activeSlice: null, activeTask: null, - phase: 'complete', - recentDecisions: [], blockers: [], - nextAction: completionNote, + activeMilestone: null, activeSlice: null, activeTask: null, + phase: 'blocked', + recentDecisions: [], blockers: blockerDetails.length > 0 + ? 
blockerDetails + : ['All remaining milestones are dep-blocked but no deps listed — check CONTEXT.md files'], + nextAction: 'Resolve milestone dependencies before proceeding.', registry, requirements, progress: { milestones: milestoneProgress }, }; } + if (parkedEntries.length > 0) { + const parkedIds = parkedEntries.map(e => e.id).join(', '); + return { + activeMilestone: null, activeSlice: null, activeTask: null, + phase: 'pre-planning', + recentDecisions: [], blockers: [], + nextAction: `All remaining milestones are parked (${parkedIds}). Run /gsd unpark or create a new milestone.`, + registry, requirements, + progress: { milestones: milestoneProgress }, + }; + } + + if (registry.length === 0) { + return { + activeMilestone: null, activeSlice: null, activeTask: null, + phase: 'pre-planning', + recentDecisions: [], blockers: [], + nextAction: 'No milestones found. Run /gsd to create one.', + registry: [], requirements, + progress: { milestones: { done: 0, total: 0 } }, + }; + } + + const lastEntry = registry[registry.length - 1]; + const activeReqs = requirements.active ?? 0; + const completionNote = activeReqs > 0 + ? `All milestones complete. ${activeReqs} active requirement${activeReqs === 1 ? '' : 's'} in REQUIREMENTS.md ${activeReqs === 1 ? 'has' : 'have'} not been mapped to a milestone.` + : 'All milestones complete.'; + return { + activeMilestone: null, + lastCompletedMilestone: lastEntry ? { id: lastEntry.id, title: lastEntry.title } : null, + activeSlice: null, activeTask: null, + phase: 'complete', + recentDecisions: [], blockers: [], + nextAction: completionNote, + registry, requirements, + progress: { milestones: milestoneProgress }, + }; +} + +async function handleAllSlicesDone( + basePath: string, + activeMilestone: ActiveRef, + registry: MilestoneRegistryEntry[], + requirements: any, + milestoneProgress: { done: number, total: number }, + sliceProgress: { done: number, total: number } +): Promise<GSDState> { + const validationFile = resolveMilestoneFile(basePath, activeMilestone.id, "VALIDATION"); + const validationContent = validationFile ? await loadFile(validationFile) : null; + const validationTerminal = validationContent ? isValidationTerminal(validationContent) : false; + const verdict = validationContent ? extractVerdict(validationContent) : undefined; + + if (!validationTerminal || verdict === 'needs-remediation') { + return { + activeMilestone, activeSlice: null, activeTask: null, + phase: 'validating-milestone', + recentDecisions: [], blockers: [], + nextAction: `Validate milestone ${activeMilestone.id} before completion.`, + registry, requirements, + progress: { milestones: milestoneProgress, slices: sliceProgress }, + }; + } + + return { + activeMilestone, activeSlice: null, activeTask: null, + phase: 'completing-milestone', + recentDecisions: [], blockers: [], + nextAction: `All slices complete in ${activeMilestone.id}. 
Write milestone summary.`, + registry, requirements, + progress: { milestones: milestoneProgress, slices: sliceProgress }, + }; +} + +function resolveSliceDependencies(activeMilestoneSlices: SliceRow[]): { activeSlice: ActiveRef | null, activeSliceRow: SliceRow | null } { + const doneSliceIds = new Set( + activeMilestoneSlices.filter(s => isStatusDone(s.status)).map(s => s.id) + ); + + const sliceLock = process.env.GSD_SLICE_LOCK; + if (sliceLock) { + const lockedSlice = activeMilestoneSlices.find(s => s.id === sliceLock); + if (lockedSlice) { + return { activeSlice: { id: lockedSlice.id, title: lockedSlice.title }, activeSliceRow: lockedSlice }; + } else { + logWarning("state", `GSD_SLICE_LOCK=${sliceLock} not found in active slices — worker has no assigned work`); + return { activeSlice: null, activeSliceRow: null }; + } + } + + for (const s of activeMilestoneSlices) { + if (isStatusDone(s.status)) continue; + if (isDeferredStatus(s.status)) continue; + if (s.depends.every(dep => doneSliceIds.has(dep))) { + return { activeSlice: { id: s.id, title: s.title }, activeSliceRow: s }; + } + } + return { activeSlice: null, activeSliceRow: null }; +} + +async function reconcileSliceTasks( + basePath: string, + milestoneId: string, + sliceId: string, + planFile: string +): Promise<TaskRow[]> { + let tasks = getSliceTasks(milestoneId, sliceId); + + if (tasks.length === 0 && planFile) { + try { + const planContent = await loadFile(planFile); + if (planContent) { + const diskPlan = parsePlan(planContent); + if (diskPlan.tasks.length > 0) { + for (let i = 0; i < diskPlan.tasks.length; i++) { + const t = diskPlan.tasks[i]; + try { + insertTask({ + id: t.id, + sliceId, + milestoneId, + title: t.title, + status: t.done ? 'complete' : 'pending', + sequence: i + 1, + }); + } catch (insertErr) { + logWarning("reconcile", `failed to insert task ${t.id} from plan file: ${insertErr instanceof Error ? insertErr.message : String(insertErr)}`); + } + } + tasks = getSliceTasks(milestoneId, sliceId); + logWarning("reconcile", `imported ${tasks.length} tasks from plan file for ${milestoneId}/${sliceId} — DB was empty (#3600)`, { mid: milestoneId, sid: sliceId }); + } + } + } catch (err) { + logError("reconcile", `plan-file task import failed for ${milestoneId}/${sliceId}: ${err instanceof Error ? 
err.message : String(err)}`); + } + } + + let reconciled = false; + for (const t of tasks) { + if (isStatusDone(t.status)) continue; + const summaryPath = resolveTaskFile(basePath, milestoneId, sliceId, t.id, "SUMMARY"); + if (summaryPath && existsSync(summaryPath)) { + try { + updateTaskStatus(milestoneId, sliceId, t.id, "complete"); + logWarning("reconcile", `task ${milestoneId}/${sliceId}/${t.id} status reconciled from "${t.status}" to "complete" (#2514)`, { mid: milestoneId, sid: sliceId, tid: t.id }); + reconciled = true; + } catch (e) { + logError("reconcile", `failed to update task ${t.id}`, { tid: t.id, error: (e as Error).message }); + } + } + } + if (reconciled) { + tasks = getSliceTasks(milestoneId, sliceId); + } + return tasks; +} + +async function detectBlockers(basePath: string, milestoneId: string, sliceId: string, tasks: TaskRow[]): Promise<string | null> { + const completedTasks = tasks.filter(t => isStatusDone(t.status)); + for (const ct of completedTasks) { + if (ct.blocker_discovered) { + return ct.id; + } + const summaryFile = resolveTaskFile(basePath, milestoneId, sliceId, ct.id, "SUMMARY"); + if (!summaryFile) continue; + const summaryContent = await loadFile(summaryFile); + if (!summaryContent) continue; + const summary = parseSummary(summaryContent); + if (summary.frontmatter.blocker_discovered) { + return ct.id; + } + } + return null; +} + +function checkReplanTrigger(basePath: string, milestoneId: string, sliceId: string): boolean { + const sliceRow = getSlice(milestoneId, sliceId); + const dbTriggered = !!sliceRow?.replan_triggered_at; + const diskTriggered = !dbTriggered && + !!resolveSliceFile(basePath, milestoneId, sliceId, "REPLAN-TRIGGER"); + return dbTriggered || diskTriggered; +} + +async function checkInterruptedWork(basePath: string, milestoneId: string, sliceId: string): Promise<boolean> { + const sDir = resolveSlicePath(basePath, milestoneId, sliceId); + const continueFile = sDir ? resolveSliceFile(basePath, milestoneId, sliceId, "CONTINUE") : null; + return !!(continueFile && await loadFile(continueFile)) || + !!(sDir && await loadFile(join(sDir, "continue.md"))); +} + +export async function deriveStateFromDb(basePath: string): Promise<GSDState> { + const requirements = parseRequirementCounts(await loadFile(resolveGsdRootFile(basePath, "REQUIREMENTS"))); + + let allMilestones = reconcileDiskToDb(basePath); + + const customOrder = loadQueueOrder(basePath); + const sortedIds = sortByQueueOrder(allMilestones.map(m => m.id), customOrder); + const byId = new Map(allMilestones.map(m => [m.id, m])); + allMilestones.length = 0; + for (const id of sortedIds) allMilestones.push(byId.get(id)!); + + const milestoneLock = process.env.GSD_MILESTONE_LOCK; + const milestones = milestoneLock + ? allMilestones.filter(m => m.id === milestoneLock) + : allMilestones; + + if (milestones.length === 0) { + return { + activeMilestone: null, activeSlice: null, activeTask: null, + phase: 'pre-planning', recentDecisions: [], blockers: [], + nextAction: 'No milestones found. 
Run /gsd to create one.', + registry: [], requirements, + progress: { milestones: { done: 0, total: 0 } }, + }; + } + + const { completeMilestoneIds, parkedMilestoneIds } = buildCompletenessSet(basePath, milestones); + + const registryContext = await buildRegistryAndFindActive(basePath, milestones, completeMilestoneIds, parkedMilestoneIds); + const { registry, activeMilestone, activeMilestoneSlices, activeMilestoneHasDraft } = registryContext; + + const milestoneProgress = { + done: registry.filter(e => e.status === 'complete').length, + total: registry.length, + }; + + if (!activeMilestone) { + return handleNoActiveMilestone(registry, requirements, milestoneProgress); + } + const hasRoadmap = resolveMilestoneFile(basePath, activeMilestone.id, "ROADMAP") !== null; if (activeMilestoneSlices.length === 0) { @@ -659,195 +787,60 @@ export async function deriveStateFromDb(basePath: string): Promise<GSDState> { }; } - // Has roadmap file but zero slices in DB — pre-planning (zero-slice roadmap guard) return { activeMilestone, activeSlice: null, activeTask: null, - phase: 'pre-planning', - recentDecisions: [], blockers: [], + phase: 'pre-planning', recentDecisions: [], blockers: [], nextAction: `Milestone ${activeMilestone.id} has a roadmap but no slices defined. Add slices to the roadmap.`, registry, requirements, - progress: { - milestones: milestoneProgress, - slices: { done: 0, total: 0 }, - }, + progress: { milestones: milestoneProgress, slices: { done: 0, total: 0 } }, }; } - // ── All slices done → validating/completing ───────────────────────── const allSlicesDone = activeMilestoneSlices.every(s => isStatusDone(s.status)); - if (allSlicesDone) { - const validationFile = resolveMilestoneFile(basePath, activeMilestone.id, "VALIDATION"); - const validationContent = validationFile ? await loadFile(validationFile) : null; - const validationTerminal = validationContent ? isValidationTerminal(validationContent) : false; - const verdict = validationContent ? extractVerdict(validationContent) : undefined; - const sliceProgress = { - done: activeMilestoneSlices.length, - total: activeMilestoneSlices.length, - }; - - // Force re-validation when verdict is needs-remediation — remediation slices - // may have completed since the stale validation was written (#3596). - if (!validationTerminal || verdict === 'needs-remediation') { - return { - activeMilestone, activeSlice: null, activeTask: null, - phase: 'validating-milestone', - recentDecisions: [], blockers: [], - nextAction: `Validate milestone ${activeMilestone.id} before completion.`, - registry, requirements, - progress: { milestones: milestoneProgress, slices: sliceProgress }, - }; - } - - return { - activeMilestone, activeSlice: null, activeTask: null, - phase: 'completing-milestone', - recentDecisions: [], blockers: [], - nextAction: `All slices complete in ${activeMilestone.id}. 
Write milestone summary.`, - registry, requirements, - progress: { milestones: milestoneProgress, slices: sliceProgress }, - }; - } - - // ── Find active slice (first incomplete with deps satisfied) ───────── const sliceProgress = { done: activeMilestoneSlices.filter(s => isStatusDone(s.status)).length, total: activeMilestoneSlices.length, }; - const doneSliceIds = new Set( - activeMilestoneSlices.filter(s => isStatusDone(s.status)).map(s => s.id) - ); + if (allSlicesDone) { + return handleAllSlicesDone(basePath, activeMilestone, registry, requirements, milestoneProgress, sliceProgress); + } - let activeSlice: ActiveRef | null = null; - let activeSliceRow: SliceRow | null = null; - - // ── Slice-level parallel worker isolation ───────────────────────────── - // When GSD_SLICE_LOCK is set, this process is a parallel worker scoped - // to a single slice. Override activeSlice to only the locked slice ID. - const sliceLock = process.env.GSD_SLICE_LOCK; - if (sliceLock) { - const lockedSlice = activeMilestoneSlices.find(s => s.id === sliceLock); - if (lockedSlice) { - activeSlice = { id: lockedSlice.id, title: lockedSlice.title }; - activeSliceRow = lockedSlice; - } else { - logWarning("state", `GSD_SLICE_LOCK=${sliceLock} not found in active slices — worker has no assigned work`); - // Don't silently continue — this is a dispatch error + const activeSliceContext = resolveSliceDependencies(activeMilestoneSlices); + if (!activeSliceContext.activeSlice) { + // A locked slice that is not found comes back as null (with a warning already logged); surface that here as a 'blocked' phase rather than continuing silently. + if (process.env.GSD_SLICE_LOCK) { return { activeMilestone, activeSlice: null, activeTask: null, - phase: 'blocked', - recentDecisions: [], blockers: [`GSD_SLICE_LOCK=${sliceLock} not found in active milestone slices`], + phase: 'blocked', recentDecisions: [], blockers: [`GSD_SLICE_LOCK=${process.env.GSD_SLICE_LOCK} not found in active milestone slices`], nextAction: 'Slice lock references a non-existent slice — check orchestrator dispatch.', registry, requirements, progress: { milestones: milestoneProgress, slices: sliceProgress }, }; } - } else { - for (const s of activeMilestoneSlices) { - if (isStatusDone(s.status)) continue; - // #2661: Skip deferred slices — a decision explicitly deferred this work. - // Without this guard the dispatcher would keep dispatching deferred slices - // because DECISIONS.md is only contextual, not authoritative for dispatch. 
- if (isDeferredStatus(s.status)) continue; - if (s.depends.every(dep => doneSliceIds.has(dep))) { - activeSlice = { id: s.id, title: s.title }; - activeSliceRow = s; - break; - } - } - } - - if (!activeSlice) { return { activeMilestone, activeSlice: null, activeTask: null, - phase: 'blocked', - recentDecisions: [], blockers: ['No slice eligible — check dependency ordering'], + phase: 'blocked', recentDecisions: [], blockers: ['No slice eligible — check dependency ordering'], nextAction: 'Resolve dependency blockers or plan next slice.', registry, requirements, progress: { milestones: milestoneProgress, slices: sliceProgress }, }; } + const { activeSlice } = activeSliceContext; - // ── Check for slice plan file on disk ──────────────────────────────── const planFile = resolveSliceFile(basePath, activeMilestone.id, activeSlice.id, "PLAN"); if (!planFile) { return { activeMilestone, activeSlice, activeTask: null, - phase: 'planning', - recentDecisions: [], blockers: [], + phase: 'planning', recentDecisions: [], blockers: [], nextAction: `Plan slice ${activeSlice.id} (${activeSlice.title}).`, registry, requirements, progress: { milestones: milestoneProgress, slices: sliceProgress }, }; } - // ── Get tasks from DB ──────────────────────────────────────────────── - let tasks = getSliceTasks(activeMilestone.id, activeSlice.id); - - // ── Reconcile missing tasks: plan file has tasks but DB is empty (#3600) ── - // When the planning agent writes S##-PLAN.md with task entries but never - // calls the gsd_plan_slice persistence tool, the DB has zero task rows - // even though the plan file contains valid tasks. Without this reconciliation, - // deriveState returns phase='planning' forever — the dispatcher re-dispatches - // plan-slice in an infinite loop. - if (tasks.length === 0 && planFile) { - try { - const planContent = await loadFile(planFile); - if (planContent) { - const diskPlan = parsePlan(planContent); - if (diskPlan.tasks.length > 0) { - for (let i = 0; i < diskPlan.tasks.length; i++) { - const t = diskPlan.tasks[i]; - try { - insertTask({ - id: t.id, - sliceId: activeSlice.id, - milestoneId: activeMilestone.id, - title: t.title, - status: t.done ? 'complete' : 'pending', - sequence: i + 1, - }); - } catch (insertErr) { - // Task may already exist from a partial previous import — skip - logWarning("reconcile", `failed to insert task ${t.id} from plan file: ${insertErr instanceof Error ? insertErr.message : String(insertErr)}`); - } - } - tasks = getSliceTasks(activeMilestone.id, activeSlice.id); - logWarning("reconcile", `imported ${tasks.length} tasks from plan file for ${activeMilestone.id}/${activeSlice.id} — DB was empty (#3600)`, { mid: activeMilestone.id, sid: activeSlice.id }); - } - } - } catch (err) { - // Non-fatal — fall through to the existing "empty plan" logic - logError("reconcile", `plan-file task import failed for ${activeMilestone.id}/${activeSlice.id}: ${err instanceof Error ? err.message : String(err)}`); - } - } - - // ── Reconcile stale task status (#2514) ────────────────────────────── - // When a session disconnects after the agent writes SUMMARY + VERIFY - // artifacts but before postUnitPostVerification updates the DB, tasks - // remain "pending" in the DB despite being complete on disk. Without - // reconciliation, deriveState keeps returning the stale task as active, - // causing the dispatcher to re-dispatch the same completed task forever. 
- let reconciled = false; - for (const t of tasks) { - if (isStatusDone(t.status)) continue; - const summaryPath = resolveTaskFile(basePath, activeMilestone.id, activeSlice.id, t.id, "SUMMARY"); - if (summaryPath && existsSync(summaryPath)) { - try { - updateTaskStatus(activeMilestone.id, activeSlice.id, t.id, "complete"); - logWarning("reconcile", `task ${activeMilestone.id}/${activeSlice.id}/${t.id} status reconciled from "${t.status}" to "complete" (#2514)`, { mid: activeMilestone.id, sid: activeSlice.id, tid: t.id }); - reconciled = true; - } catch (e) { - // DB write failed — continue with stale status rather than crash - logError("reconcile", `failed to update task ${t.id}`, { tid: t.id, error: (e as Error).message }); - } - } - } - // Re-fetch tasks if any were reconciled so downstream logic sees fresh status - if (reconciled) { - tasks = getSliceTasks(activeMilestone.id, activeSlice.id); - } - + const tasks = await reconcileSliceTasks(basePath, activeMilestone.id, activeSlice.id, planFile); + const taskProgress = { done: tasks.filter(t => isStatusDone(t.status)).length, total: tasks.length, @@ -856,23 +849,19 @@ export async function deriveStateFromDb(basePath: string): Promise<GSDState> { const activeTaskRow = tasks.find(t => !isStatusDone(t.status)); if (!activeTaskRow && tasks.length > 0) { - // All tasks done but slice not marked complete → summarizing return { activeMilestone, activeSlice, activeTask: null, - phase: 'summarizing', - recentDecisions: [], blockers: [], + phase: 'summarizing', recentDecisions: [], blockers: [], nextAction: `All tasks done in ${activeSlice.id}. Write slice summary and complete slice.`, registry, requirements, progress: { milestones: milestoneProgress, slices: sliceProgress, tasks: taskProgress }, }; } - // Empty plan — no tasks defined yet if (!activeTaskRow) { return { activeMilestone, activeSlice, activeTask: null, - phase: 'planning', - recentDecisions: [], blockers: [], + phase: 'planning', recentDecisions: [], blockers: [], nextAction: `Slice ${activeSlice.id} has a plan file but no tasks. Add tasks to the plan.`, registry, requirements, progress: { milestones: milestoneProgress, slices: sliceProgress, tasks: taskProgress }, @@ -881,15 +870,13 @@ export async function deriveStateFromDb(basePath: string): Promise<GSDState> { const activeTask: ActiveRef = { id: activeTaskRow.id, title: activeTaskRow.title }; - // ── Task plan file check (#909) ───────────────────────────────────── const tasksDir = resolveTasksDir(basePath, activeMilestone.id, activeSlice.id); if (tasksDir && existsSync(tasksDir) && tasks.length > 0) { const allFiles = readdirSync(tasksDir).filter(f => f.endsWith(".md")); if (allFiles.length === 0) { return { activeMilestone, activeSlice, activeTask: null, - phase: 'planning', - recentDecisions: [], blockers: [], + phase: 'planning', recentDecisions: [], blockers: [], nextAction: `Task plan files missing for ${activeSlice.id}. Run plan-slice to generate task plans.`, registry, requirements, progress: { milestones: milestoneProgress, slices: sliceProgress, tasks: taskProgress }, @@ -898,50 +885,34 @@ export async function deriveStateFromDb(basePath: string): Promise<GSDState> { } } // ── Quality gate evaluation check ────────────────────────────────── - // If slice-scoped gates (Q3/Q4) are still pending, pause before execution - // so the gate-evaluate dispatch rule can run parallel sub-agents. + // Pause before execution only when gates owned by the `gate-evaluate` + // turn (Q3/Q4) are still pending. 
Q8 is also `scope:"slice"` but is + // owned by `complete-slice`, so it must NOT block the evaluating-gates + // phase — otherwise auto-loop stalls forever waiting for a gate that + // this turn never evaluates. See gate-registry.ts for the ownership map. // Slices with zero gate rows (pre-feature or simple) skip straight through. - const pendingGateCount = getPendingSliceGateCount(activeMilestone.id, activeSlice.id); + const pendingGateCount = getPendingGateCountForTurn( + activeMilestone.id, + activeSlice.id, + "gate-evaluate", + ); if (pendingGateCount > 0) { return { activeMilestone, activeSlice, activeTask: null, - phase: 'evaluating-gates', - recentDecisions: [], blockers: [], + phase: 'evaluating-gates', recentDecisions: [], blockers: [], nextAction: `Evaluate ${pendingGateCount} quality gate(s) for ${activeSlice.id} before execution.`, registry, requirements, progress: { milestones: milestoneProgress, slices: sliceProgress, tasks: taskProgress }, }; } - // ── Blocker detection: check completed tasks for blocker_discovered ── - const completedTasks = tasks.filter(t => isStatusDone(t.status)); - let blockerTaskId: string | null = null; - for (const ct of completedTasks) { - if (ct.blocker_discovered) { - blockerTaskId = ct.id; - break; - } - // Also check disk summary in case DB doesn't have the flag - const summaryFile = resolveTaskFile(basePath, activeMilestone.id, activeSlice.id, ct.id, "SUMMARY"); - if (!summaryFile) continue; - const summaryContent = await loadFile(summaryFile); - if (!summaryContent) continue; - const summary = parseSummary(summaryContent); - if (summary.frontmatter.blocker_discovered) { - blockerTaskId = ct.id; - break; - } - } - + const blockerTaskId = await detectBlockers(basePath, activeMilestone.id, activeSlice.id, tasks); if (blockerTaskId) { - // Loop protection: if replan_history has entries for this slice, a replan - // was already performed — don't re-enter replanning phase. const replanHistory = getReplanHistory(activeMilestone.id, activeSlice.id); if (replanHistory.length === 0) { return { activeMilestone, activeSlice, activeTask, - phase: 'replanning-slice', - recentDecisions: [], + phase: 'replanning-slice', recentDecisions: [], blockers: [`Task ${blockerTaskId} discovered a blocker requiring slice replan`], nextAction: `Task ${blockerTaskId} reported blocker_discovered. Replan slice ${activeSlice.id} before continuing.`, activeWorkspace: undefined, @@ -951,22 +922,14 @@ export async function deriveStateFromDb(basePath: string): Promise<GSDState> { } } - // ── REPLAN-TRIGGER detection ───────────────────────────────────────── if (!blockerTaskId) { - const sliceRow = getSlice(activeMilestone.id, activeSlice.id); - // Check DB column first, fall back to disk trigger file when DB write - // was best-effort and failed (triage-resolution.ts dual-write gap). 
- const dbTriggered = !!sliceRow?.replan_triggered_at; - const diskTriggered = !dbTriggered && - !!resolveSliceFile(basePath, activeMilestone.id, activeSlice.id, "REPLAN-TRIGGER"); - if (dbTriggered || diskTriggered) { - // Loop protection: if replan_history has entries, replan was already done + const isTriggered = checkReplanTrigger(basePath, activeMilestone.id, activeSlice.id); + if (isTriggered) { const replanHistory = getReplanHistory(activeMilestone.id, activeSlice.id); if (replanHistory.length === 0) { return { activeMilestone, activeSlice, activeTask, - phase: 'replanning-slice', - recentDecisions: [], + phase: 'replanning-slice', recentDecisions: [], blockers: ['Triage replan trigger detected — slice replan required'], nextAction: `Triage replan triggered for slice ${activeSlice.id}. Replan before continuing.`, activeWorkspace: undefined, @@ -977,16 +940,11 @@ export async function deriveStateFromDb(basePath: string): Promise<GSDState> { } } - // ── Check for interrupted work ─────────────────────────────────────── - const sDir = resolveSlicePath(basePath, activeMilestone.id, activeSlice.id); - const continueFile = sDir ? resolveSliceFile(basePath, activeMilestone.id, activeSlice.id, "CONTINUE") : null; - const hasInterrupted = !!(continueFile && await loadFile(continueFile)) || - !!(sDir && await loadFile(join(sDir, "continue.md"))); + const hasInterrupted = await checkInterruptedWork(basePath, activeMilestone.id, activeSlice.id); return { activeMilestone, activeSlice, activeTask, - phase: 'executing', - recentDecisions: [], blockers: [], + phase: 'executing', recentDecisions: [], blockers: [], nextAction: hasInterrupted ? `Resume interrupted work on ${activeTask.id}: ${activeTask.title} in slice ${activeSlice.id}. Read continue.md first.` : `Execute ${activeTask.id}: ${activeTask.title} in slice ${activeSlice.id}.`, @@ -995,11 +953,14 @@ export async function deriveStateFromDb(basePath: string): Promise<GSDState> { }; } + // LEGACY: Filesystem-based state derivation for unmigrated projects. // DB-backed projects use deriveStateFromDb() above. Target: extract to // state-legacy.ts when all projects are DB-backed. 
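+// NOTE: must stay field-identical to deriveStateFromDb() for the same project.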
export async function _deriveStateImpl(basePath: string): Promise<GSDState> { - const milestoneIds = findMilestoneIds(basePath); + const diskIds = findMilestoneIds(basePath); + const customOrder = loadQueueOrder(basePath); + const milestoneIds = sortByQueueOrder(diskIds, customOrder); // ── Parallel worker isolation ────────────────────────────────────────── // When GSD_MILESTONE_LOCK is set, this process is a parallel worker diff --git a/src/resources/extensions/gsd/templates/context-enhanced.md deleted file mode 100644 index 503ffaf17..000000000 --- a/src/resources/extensions/gsd/templates/context-enhanced.md +++ /dev/null @@ -1,138 +0,0 @@ -# {{milestoneId}}: {{milestoneTitle}} - -**Gathered:** {{date}} -**Status:** Ready for planning - -## Project Description - -{{description}} - -## Why This Milestone - -{{whatProblemThisSolves_AND_whyNow}} - -## Codebase Brief - -### Technology Stack - -{{techStack}} - -### Key Modules - -{{keyModules}} - -### Patterns in Use - -{{patternsInUse}} - -## User-Visible Outcome - -### When this milestone is complete, the user can: - -- {{literalUserActionInRealEnvironment}} -- {{literalUserActionInRealEnvironment}} - -### Entry point / environment - -- Entry point: {{CLI command / URL / bot / extension / service / workflow}} - Environment: {{local dev / browser / mobile / launchd / CI / production-like}} - Live dependencies involved: {{telegram / database / webhook / rpc subprocess / none}} - -## Completion Class - -- Contract complete means: {{what can be proven by tests / fixtures / artifacts}} - Integration complete means: {{what must work across real subsystems}} - Operational complete means: {{what must work under real lifecycle conditions, or none}} - -## Architectural Decisions - -### {{decisionTitle}} - -**Decision:** {{decisionStatement}} - -**Rationale:** {{rationale}} - -**Evidence:** {{evidence}} - -**Alternatives Considered:** -- {{alternative1}} — {{whyNotChosen1}} -- {{alternative2}} — {{whyNotChosen2}} - ---- - -> Add additional decisions as separate `### Decision Title` blocks following the same structure above. - -## Interface Contracts - -{{interfaceContracts}} - -> Document API boundaries, function signatures, data shapes, or protocol agreements that must be honored. Leave blank or remove if not applicable to this milestone. - -## Error Handling Strategy - -{{errorHandlingStrategy}} - -> Describe the approach for handling failures, edge cases, and error propagation. Include retry policies, fallback behaviors, and user-facing error messages where relevant. - -## Final Integrated Acceptance - -To call this milestone complete, we must prove: - -- {{one real end-to-end scenario}} -- {{one real end-to-end scenario}} -- {{what cannot be simulated if this milestone is to be considered truly done}} - -## Testing Requirements - -{{testingRequirements}} - -> Specify test types (unit, integration, e2e), coverage expectations, and any specific test scenarios that must pass. - -## Acceptance Criteria - -{{acceptanceCriteria}} - -> Per-slice acceptance criteria gathered during discussion. Each slice should have clear, testable criteria. - -## Risks and Unknowns - -- {{riskOrUnknown}} — {{whyItMatters}} - -## Existing Codebase / Prior Art - -- `{{fileOrModule}}` — {{howItRelates}} -- `{{fileOrModule}}` — {{howItRelates}} - -> See `.gsd/DECISIONS.md` for all architectural and pattern decisions — it is an append-only register; read it during planning, append to it during execution. 
- -## Relevant Requirements - -- {{requirementId}} — {{howThisMilestoneAdvancesIt}} - -## Scope - -### In Scope - -- {{inScopeItem}} - -### Out of Scope / Non-Goals - -- {{outOfScopeItem}} - -## Technical Constraints - -- {{constraint}} - -## Integration Points - -- {{systemOrService}} — {{howThisMilestoneInteractsWithIt}} - -## Ecosystem Notes - -{{ecosystemNotes}} - -> Research findings, best practices, known issues, and relevant external documentation discovered during preparation. - -## Open Questions - -- {{question}} — {{currentThinking}} diff --git a/src/resources/extensions/gsd/templates/context.md b/src/resources/extensions/gsd/templates/context.md index 3e19bb788..0111e7c83 100644 --- a/src/resources/extensions/gsd/templates/context.md +++ b/src/resources/extensions/gsd/templates/context.md @@ -38,6 +38,28 @@ To call this milestone complete, we must prove: - {{one real end-to-end scenario}} - {{what cannot be simulated if this milestone is to be considered truly done}} +## Architectural Decisions + +### {{decisionTitle}} + +**Decision:** {{decisionStatement}} + +**Rationale:** {{rationale}} + +**Alternatives Considered:** +- {{alternative}} — {{whyNotChosen}} + +--- + +> Add additional decisions as separate `### Decision Title` blocks following the same structure above. +> See `.gsd/DECISIONS.md` for the full append-only register of all project decisions. + +## Error Handling Strategy + +{{errorHandlingStrategy}} + +> Describe the approach for handling failures, edge cases, and error propagation. Include retry policies, fallback behaviors, and user-facing error messages where relevant. + ## Risks and Unknowns - {{riskOrUnknown}} — {{whyItMatters}} @@ -47,8 +69,6 @@ To call this milestone complete, we must prove: - `{{fileOrModule}}` — {{howItRelates}} - `{{fileOrModule}}` — {{howItRelates}} -> See `.gsd/DECISIONS.md` for all architectural and pattern decisions — it is an append-only register; read it during planning, append to it during execution. - ## Relevant Requirements - {{requirementId}} — {{howThisMilestoneAdvancesIt}} @@ -71,6 +91,18 @@ To call this milestone complete, we must prove: - {{systemOrService}} — {{howThisMilestoneInteractsWithIt}} +## Testing Requirements + +{{testingRequirements}} + +> Specify test types (unit, integration, e2e), coverage expectations, and specific test scenarios that must pass. + +## Acceptance Criteria + +{{acceptanceCriteria}} + +> Per-slice acceptance criteria gathered during discussion. Each slice should have clear, testable criteria. + ## Open Questions - {{question}} — {{currentThinking}} diff --git a/src/resources/extensions/gsd/tests/adversarial-review-fixes.test.ts b/src/resources/extensions/gsd/tests/adversarial-review-fixes.test.ts deleted file mode 100644 index 66c24a082..000000000 --- a/src/resources/extensions/gsd/tests/adversarial-review-fixes.test.ts +++ /dev/null @@ -1,223 +0,0 @@ -/** - * Tests for adversarial review fixes from PR #3602. - * - * These tests verify the fixes for: - * 1. Cross-session state leak in lastPreparationResult (HIGH) - * 2. Invalid regex anchor \z in prompt-validation.ts (HIGH) - * 3. 
Consecutive error counter in agent-loop.ts (MEDIUM) — UPSTREAM CODE, NOT MODIFIED - */ - -import { describe, test, beforeEach, afterEach } from "node:test"; -import assert from "node:assert/strict"; -import { mkdirSync, writeFileSync, rmSync } from "node:fs"; -import { join } from "node:path"; -import { tmpdir } from "node:os"; - -import { getLastPreparationResult, clearPreparationResult } from "../guided-flow.ts"; -import { validateEnhancedContext } from "../prompt-validation.ts"; - -// ─── Test Helpers ─────────────────────────────────────────────────────────────── - -function makeTempDir(prefix: string): string { - const dir = join( - tmpdir(), - `gsd-adversarial-test-${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`, - ); - mkdirSync(dir, { recursive: true }); - return dir; -} - -function cleanup(dir: string): void { - try { - rmSync(dir, { recursive: true, force: true }); - } catch { - // best-effort - } -} - -// ─── Fix 1: Cross-session state leak in lastPreparationResult ──────────────────── - -describe("Fix #1 — Cross-session state leak (lastPreparationResult)", () => { - beforeEach(() => { - clearPreparationResult(); - }); - - afterEach(() => { - clearPreparationResult(); - }); - - test("clearPreparationResult sets lastPreparationResult to null", () => { - // First, verify the getter returns null after clear - clearPreparationResult(); - const result = getLastPreparationResult(); - assert.equal(result, null, "lastPreparationResult should be null after clear"); - }); - - test("getLastPreparationResult returns null initially", () => { - clearPreparationResult(); - const result = getLastPreparationResult(); - assert.equal(result, null, "should return null when no preparation has run"); - }); - - // Note: The actual test that prepareAndBuildDiscussPrompt clears the result - // on entry requires mocking ExtensionCommandContext which is complex. - // The fix is verified by code inspection and integration tests. - // The key behavior is: - // 1. lastPreparationResult = null at the start of prepareAndBuildDiscussPrompt - // 2. If preparation throws, lastPreparationResult stays null - // 3. If discuss_preparation is false, lastPreparationResult stays null -}); - -// ─── Fix 2: Invalid regex anchor \z in prompt-validation.ts ────────────────────── - -describe("Fix #2 — Invalid regex anchor (prompt-validation.ts)", () => { - test("validates content with Architectural Decisions at end of file", () => { - // This was the bug: \z is PCRE/Ruby, not JS. JS treated it as literal 'z'. - // The section extraction would fail when Architectural Decisions was the - // last section (no subsequent ## heading). - const contentWithDecisionsAtEnd = ` -# M001: Test Milestone - -## Why This Milestone - -This is why. - -## Acceptance Criteria - -- Criterion 1 - -## Architectural Decisions - -### Decision 1 - -**Decision:** Use TypeScript -**Rationale:** Type safety -`; - - const result = validateEnhancedContext(contentWithDecisionsAtEnd); - assert.equal(result.valid, true, "should validate content with decisions at end"); - assert.equal(result.missing.length, 0, "should have no missing sections"); - }); - - test("validates content with Architectural Decisions followed by another section", () => { - const contentWithDecisionsInMiddle = ` -# M001: Test Milestone - -## Why This Milestone - -This is why. 
- -## Architectural Decisions - -### Decision 1 - -**Decision:** Use TypeScript - -## Acceptance Criteria - -- Criterion 1 -`; - - const result = validateEnhancedContext(contentWithDecisionsInMiddle); - assert.equal(result.valid, true, "should validate content with decisions in middle"); - }); - - test("detects missing decision entry when section is empty", () => { - const contentEmptyDecisions = ` -# M001: Test Milestone - -## Why This Milestone - -This is why. - -## Architectural Decisions - -(No decisions yet) - -## Acceptance Criteria - -- Criterion 1 -`; - - const result = validateEnhancedContext(contentEmptyDecisions); - assert.equal(result.valid, false, "should fail when decisions section has no entries"); - assert.ok( - result.missing.some((m) => m.includes("decision entry")), - "should report missing decision entry", - ); - }); - - test("accepts inline **Decision format", () => { - const contentInlineDecision = ` -## Why This Milestone - -Test - -## Architectural Decisions - -**Decision:** Use React - -## Acceptance Criteria - -- Criterion 1 -`; - - const result = validateEnhancedContext(contentInlineDecision); - assert.equal(result.valid, true, "should accept **Decision format"); - }); - - test("accepts ### subsection format", () => { - const contentSubsectionDecision = ` -## Why This Milestone - -Test - -## Architectural Decisions - -### Database Choice - -We chose SQLite. - -## Acceptance Criteria - -- Criterion 1 -`; - - const result = validateEnhancedContext(contentSubsectionDecision); - assert.equal(result.valid, true, "should accept ### subsection format"); - }); - - test("handles edge case: Architectural Decisions heading without space before content", () => { - const contentNoSpace = `## Why This Milestone -Test -## Architectural Decisions -### Decision 1 -Content here -## Acceptance Criteria -- Done`; - - const result = validateEnhancedContext(contentNoSpace); - assert.equal(result.valid, true, "should handle content without extra spacing"); - }); -}); - -// ─── Fix 3: Consecutive error counter (agent-loop.ts) ──────────────────────────── - -describe("Fix #3 — Consecutive error counter (UPSTREAM)", () => { - test("NOTE: agent-loop.ts is upstream code that was not modified", () => { - // This finding from the adversarial review relates to upstream behavior - // in packages/pi-agent-core/src/agent-loop.ts. - // - // The consecutiveAllToolErrorTurns counter logic was added in PR #3301 - // and refined in PR #3618 by upstream contributors. These PRs fix - // issues with: - // - Schema overload detection counting bash exit codes as failures - // - The counter not resetting properly on successful turns - // - // Since this is upstream code (part of pi-agent-core, not gsd extension), - // we do not modify it here. The fix should be coordinated with upstream. 
- // - // See: packages/pi-agent-core/src/agent-loop.ts lines 191, 298-325 - assert.ok(true, "Documented as upstream behavior — no changes made"); - }); -}); diff --git a/src/resources/extensions/gsd/tests/auto-paused-ui-cleanup.test.ts b/src/resources/extensions/gsd/tests/auto-paused-ui-cleanup.test.ts new file mode 100644 index 000000000..9ce54a61e --- /dev/null +++ b/src/resources/extensions/gsd/tests/auto-paused-ui-cleanup.test.ts @@ -0,0 +1,27 @@ +import { test } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const autoSource = readFileSync(join(__dirname, "..", "auto.ts"), "utf-8"); + +test("#3370: cleanupAfterLoopExit preserves paused auto badge after provider pause", () => { + const cleanupIdx = autoSource.indexOf("function cleanupAfterLoopExit"); + assert.ok(cleanupIdx > -1, "auto.ts should define cleanupAfterLoopExit"); + + const dispatchIdx = autoSource.indexOf("export async function dispatchHookUnit", cleanupIdx); + assert.ok(dispatchIdx > cleanupIdx, "cleanupAfterLoopExit body should be bounded by the next export"); + + const cleanupBody = autoSource.slice(cleanupIdx, dispatchIdx); + const pausedGuardIdx = cleanupBody.indexOf("if (!s.paused) {"); + const clearStatusIdx = cleanupBody.indexOf('ctx.ui.setStatus("gsd-auto", undefined);'); + + assert.ok(pausedGuardIdx > -1, "loop-exit cleanup must guard UI clearing when auto is paused"); + assert.ok(clearStatusIdx > pausedGuardIdx, "status clearing must live behind the paused guard"); + assert.ok( + autoSource.includes('ctx?.ui.setStatus("gsd-auto", "paused");'), + "pauseAuto must still set the paused badge for transient provider pauses", + ); +}); diff --git a/src/resources/extensions/gsd/tests/auto-start-model-capture.test.ts b/src/resources/extensions/gsd/tests/auto-start-model-capture.test.ts index 2ffb5bf96..0a455cba3 100644 --- a/src/resources/extensions/gsd/tests/auto-start-model-capture.test.ts +++ b/src/resources/extensions/gsd/tests/auto-start-model-capture.test.ts @@ -7,9 +7,8 @@ const sourcePath = join(import.meta.dirname, "..", "auto-start.ts"); const source = readFileSync(sourcePath, "utf-8"); test("bootstrapAutoSession snapshots ctx.model before guided-flow entry (#2829)", () => { - // #3517 changed the snapshot to prefer GSD preferences, but the ordering - // guarantee still holds: the snapshot must be built before guided-flow. - const snapshotIdx = source.indexOf("const startModelSnapshot = preferredModel"); + // The snapshot ordering guarantee still holds: build snapshot before guided-flow. 
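+  // For orientation, the resolution chain this test pins is assumed to look
+  // roughly like the following sketch (the real fallback expression in
+  // auto-start.ts may differ in detail):
+  //   const startModelSnapshot = manualSessionOverride ?? preferredModel ?? ctx.model;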
+ const snapshotIdx = source.indexOf("const startModelSnapshot = manualSessionOverride"); assert.ok(snapshotIdx > -1, "auto-start.ts should snapshot model at bootstrap start"); const firstDiscussIdx = source.indexOf('await showSmartEntry(ctx, pi, base, { step: requestedStepMode });'); @@ -29,8 +28,11 @@ test("bootstrapAutoSession restores autoModeStartModel from the early snapshot ( assert.ok(snapshotRefIdx > -1, "autoModeStartModel should be restored from startModelSnapshot"); }); -test("bootstrapAutoSession prefers GSD PREFERENCES.md over settings.json for start model (#3517)", () => { - // resolveDefaultSessionModel() should be called before the snapshot is built +test("bootstrapAutoSession checks manual session override before preferences", () => { + const manualIdx = source.indexOf("const manualSessionOverride = getSessionModelOverride("); + assert.ok(manualIdx > -1, "auto-start.ts should read session model override first"); + + // resolveDefaultSessionModel() should still be called for fallback behavior const preferredIdx = source.indexOf("const preferredModel = resolveDefaultSessionModel("); assert.ok(preferredIdx > -1, "auto-start.ts should call resolveDefaultSessionModel()"); @@ -38,11 +40,25 @@ test("bootstrapAutoSession prefers GSD PREFERENCES.md over settings.json for sta const withProviderIdx = source.indexOf("resolveDefaultSessionModel(ctx.model?.provider)"); assert.ok(withProviderIdx > -1, "auto-start.ts should pass ctx.model?.provider for bare ID resolution"); - const snapshotIdx = source.indexOf("const startModelSnapshot = preferredModel"); - assert.ok(snapshotIdx > -1, "startModelSnapshot should use preferredModel when available"); + const snapshotIdx = source.indexOf("const startModelSnapshot = manualSessionOverride"); + assert.ok(snapshotIdx > -1, "startModelSnapshot should prefer manual session override"); assert.ok( - preferredIdx < snapshotIdx, - "resolveDefaultSessionModel() must be called before building startModelSnapshot", + manualIdx < snapshotIdx && preferredIdx < snapshotIdx, + "manual override and preference fallback must be resolved before building startModelSnapshot", ); }); + +test("bootstrapAutoSession validates preferred model against live registry auth (#unconfigured-models)", () => { + // The raw PREFERENCES.md value must be validated against getAvailable() + // before being captured as the snapshot, so an unconfigured provider + // (no API key / OAuth) can't become autoModeStartModel. 
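+  // Hypothetical sketch of the guard being pinned, using only names that
+  // appear in the assertions below (the actual control flow may differ):
+  //   const available = ctx.modelRegistry.getAvailable();
+  //   if (preferredModel && !resolveModelId(preferredModel, available)) {
+  //     // warn: "<model> is not configured; falling back to session default"
+  //   }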
+ const validationIdx = source.indexOf("ctx.modelRegistry.getAvailable()"); + assert.ok(validationIdx > -1, "auto-start.ts should validate preferred model against getAvailable()"); + + const resolveModelIdIdx = source.indexOf("resolveModelId"); + assert.ok(resolveModelIdIdx > -1, "auto-start.ts should resolve preferred model against the registry"); + + const warningIdx = source.indexOf("is not configured; falling back to session default"); + assert.ok(warningIdx > -1, "auto-start.ts should warn when preferred model is unconfigured"); +}); diff --git a/src/resources/extensions/gsd/tests/auto-start-worktree-db-path.test.ts b/src/resources/extensions/gsd/tests/auto-start-worktree-db-path.test.ts new file mode 100644 index 000000000..f32bf41fb --- /dev/null +++ b/src/resources/extensions/gsd/tests/auto-start-worktree-db-path.test.ts @@ -0,0 +1,28 @@ +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +import { createTestContext } from "./test-helpers.ts"; + +const { assertTrue, report } = createTestContext(); + +const srcPath = join(import.meta.dirname, "..", "auto-start.ts"); +const src = readFileSync(srcPath, "utf-8"); + +console.log("\n=== #3822: worktree bootstrap uses project DB path ==="); + +const dbLifecycleIdx = src.indexOf("// ── DB lifecycle ──"); +assertTrue(dbLifecycleIdx > 0, "auto-start.ts has a DB lifecycle section"); + +const dbLifecycleRegion = dbLifecycleIdx > 0 ? src.slice(dbLifecycleIdx, dbLifecycleIdx + 600) : ""; + +assertTrue( + dbLifecycleRegion.includes("const gsdDbPath = resolveProjectRootDbPath(s.basePath);"), + "DB lifecycle resolves the project-root DB path after worktree entry (#3822)", +); + +assertTrue( + !dbLifecycleRegion.includes('join(s.basePath, ".gsd", "gsd.db")'), + "DB lifecycle no longer derives gsd.db directly from the worktree path (#3822)", +); + +report(); diff --git a/src/resources/extensions/gsd/tests/block-db-writes.test.ts b/src/resources/extensions/gsd/tests/block-db-writes.test.ts new file mode 100644 index 000000000..72708fb7c --- /dev/null +++ b/src/resources/extensions/gsd/tests/block-db-writes.test.ts @@ -0,0 +1,63 @@ +/** + * Regression test for #3674 — block direct writes to gsd.db + * + * When gsd_complete_task was unavailable, agents fell back to shell-based + * sqlite3 writes, corrupting the WAL-backed database. The fix extends + * write-intercept to block file writes and bash commands targeting gsd.db. 
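+ *
+ * Illustrative failure mode (a sketch, not a captured transcript): an agent
+ * running
+ *   sqlite3 .gsd/gsd.db "UPDATE tasks SET status='complete'"
+ * outside the extension's WAL connection can leave gsd.db-wal and gsd.db-shm
+ * inconsistent with the main database file.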
+ */ + +import { describe, test } from 'node:test'; +import assert from 'node:assert/strict'; +import { isBlockedStateFile, isBashWriteToStateFile } from '../write-intercept.ts'; + +describe('isBlockedStateFile blocks gsd.db paths (#3674)', () => { + test('blocks .gsd/gsd.db', () => { + assert.ok(isBlockedStateFile('/project/.gsd/gsd.db')); + }); + + test('blocks .gsd/gsd.db-wal', () => { + assert.ok(isBlockedStateFile('/project/.gsd/gsd.db-wal')); + }); + + test('blocks .gsd/gsd.db-shm', () => { + assert.ok(isBlockedStateFile('/project/.gsd/gsd.db-shm')); + }); + + test('blocks resolved symlink path under .gsd/projects/', () => { + assert.ok(isBlockedStateFile('/home/user/.gsd/projects/myproj/gsd.db')); + }); + + test('still blocks STATE.md', () => { + assert.ok(isBlockedStateFile('/project/.gsd/STATE.md')); + }); + + test('does not block other .gsd files', () => { + assert.ok(!isBlockedStateFile('/project/.gsd/DECISIONS.md')); + }); +}); + +describe('isBashWriteToStateFile blocks DB shell commands (#3674)', () => { + test('blocks sqlite3 targeting gsd.db', () => { + assert.ok(isBashWriteToStateFile('sqlite3 .gsd/gsd.db "INSERT INTO ..."')); + }); + + test('blocks better-sqlite3 targeting gsd.db', () => { + assert.ok(isBashWriteToStateFile('node -e "require(\'better-sqlite3\')(\'.gsd/gsd.db\')"')); + }); + + test('blocks shell redirect to gsd.db', () => { + assert.ok(isBashWriteToStateFile('echo data > .gsd/gsd.db')); + }); + + test('blocks cp to gsd.db', () => { + assert.ok(isBashWriteToStateFile('cp backup.db .gsd/gsd.db')); + }); + + test('blocks mv to gsd.db', () => { + assert.ok(isBashWriteToStateFile('mv temp.db .gsd/gsd.db')); + }); + + test('does not block reading gsd.db with cat', () => { + assert.ok(!isBashWriteToStateFile('cat .gsd/gsd.db')); + }); +}); diff --git a/src/resources/extensions/gsd/tests/bootstrap-derive-state-db-open.test.ts b/src/resources/extensions/gsd/tests/bootstrap-derive-state-db-open.test.ts new file mode 100644 index 000000000..5c2d18cfc --- /dev/null +++ b/src/resources/extensions/gsd/tests/bootstrap-derive-state-db-open.test.ts @@ -0,0 +1,39 @@ +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +const systemContextSrc = readFileSync( + join(import.meta.dirname, "..", "bootstrap", "system-context.ts"), + "utf-8", +); +const registerHooksSrc = readFileSync( + join(import.meta.dirname, "..", "bootstrap", "register-hooks.ts"), + "utf-8", +); + +describe("bootstrap deriveState DB guards (#3844)", () => { + test("system-context opens DB before deriveState in resume flows", () => { + const helperIdx = systemContextSrc.indexOf("const ensureStateDbOpen = async () => {"); + const firstDeriveIdx = systemContextSrc.indexOf("const state = await deriveState(basePath);"); + assert.ok(helperIdx > -1, "system-context should define a DB-open helper for deriveState callers"); + assert.ok(firstDeriveIdx > -1, "system-context should still derive state for resume flows"); + assert.ok(helperIdx < firstDeriveIdx, "system-context should prepare DB opening before deriveState resume calls"); + assert.match( + systemContextSrc, + /await ensureStateDbOpen\(\);\s*\n\s*const state = await deriveState\(basePath\);/g, + "system-context resume flows should open DB before deriveState", + ); + }); + + test("register-hooks opens DB before deriveState in session_before_compact", () => { + const compactIdx = registerHooksSrc.indexOf('pi.on("session_before_compact"'); + 
assert.ok(compactIdx > -1, "register-hooks should define session_before_compact"); + const compactSection = registerHooksSrc.slice(compactIdx, compactIdx + 1600); + const ensureIdx = compactSection.indexOf("ensureDbOpen()"); + const deriveIdx = compactSection.indexOf("deriveState(basePath)"); + assert.ok(ensureIdx > -1, "session_before_compact should call ensureDbOpen()"); + assert.ok(deriveIdx > -1, "session_before_compact should derive state"); + assert.ok(ensureIdx < deriveIdx, "session_before_compact should open DB before deriveState"); + }); +}); diff --git a/src/resources/extensions/gsd/tests/capability-router.test.ts b/src/resources/extensions/gsd/tests/capability-router.test.ts index 751fc6e11..8e185b508 100644 --- a/src/resources/extensions/gsd/tests/capability-router.test.ts +++ b/src/resources/extensions/gsd/tests/capability-router.test.ts @@ -11,6 +11,7 @@ import { getEligibleModels, resolveModelForComplexity, MODEL_CAPABILITY_PROFILES, + MODEL_CAPABILITY_TIER, BASE_REQUIREMENTS, defaultRoutingConfig, } from "../model-router.js"; @@ -125,13 +126,9 @@ describe("computeTaskRequirements", () => { // ─── MODEL_CAPABILITY_PROFILES ─────────────────────────────────────────────── describe("MODEL_CAPABILITY_PROFILES", () => { - test("contains all 9 required models", () => { - const required = [ - "claude-opus-4-6", "claude-sonnet-4-6", "claude-haiku-4-5", - "gpt-4o", "gpt-4o-mini", "gemini-2.5-pro", "gemini-2.0-flash", - "deepseek-chat", "o3", - ]; - for (const model of required) { + test("contains profiles for all tier-mapped models", () => { + const tierModels = Object.keys(MODEL_CAPABILITY_TIER); + for (const model of tierModels) { assert.ok(MODEL_CAPABILITY_PROFILES[model], `Missing profile for ${model}`); } }); @@ -345,3 +342,30 @@ describe("RoutingDecision.selectionMethod", () => { assert.equal(result.selectionMethod, "tier-only"); }); }); + +// ─── ADR-004: Profile Completeness Lint ───────────────────────────────────── +// Every model in MODEL_CAPABILITY_TIER must have an entry in +// MODEL_CAPABILITY_PROFILES. This prevents profile staleness as new models +// are added to the tier map without corresponding capability data. 
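+// For reference, the two maps are assumed to have roughly these shapes
+// (illustrative entries only, not the real data):
+//   MODEL_CAPABILITY_TIER:     { "claude-sonnet-4-6": <tier>, ... }
+//   MODEL_CAPABILITY_PROFILES: { "claude-sonnet-4-6": { ...capability data }, ... }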
+ +describe("profile completeness (ADR-004 lint)", () => { + test("every model in MODEL_CAPABILITY_TIER has a MODEL_CAPABILITY_PROFILES entry", () => { + const tierModels = Object.keys(MODEL_CAPABILITY_TIER); + const missing = tierModels.filter(id => !MODEL_CAPABILITY_PROFILES[id]); + assert.equal( + missing.length, + 0, + `Models in MODEL_CAPABILITY_TIER but missing from MODEL_CAPABILITY_PROFILES:\n ${missing.join("\n ")}\n\nAdd capability profiles for these models in model-router.ts.`, + ); + }); + + test("MODEL_CAPABILITY_PROFILES does not contain models absent from MODEL_CAPABILITY_TIER", () => { + const profileModels = Object.keys(MODEL_CAPABILITY_PROFILES); + const orphaned = profileModels.filter(id => !MODEL_CAPABILITY_TIER[id]); + assert.equal( + orphaned.length, + 0, + `Models in MODEL_CAPABILITY_PROFILES but not in MODEL_CAPABILITY_TIER:\n ${orphaned.join("\n ")}\n\nEither add these to MODEL_CAPABILITY_TIER or remove stale profiles.`, + ); + }); +}); diff --git a/src/resources/extensions/gsd/tests/cmux.test.ts b/src/resources/extensions/gsd/tests/cmux.test.ts index 0e6dd8e77..305a3ef0d 100644 --- a/src/resources/extensions/gsd/tests/cmux.test.ts +++ b/src/resources/extensions/gsd/tests/cmux.test.ts @@ -1,7 +1,8 @@ -import test, { describe } from "node:test"; +import test, { describe, beforeEach, afterEach } from "node:test"; import assert from "node:assert/strict"; import * as fs from "node:fs"; import * as path from "node:path"; +import { tmpdir } from "node:os"; import { fileURLToPath } from "node:url"; import { buildCmuxProgress, @@ -12,6 +13,7 @@ import { resolveCmuxConfig, shouldPromptToEnableCmux, } from "../../cmux/index.ts"; +import { autoEnableCmuxPreferences } from "../commands-cmux.ts"; import type { GSDState } from "../types.ts"; test("detectCmuxEnvironment requires workspace, surface, and socket", () => { @@ -79,6 +81,70 @@ test("shouldPromptToEnableCmux only prompts once per session", () => { resetCmuxPromptState(); }); +describe("autoEnableCmuxPreferences", () => { + let tmp: string; + let originalCwd: string; + + beforeEach(() => { + originalCwd = process.cwd(); + tmp = fs.mkdtempSync(path.join(tmpdir(), "cmux-auto-test-")); + fs.mkdirSync(path.join(tmp, ".gsd"), { recursive: true }); + process.chdir(tmp); + }); + + afterEach(() => { + process.chdir(originalCwd); + fs.rmSync(tmp, { recursive: true, force: true }); + }); + + test("writes cmux.enabled true when preferences file exists with no cmux config", () => { + const prefsPath = path.join(tmp, ".gsd", "preferences.md"); + fs.writeFileSync(prefsPath, [ + "---", + "version: 1", + "---", + "", + "# GSD Skill Preferences", + ].join("\n")); + + const result = autoEnableCmuxPreferences(); + assert.equal(result, true); + + const content = fs.readFileSync(prefsPath, "utf-8"); + assert.ok(content.includes("enabled: true"), "should write enabled: true"); + assert.ok(content.includes("notifications: true"), "should default notifications on"); + assert.ok(content.includes("sidebar: true"), "should default sidebar on"); + assert.ok(content.includes("splits: false"), "should default splits off"); + }); + + test("returns false when preferences file does not exist", () => { + const result = autoEnableCmuxPreferences(); + assert.equal(result, false); + }); + + test("preserves existing cmux sub-preferences when auto-enabling", () => { + const prefsPath = path.join(tmp, ".gsd", "preferences.md"); + fs.writeFileSync(prefsPath, [ + "---", + "version: 1", + "cmux:", + " splits: true", + " browser: true", + "---", + "", + "# GSD 
Skill Preferences", + ].join("\n")); + + const result = autoEnableCmuxPreferences(); + assert.equal(result, true); + + const content = fs.readFileSync(prefsPath, "utf-8"); + assert.ok(content.includes("enabled: true"), "should set enabled: true"); + assert.ok(content.includes("splits: true"), "should preserve existing splits: true"); + assert.ok(content.includes("browser: true"), "should preserve existing browser: true"); + }); +}); + test("buildCmuxStatusLabel and progress prefer deepest active unit", () => { const state: GSDState = { activeMilestone: { id: "M001", title: "Milestone" }, diff --git a/src/resources/extensions/gsd/tests/codebase-generator.test.ts b/src/resources/extensions/gsd/tests/codebase-generator.test.ts index d8d3d74c8..923c19f1d 100644 --- a/src/resources/extensions/gsd/tests/codebase-generator.test.ts +++ b/src/resources/extensions/gsd/tests/codebase-generator.test.ts @@ -162,6 +162,34 @@ test("generateCodebaseMap: excludes .claude/ and other tool directories", () => } }); +test("generateCodebaseMap: excludes .agents/ and other tooling directories", () => { + const base = makeTmpRepo(); + try { + addFile(base, "src/main.ts"); + addFile(base, ".agents/skills/pdf/SKILL.md"); + addFile(base, ".agents/skills/find-skills/SKILL.md"); + addFile(base, ".bg-shell/session.json"); + addFile(base, ".idea/workspace.xml"); + addFile(base, ".cache/data.bin"); + addFile(base, "tmp/scratch.ts"); + addFile(base, "target/debug/build.rs"); + addFile(base, "venv/lib/site.py"); + + const result = generateCodebaseMap(base); + assert.ok(result.content.includes("`src/main.ts`"), "should include src/main.ts"); + assert.ok(!result.content.includes("SKILL.md"), "should exclude .agents/ files"); + assert.ok(!result.content.includes(".bg-shell"), "should exclude .bg-shell/ files"); + assert.ok(!result.content.includes(".idea"), "should exclude .idea/ files"); + assert.ok(!result.content.includes(".cache"), "should exclude .cache/ files"); + assert.ok(!result.content.includes("tmp/"), "should exclude tmp/ files"); + assert.ok(!result.content.includes("target"), "should exclude target/ files"); + assert.ok(!result.content.includes("venv"), "should exclude venv/ files"); + assert.equal(result.fileCount, 1); + } finally { + cleanup(base); + } +}); + test("generateCodebaseMap: excludes binary and lock files", () => { const base = makeTmpRepo(); try { diff --git a/src/resources/extensions/gsd/tests/complete-slice-gate-closure.test.ts b/src/resources/extensions/gsd/tests/complete-slice-gate-closure.test.ts new file mode 100644 index 000000000..6a764ef55 --- /dev/null +++ b/src/resources/extensions/gsd/tests/complete-slice-gate-closure.test.ts @@ -0,0 +1,167 @@ +/** + * complete-slice gate closure integration test. + * + * Pins the fix for the Q8-stall bug: complete-slice must close every gate + * owned by the complete-slice turn based on the content of the matching + * CompleteSliceParams field. Without this, Q8 stays pending forever and + * blocks state derivation on subsequent loops. 
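+ *
+ * Gate lifecycle pinned by these tests: plan-slice seeds Q8 as
+ * status='pending'; complete-slice must close it as status='complete' with
+ * verdict 'pass' (operationalReadiness populated) or 'omitted' (field empty
+ * or absent). It must never stay 'pending'.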
+ */
+
+import { describe, test, beforeEach, afterEach } from "node:test";
+import assert from "node:assert/strict";
+import * as fs from "node:fs";
+import * as path from "node:path";
+import * as os from "node:os";
+
+import {
+  openDatabase,
+  closeDatabase,
+  insertMilestone,
+  insertSlice,
+  insertTask,
+  insertGateRow,
+  getGateResults,
+} from "../gsd-db.ts";
+import { handleCompleteSlice } from "../tools/complete-slice.ts";
+import type { CompleteSliceParams } from "../types.ts";
+
+function makeValidSliceParams(overrides: Partial<CompleteSliceParams> = {}): CompleteSliceParams {
+  return {
+    sliceId: "S01",
+    milestoneId: "M001",
+    sliceTitle: "Test Slice",
+    oneLiner: "Implemented test slice",
+    narrative: "Built and tested.",
+    verification: "All tests pass.",
+    deviations: "None.",
+    knownLimitations: "None.",
+    followUps: "None.",
+    keyFiles: ["src/foo.ts"],
+    keyDecisions: [],
+    patternsEstablished: [],
+    observabilitySurfaces: [],
+    provides: [],
+    requirementsSurfaced: [],
+    drillDownPaths: [],
+    affects: [],
+    requirementsAdvanced: [],
+    requirementsValidated: [],
+    requirementsInvalidated: [],
+    filesModified: [],
+    requires: [],
+    uatContent: "## Smoke Test\n\nVerify happy path.",
+    ...overrides,
+  };
+}
+
+describe("complete-slice closes complete-slice-owned gates", () => {
+  let dbPath: string;
+  let basePath: string;
+
+  beforeEach(() => {
+    dbPath = path.join(
+      fs.mkdtempSync(path.join(os.tmpdir(), "gsd-slice-gate-")),
+      "test.db",
+    );
+    openDatabase(dbPath);
+
+    basePath = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-slice-gate-handler-"));
+    const sliceDir = path.join(
+      basePath, ".gsd", "milestones", "M001", "slices", "S01", "tasks",
+    );
+    fs.mkdirSync(sliceDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(basePath, ".gsd", "milestones", "M001", "M001-ROADMAP.md"),
+      [
+        "# M001: Test Milestone",
+        "",
+        "## Slices",
+        "",
+        '- [ ] **S01: Test Slice** `risk:medium` `depends:[]`',
+        "  - After this: basic functionality works",
+      ].join("\n"),
+    );
+
+    insertMilestone({ id: "M001" });
+    insertSlice({ id: "S01", milestoneId: "M001" });
+    insertTask({
+      id: "T01", sliceId: "S01", milestoneId: "M001",
+      status: "complete", title: "Task 1",
+    });
+
+    // Seed Q8 as pending — this is what plan-slice does today.
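+    // (No status field is passed here; the row is assumed to default to
+    // 'pending', matching the plan-slice behavior described above.)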
+ insertGateRow({ + milestoneId: "M001", sliceId: "S01", + gateId: "Q8", scope: "slice", + }); + }); + + afterEach(() => { + closeDatabase(); + fs.rmSync(path.dirname(dbPath), { recursive: true, force: true }); + fs.rmSync(basePath, { recursive: true, force: true }); + }); + + test("Q8 closes as 'pass' when operationalReadiness is populated", async () => { + const params = makeValidSliceParams({ + operationalReadiness: [ + "- Health signal: /health endpoint returns 200", + "- Failure signal: error rate alert in observability dashboard", + "- Recovery: systemd auto-restart", + ].join("\n"), + }); + + const result = await handleCompleteSlice(params, basePath); + assert.ok(!("error" in result), `handler failed: ${(result as any).error}`); + + const gates = getGateResults("M001", "S01", "slice"); + const q8 = gates.find((g) => g.gate_id === "Q8"); + assert.ok(q8, "Q8 row must exist after complete-slice"); + assert.equal(q8.status, "complete"); + assert.equal(q8.verdict, "pass"); + assert.ok( + q8.findings.includes("Health signal"), + "Q8 findings must capture the operationalReadiness content", + ); + }); + + test("Q8 closes as 'omitted' when operationalReadiness is empty", async () => { + const params = makeValidSliceParams({ operationalReadiness: "" }); + + const result = await handleCompleteSlice(params, basePath); + assert.ok(!("error" in result), `handler failed: ${(result as any).error}`); + + const gates = getGateResults("M001", "S01", "slice"); + const q8 = gates.find((g) => g.gate_id === "Q8"); + assert.ok(q8, "Q8 row must exist after complete-slice"); + assert.equal(q8.status, "complete"); + assert.equal(q8.verdict, "omitted"); + }); + + test("Q8 also closes when operationalReadiness is omitted entirely", async () => { + // A model that doesn't pass operationalReadiness at all must still + // move Q8 out of 'pending' — leaving it pending produces the stall. 
+ const params = makeValidSliceParams(); + const result = await handleCompleteSlice(params, basePath); + assert.ok(!("error" in result), `handler failed: ${(result as any).error}`); + + const gates = getGateResults("M001", "S01", "slice"); + const q8 = gates.find((g) => g.gate_id === "Q8"); + assert.ok(q8); + assert.notEqual(q8.status, "pending", "Q8 must never remain pending after complete-slice"); + assert.equal(q8.verdict, "omitted"); + }); + + test("summary markdown contains Operational Readiness section", async () => { + const params = makeValidSliceParams({ + operationalReadiness: "- Health signal: /health\n- Failure signal: alert", + }); + const result = await handleCompleteSlice(params, basePath); + assert.ok(!("error" in result)); + if (!("error" in result)) { + const summary = fs.readFileSync(result.summaryPath, "utf-8"); + assert.match(summary, /^## Operational Readiness/m); + assert.match(summary, /Health signal: \/health/); + } + }); +}); diff --git a/src/resources/extensions/gsd/tests/complete-slice-prompt-task-summary-layout.test.ts b/src/resources/extensions/gsd/tests/complete-slice-prompt-task-summary-layout.test.ts new file mode 100644 index 000000000..c50389a1d --- /dev/null +++ b/src/resources/extensions/gsd/tests/complete-slice-prompt-task-summary-layout.test.ts @@ -0,0 +1,18 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +const promptPath = join(process.cwd(), "src/resources/extensions/gsd/prompts/complete-slice.md"); +const prompt = readFileSync(promptPath, "utf-8"); + +test("complete-slice prompt explains the flat task summary layout", () => { + assert.match(prompt, /flat file layout/i); + assert.match(prompt, /T01-SUMMARY\.md/); + assert.match(prompt, /not inside per-task subdirectories like `tasks\/T01\/SUMMARY\.md`/i); +}); + +test("complete-slice prompt forbids the wrong task summary glob", () => { + assert.match(prompt, /find .*tasks -name "\*-SUMMARY\.md"/i); + assert.match(prompt, /Never use `tasks\/\*\/SUMMARY\.md`/); +}); diff --git a/src/resources/extensions/gsd/tests/definition-io.test.ts b/src/resources/extensions/gsd/tests/definition-io.test.ts new file mode 100644 index 000000000..bbf9b793f --- /dev/null +++ b/src/resources/extensions/gsd/tests/definition-io.test.ts @@ -0,0 +1,57 @@ +/** + * definition-io.ts — unit tests for readFrozenDefinition. 
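+ *
+ * readFrozenDefinition(runDir) is exercised against <runDir>/DEFINITION.yaml;
+ * the happy path below assumes a { version, name, description, steps[] } shape.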
+ */ + +import { describe, test, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, writeFileSync, rmSync, realpathSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { readFrozenDefinition } from "../definition-io.ts"; + +function createTmpDir(): string { + return realpathSync(mkdtempSync(join(tmpdir(), "gsd-defio-test-"))); +} + +describe("readFrozenDefinition", () => { + let runDir: string; + + beforeEach(() => { + runDir = createTmpDir(); + }); + + afterEach(() => { + rmSync(runDir, { recursive: true, force: true }); + }); + + test("parses a valid DEFINITION.yaml", () => { + const yaml = [ + "version: 1", + "name: test-workflow", + "description: A test workflow", + "steps:", + " - id: step-1", + " prompt: do the thing", + ].join("\n"); + writeFileSync(join(runDir, "DEFINITION.yaml"), yaml, "utf-8"); + + const def = readFrozenDefinition(runDir); + assert.equal(def.version, 1); + assert.equal(def.name, "test-workflow"); + assert.equal(def.description, "A test workflow"); + assert.equal(def.steps.length, 1); + assert.equal(def.steps[0].id, "step-1"); + }); + + test("throws when DEFINITION.yaml is missing", () => { + assert.throws(() => readFrozenDefinition(runDir), { + code: "ENOENT", + }); + }); + + test("throws on malformed YAML", () => { + writeFileSync(join(runDir, "DEFINITION.yaml"), ": : : not valid yaml [", "utf-8"); + assert.throws(() => readFrozenDefinition(runDir)); + }); +}); diff --git a/src/resources/extensions/gsd/tests/derive-state-helpers.test.ts b/src/resources/extensions/gsd/tests/derive-state-helpers.test.ts new file mode 100644 index 000000000..035e5efb2 --- /dev/null +++ b/src/resources/extensions/gsd/tests/derive-state-helpers.test.ts @@ -0,0 +1,436 @@ +// GSD Extension — Tests for extracted deriveStateFromDb helper functions +// Copyright (c) 2026 Jeremy McSpadden +// +// Tests the composable helpers extracted from deriveStateFromDb: +// reconcileDiskToDb, buildCompletenessSet, buildRegistryAndFindActive, +// handleNoActiveMilestone, resolveSliceDependencies, reconcileSliceTasks, +// detectBlockers, checkReplanTrigger, checkInterruptedWork +// +// Helpers are private — exercised through deriveStateFromDb integration. + +import { describe, test, beforeEach, afterEach } from 'node:test'; +import assert from 'node:assert/strict'; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; + +import { invalidateStateCache, deriveStateFromDb } from '../state.ts'; +import { + openDatabase, + closeDatabase, + insertMilestone, + insertSlice, + insertTask, + updateTaskStatus, +} from '../gsd-db.ts'; + +// ─── Fixture Helpers ─────────────────────────────────────────────────────── + +function createFixtureBase(): string { + const base = mkdtempSync(join(tmpdir(), 'gsd-helpers-')); + mkdirSync(join(base, '.gsd', 'milestones'), { recursive: true }); + return base; +} + +function writeFile(base: string, relativePath: string, content: string): void { + const full = join(base, '.gsd', relativePath); + mkdirSync(join(full, '..'), { recursive: true }); + writeFileSync(full, content); +} + +function cleanup(base: string): void { + rmSync(base, { recursive: true, force: true }); +} + +const ROADMAP_CONTENT = `# M001: Test Milestone + +**Vision:** Test helpers. + +## Slices + +- [ ] **S01: First Slice** \`risk:low\` \`depends:[]\` + > After this: Slice done. 
+ +- [ ] **S02: Second Slice** \`risk:low\` \`depends:[S01]\` + > After this: All done. +`; + +const PLAN_CONTENT = `# S01: First Slice + +**Goal:** Test executing. +**Demo:** Tests pass. + +## Tasks + +- [ ] **T01: First Task** \`est:10m\` + First task description. + +- [x] **T02: Done Task** \`est:10m\` + Already done. +`; + +// ═══════════════════════════════════════════════════════════════════════════ +// Tests +// ═══════════════════════════════════════════════════════════════════════════ + +describe('derive-state-helpers', () => { + + // ─── handleNoActiveMilestone: all parked ───────────────────────────── + test('handleNoActiveMilestone: all milestones parked returns pre-planning with unpark hint', async () => { + const base = createFixtureBase(); + try { + writeFile(base, 'milestones/M001/M001-CONTEXT.md', '# M001\n\nContext.'); + writeFile(base, 'milestones/M001/M001-PARKED.md', 'Parked.'); + writeFile(base, 'milestones/M002/M002-CONTEXT.md', '# M002\n\nContext.'); + writeFile(base, 'milestones/M002/M002-PARKED.md', 'Also parked.'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'First', status: 'parked' }); + insertMilestone({ id: 'M002', title: 'Second', status: 'parked' }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + assert.equal(state.phase, 'pre-planning', 'all-parked: phase is pre-planning'); + assert.equal(state.activeMilestone, null, 'all-parked: no active milestone'); + assert.ok(state.nextAction.includes('parked'), 'all-parked: nextAction mentions parked'); + assert.ok(state.nextAction.includes('unpark'), 'all-parked: nextAction hints unpark'); + assert.equal(state.registry.length, 2, 'all-parked: both in registry'); + assert.ok(state.registry.every(e => e.status === 'parked'), 'all-parked: all registry entries parked'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // ─── handleNoActiveMilestone: all complete with active requirements ── + test('handleNoActiveMilestone: all complete with unmapped requirements', async () => { + const base = createFixtureBase(); + try { + writeFile(base, 'milestones/M001/M001-SUMMARY.md', '# M001 Summary\n\nDone.'); + writeFile(base, 'REQUIREMENTS.md', `# Requirements\n\n## Active\n\n### R001 — Unmapped\n- Status: active\n- Description: Not mapped.\n`); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'First', status: 'complete' }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + assert.equal(state.phase, 'complete', 'complete-reqs: phase is complete'); + assert.ok(state.nextAction.includes('1 active requirement'), 'complete-reqs: nextAction notes unmapped reqs'); + assert.equal(state.requirements?.active, 1, 'complete-reqs: requirements.active = 1'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // ─── resolveSliceDependencies: GSD_SLICE_LOCK with missing slice ──── + test('resolveSliceDependencies: GSD_SLICE_LOCK pointing to non-existent slice returns blocked', async () => { + const base = createFixtureBase(); + const origLock = process.env.GSD_SLICE_LOCK; + try { + writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT); + writeFile(base, 'milestones/M001/slices/S01/S01-PLAN.md', PLAN_CONTENT); + writeFile(base, 'milestones/M001/slices/S01/tasks/.gitkeep', ''); + writeFile(base, 'milestones/M001/slices/S01/tasks/T01-PLAN.md', '# T01 Plan'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'Test', status: 'active' }); + insertSlice({ id: 'S01', milestoneId: 'M001', 
title: 'First', status: 'active', risk: 'low', depends: [] }); + insertTask({ id: 'T01', sliceId: 'S01', milestoneId: 'M001', title: 'First Task', status: 'pending' }); + + process.env.GSD_SLICE_LOCK = 'S99'; + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + assert.equal(state.phase, 'blocked', 'slice-lock-miss: phase is blocked'); + assert.ok(state.blockers.some(b => b.includes('GSD_SLICE_LOCK=S99')), 'slice-lock-miss: blocker mentions lock'); + } finally { + if (origLock !== undefined) process.env.GSD_SLICE_LOCK = origLock; + else delete process.env.GSD_SLICE_LOCK; + closeDatabase(); + cleanup(base); + } + }); + + // ─── resolveSliceDependencies: GSD_SLICE_LOCK with valid slice ────── + test('resolveSliceDependencies: GSD_SLICE_LOCK targeting valid slice bypasses deps', async () => { + const base = createFixtureBase(); + const origLock = process.env.GSD_SLICE_LOCK; + try { + writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT); + // S02 depends on S01 but we lock to S02 directly + writeFile(base, 'milestones/M001/slices/S02/S02-PLAN.md', `# S02\n\n**Goal:** Test.\n**Demo:** Pass.\n\n## Tasks\n\n- [ ] **T01: Task** \`est:5m\`\n Do thing.\n`); + writeFile(base, 'milestones/M001/slices/S02/tasks/.gitkeep', ''); + writeFile(base, 'milestones/M001/slices/S02/tasks/T01-PLAN.md', '# T01 Plan'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'Test', status: 'active' }); + insertSlice({ id: 'S01', milestoneId: 'M001', title: 'First', status: 'pending', risk: 'low', depends: [] }); + insertSlice({ id: 'S02', milestoneId: 'M001', title: 'Second', status: 'pending', risk: 'low', depends: ['S01'] }); + insertTask({ id: 'T01', sliceId: 'S02', milestoneId: 'M001', title: 'Task', status: 'pending' }); + + process.env.GSD_SLICE_LOCK = 'S02'; + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + assert.equal(state.activeSlice?.id, 'S02', 'slice-lock-valid: activeSlice is S02 (locked)'); + assert.equal(state.phase, 'executing', 'slice-lock-valid: phase is executing'); + } finally { + if (origLock !== undefined) process.env.GSD_SLICE_LOCK = origLock; + else delete process.env.GSD_SLICE_LOCK; + closeDatabase(); + cleanup(base); + } + }); + + // ─── reconcileSliceTasks: plan file imports tasks when DB empty ────── + test('reconcileSliceTasks: imports tasks from plan file when DB has zero tasks (#3600)', async () => { + const base = createFixtureBase(); + try { + writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT); + writeFile(base, 'milestones/M001/slices/S01/S01-PLAN.md', PLAN_CONTENT); + writeFile(base, 'milestones/M001/slices/S01/tasks/.gitkeep', ''); + writeFile(base, 'milestones/M001/slices/S01/tasks/T01-PLAN.md', '# T01 Plan'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'Test', status: 'active' }); + insertSlice({ id: 'S01', milestoneId: 'M001', title: 'First', status: 'active', risk: 'low', depends: [] }); + insertSlice({ id: 'S02', milestoneId: 'M001', title: 'Second', status: 'pending', risk: 'low', depends: ['S01'] }); + // No tasks inserted — reconcileSliceTasks should import from plan file + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + // Plan has T01 (pending) and T02 (done) — reconciliation imports both + assert.equal(state.phase, 'executing', 'task-reconcile: phase is executing (tasks imported)'); + assert.equal(state.activeTask?.id, 'T01', 'task-reconcile: activeTask is T01'); + assert.equal(state.progress?.tasks?.total, 2, 
'task-reconcile: total tasks = 2'); + assert.equal(state.progress?.tasks?.done, 1, 'task-reconcile: done tasks = 1 (T02 was [x])'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // ─── reconcileSliceTasks: stale task reconciled from disk summary ──── + test('reconcileSliceTasks: stale pending task reconciled to complete when disk SUMMARY exists (#2514)', async () => { + const base = createFixtureBase(); + try { + writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT); + writeFile(base, 'milestones/M001/slices/S01/S01-PLAN.md', PLAN_CONTENT); + writeFile(base, 'milestones/M001/slices/S01/tasks/.gitkeep', ''); + writeFile(base, 'milestones/M001/slices/S01/tasks/T01-PLAN.md', '# T01 Plan'); + // T01 has a summary on disk but DB still says pending + writeFile(base, 'milestones/M001/slices/S01/tasks/T01-SUMMARY.md', '# T01 Summary\n\nDone on disk.'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'Test', status: 'active' }); + insertSlice({ id: 'S01', milestoneId: 'M001', title: 'First', status: 'active', risk: 'low', depends: [] }); + insertSlice({ id: 'S02', milestoneId: 'M001', title: 'Second', status: 'pending', risk: 'low', depends: ['S01'] }); + insertTask({ id: 'T01', sliceId: 'S01', milestoneId: 'M001', title: 'First Task', status: 'pending' }); + insertTask({ id: 'T02', sliceId: 'S01', milestoneId: 'M001', title: 'Done Task', status: 'complete' }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + // T01 should have been reconciled to complete (SUMMARY exists on disk) + // Both tasks complete → phase should be summarizing + assert.equal(state.phase, 'summarizing', 'stale-task: phase is summarizing (T01 reconciled)'); + assert.equal(state.activeTask, null, 'stale-task: no active task (all done)'); + assert.equal(state.progress?.tasks?.done, 2, 'stale-task: tasks.done = 2'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // ─── detectBlockers: blocker_discovered triggers replanning ────────── + test('detectBlockers: task with blocker_discovered triggers replanning-slice', async () => { + const base = createFixtureBase(); + try { + writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT); + writeFile(base, 'milestones/M001/slices/S01/S01-PLAN.md', PLAN_CONTENT); + writeFile(base, 'milestones/M001/slices/S01/tasks/.gitkeep', ''); + writeFile(base, 'milestones/M001/slices/S01/tasks/T01-PLAN.md', '# T01 Plan'); + // T02 completed with blocker discovered — written in summary frontmatter + writeFile(base, 'milestones/M001/slices/S01/tasks/T02-SUMMARY.md', + '---\nblocker_discovered: true\n---\n\n# T02 Summary\n\nFound a blocker.'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'Test', status: 'active' }); + insertSlice({ id: 'S01', milestoneId: 'M001', title: 'First', status: 'active', risk: 'low', depends: [] }); + insertSlice({ id: 'S02', milestoneId: 'M001', title: 'Second', status: 'pending', risk: 'low', depends: ['S01'] }); + insertTask({ id: 'T01', sliceId: 'S01', milestoneId: 'M001', title: 'First Task', status: 'pending' }); + insertTask({ id: 'T02', sliceId: 'S01', milestoneId: 'M001', title: 'Done Task', status: 'complete' }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + assert.equal(state.phase, 'replanning-slice', 'blocker: phase is replanning-slice'); + assert.ok(state.blockers.some(b => b.includes('T02')), 'blocker: blockers mention T02'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // ─── 
checkInterruptedWork: continue.md triggers resume hint ───────── + test('checkInterruptedWork: continue.md present triggers resume nextAction', async () => { + const base = createFixtureBase(); + try { + writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT); + writeFile(base, 'milestones/M001/slices/S01/S01-PLAN.md', PLAN_CONTENT); + writeFile(base, 'milestones/M001/slices/S01/tasks/.gitkeep', ''); + writeFile(base, 'milestones/M001/slices/S01/tasks/T01-PLAN.md', '# T01 Plan'); + writeFile(base, 'milestones/M001/slices/S01/S01-CONTINUE.md', 'Resume from here.'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'Test', status: 'active' }); + insertSlice({ id: 'S01', milestoneId: 'M001', title: 'First', status: 'active', risk: 'low', depends: [] }); + insertSlice({ id: 'S02', milestoneId: 'M001', title: 'Second', status: 'pending', risk: 'low', depends: ['S01'] }); + insertTask({ id: 'T01', sliceId: 'S01', milestoneId: 'M001', title: 'First Task', status: 'pending' }); + insertTask({ id: 'T02', sliceId: 'S01', milestoneId: 'M001', title: 'Done Task', status: 'complete' }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + assert.equal(state.phase, 'executing', 'continue: phase is still executing'); + assert.ok(state.nextAction.includes('Resume interrupted work'), 'continue: nextAction mentions resume'); + assert.ok(state.nextAction.includes('continue.md'), 'continue: nextAction mentions continue.md'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // ─── buildCompletenessSet: SUMMARY-on-disk marks complete ─────────── + test('buildCompletenessSet: milestone with SUMMARY on disk treated as complete', async () => { + const base = createFixtureBase(); + try { + // M001 has summary on disk but DB status is still 'active' + writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT); + writeFile(base, 'milestones/M001/M001-SUMMARY.md', '# M001 Summary\n\nDone.'); + // M002 is the real active milestone + writeFile(base, 'milestones/M002/M002-CONTEXT.md', '# M002\n\nActive.'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'First', status: 'active' }); + insertMilestone({ id: 'M002', title: 'Second', status: 'active' }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + // M001 should be complete (summary on disk), M002 should be active + const m1 = state.registry.find(e => e.id === 'M001'); + assert.equal(m1?.status, 'complete', 'summary-disk: M001 marked complete via disk SUMMARY'); + assert.equal(state.activeMilestone?.id, 'M002', 'summary-disk: M002 is active'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // ─── reconcileDiskToDb: disk slices synced into DB (#2533) ────────── + test('reconcileDiskToDb: slices in ROADMAP.md but missing from DB are auto-inserted (#2533)', async () => { + const base = createFixtureBase(); + try { + writeFile(base, 'milestones/M001/M001-ROADMAP.md', ROADMAP_CONTENT); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'Test', status: 'active' }); + // No slices inserted — reconcileDiskToDb should insert from roadmap + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + // Slices should have been reconciled from roadmap, S01 should be the active slice + assert.equal(state.activeMilestone?.id, 'M001', 'slice-reconcile: M001 is active'); + assert.equal(state.activeSlice?.id, 'S01', 'slice-reconcile: S01 reconciled and active'); + assert.ok((state.progress?.slices?.total 
?? 0) >= 2, 'slice-reconcile: at least 2 slices reconciled'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // ─── Queue order: milestones sorted by custom queue order ─────────── + test('deriveStateFromDb respects custom queue order from QUEUE-ORDER.json', async () => { + const base = createFixtureBase(); + try { + // M003 should come first per queue order, M001 second + const queueOrder = JSON.stringify({ order: ['M003', 'M001', 'M002'], updatedAt: new Date().toISOString() }); + writeFileSync(join(base, '.gsd', 'QUEUE-ORDER.json'), queueOrder); + writeFile(base, 'milestones/M001/M001-CONTEXT.md', '# M001\n\nContext.'); + writeFile(base, 'milestones/M002/M002-CONTEXT.md', '# M002\n\nContext.'); + writeFile(base, 'milestones/M003/M003-CONTEXT.md', '# M003\n\nContext.'); + + openDatabase(':memory:'); + // Insert in natural order — queue ordering should override + insertMilestone({ id: 'M001', title: 'First', status: 'active' }); + insertMilestone({ id: 'M002', title: 'Second', status: 'active' }); + insertMilestone({ id: 'M003', title: 'Third', status: 'active' }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + // M003 should be the active milestone (first in queue) + assert.equal(state.activeMilestone?.id, 'M003', 'queue-order: M003 is active (first in queue)'); + assert.equal(state.registry[0]?.id, 'M003', 'queue-order: registry[0] is M003'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // ─── handleAllSlicesDone: needs-remediation re-triggers validation ── + test('handleAllSlicesDone: needs-remediation verdict triggers validating-milestone', async () => { + const base = createFixtureBase(); + try { + const doneRoadmap = `# M001: Remediation Test\n\n**Vision:** Test.\n\n## Slices\n\n- [x] **S01: Done** \`risk:low\` \`depends:[]\`\n > Done.\n`; + writeFile(base, 'milestones/M001/M001-ROADMAP.md', doneRoadmap); + writeFile(base, 'milestones/M001/M001-VALIDATION.md', + '---\nverdict: needs-remediation\nremediation_round: 1\n---\n\n# Validation\nNeeds remediation.'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'Remediation Test', status: 'active' }); + insertSlice({ id: 'S01', milestoneId: 'M001', title: 'Done', status: 'complete', risk: 'low', depends: [] }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + assert.equal(state.phase, 'validating-milestone', 'remediation: phase is validating-milestone'); + assert.equal(state.activeMilestone?.id, 'M001', 'remediation: activeMilestone is M001'); + } finally { + closeDatabase(); + cleanup(base); + } + }); + + // ─── Deferred queued shell: shell milestone deferred, real one promoted ── + test('buildRegistryAndFindActive: queued shell deferred, later real milestone becomes active (#3470)', async () => { + const base = createFixtureBase(); + try { + // M001: queued shell — no content, no slices + mkdirSync(join(base, '.gsd', 'milestones', 'M001'), { recursive: true }); + // M002: real milestone with context + writeFile(base, 'milestones/M002/M002-CONTEXT.md', '# M002: Real\n\nActive milestone.'); + + openDatabase(':memory:'); + insertMilestone({ id: 'M001', title: 'Shell', status: 'queued' }); + insertMilestone({ id: 'M002', title: 'Real', status: 'active' }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + // M002 should be active (M001 queued shell deferred) + assert.equal(state.activeMilestone?.id, 'M002', 'deferred-shell: M002 is active (shell deferred)'); + } finally { + closeDatabase(); + 
cleanup(base); + } + }); +}); diff --git a/src/resources/extensions/gsd/tests/discord-invite-links.test.ts b/src/resources/extensions/gsd/tests/discord-invite-links.test.ts index 8b82d4749..dffe0af61 100644 --- a/src/resources/extensions/gsd/tests/discord-invite-links.test.ts +++ b/src/resources/extensions/gsd/tests/discord-invite-links.test.ts @@ -18,7 +18,7 @@ const VALID_INVITE = "https://discord.com/invite/nKXTsAcmbT"; /** Files that contain user-facing Discord invite links. */ const FILES_WITH_INVITE_LINKS: string[] = [ "README.md", - "docs/what-is-pi/15-pi-packages-the-ecosystem.md", + "docs/dev/what-is-pi/15-pi-packages-the-ecosystem.md", ]; describe("Discord invite links (#2699)", () => { diff --git a/src/resources/extensions/gsd/tests/discuss-incremental-persistence.test.ts b/src/resources/extensions/gsd/tests/discuss-incremental-persistence.test.ts index aa3f0d42f..80b01a20b 100644 --- a/src/resources/extensions/gsd/tests/discuss-incremental-persistence.test.ts +++ b/src/resources/extensions/gsd/tests/discuss-incremental-persistence.test.ts @@ -27,10 +27,19 @@ describe("discuss incremental persistence (#2152)", () => { assert.match(content, /Incremental persistence/, "should have incremental persistence section"); }); + test("new-project discuss prompt includes CONTEXT-DRAFT save instruction", () => { + const content = readFileSync(join(promptsDir, "discuss.md"), "utf-8"); + assert.match(content, /CONTEXT-DRAFT/, "should mention CONTEXT-DRAFT"); + assert.match(content, /Incremental persistence/, "should have incremental persistence section"); + assert.match(content, /gsd_summary_save/, "should use gsd_summary_save tool"); + }); + test("drafts are saved silently without user notification", () => { const milestone = readFileSync(join(promptsDir, "guided-discuss-milestone.md"), "utf-8"); const slice = readFileSync(join(promptsDir, "guided-discuss-slice.md"), "utf-8"); + const discuss = readFileSync(join(promptsDir, "discuss.md"), "utf-8"); assert.match(milestone, /Do NOT mention this save to the user/); assert.match(slice, /Do NOT mention this to the user/); + assert.match(discuss, /Do NOT mention this save to the user/); }); }); diff --git a/src/resources/extensions/gsd/tests/dispatch-guard.test.ts b/src/resources/extensions/gsd/tests/dispatch-guard.test.ts index 1989a0195..c67de2a47 100644 --- a/src/resources/extensions/gsd/tests/dispatch-guard.test.ts +++ b/src/resources/extensions/gsd/tests/dispatch-guard.test.ts @@ -145,6 +145,59 @@ test("dispatch guard falls back to positional ordering when no dependencies decl ); }); +test("dispatch guard ignores positionally-earlier reverse dependents for zero-dependency slices (#3720)", (t) => { + const repo = setupRepo(); + t.after(() => teardownRepo(repo)); + + mkdirSync(join(repo, ".gsd", "milestones", "M015"), { recursive: true }); + + insertMilestone({ id: "M015", title: "Reverse dependency fallback" }); + insertSlice({ id: "S03", milestoneId: "M015", title: "Complete prerequisite", status: "complete", depends: [], sequence: 0 }); + insertSlice({ id: "S04", milestoneId: "M015", title: "Depends on S04A", status: "pending", depends: ["S03", "S04A"], sequence: 0 }); + insertSlice({ id: "S04A", milestoneId: "M015", title: "No explicit deps", status: "pending", depends: [], sequence: 0 }); + + writeFileSync(join(repo, ".gsd", "milestones", "M015", "M015-ROADMAP.md"), "# M015\n"); + + // S04A has no declared dependencies and should not be blocked by S04, because + // S04 itself depends on S04A. With sequence=0, DB ordering falls back to id. 
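+  // Dependency graph under test (from the inserts above):
+  //   S03 (complete)     S04A (declares no deps)
+  //          \              /
+  //           S04 depends on [S03, S04A]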
+ assert.equal( + getPriorSliceCompletionBlocker(repo, "main", "execute-task", "M015/S04A/T02"), + null, + ); + + // The reverse direction is still blocked normally. + assert.equal( + getPriorSliceCompletionBlocker(repo, "main", "execute-task", "M015/S04/T01"), + "Cannot dispatch execute-task M015/S04/T01: dependency slice M015/S04A is not complete.", + ); +}); + +test("dispatch guard treats zero-dependency slices as independent when a milestone uses explicit deps (#3998)", (t) => { + const repo = setupRepo(); + t.after(() => teardownRepo(repo)); + + mkdirSync(join(repo, ".gsd", "milestones", "M022"), { recursive: true }); + + insertMilestone({ id: "M022", title: "Mixed dependency milestone" }); + insertSlice({ id: "S02", milestoneId: "M022", title: "Core A", status: "complete", depends: [], sequence: 2 }); + insertSlice({ id: "S03", milestoneId: "M022", title: "Core B", status: "complete", depends: [], sequence: 3 }); + insertSlice({ id: "S05", milestoneId: "M022", title: "Blocked integration", status: "pending", depends: ["S02", "S03", "S07"], sequence: 5 }); + insertSlice({ id: "S06", milestoneId: "M022", title: "Independent zero-dep slice", status: "pending", depends: [], sequence: 6 }); + insertSlice({ id: "S07", milestoneId: "M022", title: "Late prerequisite", status: "pending", depends: ["S02"], sequence: 7 }); + + writeFileSync(join(repo, ".gsd", "milestones", "M022", "M022-ROADMAP.md"), "# M022\n"); + + assert.equal( + getPriorSliceCompletionBlocker(repo, "main", "execute-task", "M022/S06/T02"), + null, + ); + + assert.equal( + getPriorSliceCompletionBlocker(repo, "main", "execute-task", "M022/S05/T01"), + "Cannot dispatch execute-task M022/S05/T01: dependency slice M022/S07 is not complete.", + ); +}); + test("dispatch guard allows slice with all declared dependencies complete", (t) => { const repo = setupRepo(); t.after(() => teardownRepo(repo)); diff --git a/src/resources/extensions/gsd/tests/doctor-heal-fixable-warnings.test.ts b/src/resources/extensions/gsd/tests/doctor-heal-fixable-warnings.test.ts new file mode 100644 index 000000000..718e06f7b --- /dev/null +++ b/src/resources/extensions/gsd/tests/doctor-heal-fixable-warnings.test.ts @@ -0,0 +1,14 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { isDoctorHealActionable } from "../commands-handlers.js"; + +test("doctor heal actionable filter keeps fixable warnings and errors", () => { + assert.equal(isDoctorHealActionable({ fixable: true, severity: "warning" }), true); + assert.equal(isDoctorHealActionable({ fixable: true, severity: "error" }), true); +}); + +test("doctor heal actionable filter excludes info and non-fixable issues", () => { + assert.equal(isDoctorHealActionable({ fixable: true, severity: "info" }), false); + assert.equal(isDoctorHealActionable({ fixable: false, severity: "warning" }), false); + assert.equal(isDoctorHealActionable({ fixable: false, severity: "error" }), false); +}); diff --git a/src/resources/extensions/gsd/tests/doctor-providers.test.ts b/src/resources/extensions/gsd/tests/doctor-providers.test.ts index 8df31fc10..3fee92d75 100644 --- a/src/resources/extensions/gsd/tests/doctor-providers.test.ts +++ b/src/resources/extensions/gsd/tests/doctor-providers.test.ts @@ -574,6 +574,42 @@ test("runProviderChecks reports ok for OpenAI via openai-codex auth.json (#2922) rmSync(tmpHome, { recursive: true, force: true }); }); +test("runProviderChecks reports ok for claude-code without any API key", () => { + const repo = realpathSync(mkdtempSync(join(tmpdir(), 
"gsd-providers-cc-repo-"))); + mkdirSync(join(repo, ".gsd"), { recursive: true }); + writeFileSync( + join(repo, ".gsd", "PREFERENCES.md"), + [ + "---", + "models:", + " execution:", + " model: claude-sonnet-4-6", + " provider: claude-code", + "---", + "", + ].join("\n"), + ); + + const tmpHome = realpathSync(mkdtempSync(join(tmpdir(), "gsd-providers-cc-home-"))); + + withEnv({ + HOME: tmpHome, + ANTHROPIC_API_KEY: undefined, + ANTHROPIC_OAUTH_TOKEN: undefined, + }, () => { + withCwd(repo, () => { + const results = runProviderChecks(); + const cc = results.find(r => r.name === "claude-code"); + assert.ok(cc, "claude-code result should exist"); + assert.equal(cc!.status, "ok", "claude-code uses CLI auth — must be ok without API keys"); + assert.ok(cc!.message.includes("CLI auth"), "should indicate CLI auth"); + }); + }); + + rmSync(repo, { recursive: true, force: true }); + rmSync(tmpHome, { recursive: true, force: true }); +}); + test("PROVIDER_ROUTES includes google-gemini-cli as route for google (#2922)", async () => { const { readFileSync: readFS } = await import("node:fs"); const { dirname: dirn, join: joinPath } = await import("node:path"); diff --git a/src/resources/extensions/gsd/tests/doctor-scope-db-unavailable.test.ts b/src/resources/extensions/gsd/tests/doctor-scope-db-unavailable.test.ts new file mode 100644 index 000000000..caeb403b5 --- /dev/null +++ b/src/resources/extensions/gsd/tests/doctor-scope-db-unavailable.test.ts @@ -0,0 +1,43 @@ +import { afterEach, test } from "node:test"; +import assert from "node:assert/strict"; +import { closeDatabase } from "../gsd-db.ts"; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { filterDoctorIssues } from "../doctor-format.ts"; +import { checkEngineHealth } from "../doctor-engine-checks.ts"; + +afterEach(() => { + closeDatabase(); +}); + +test("filterDoctorIssues keeps project and environment issues in scoped reports", () => { + const issues = [ + { severity: "error", code: "env_dependencies", scope: "project", unitId: "environment", message: "node_modules missing", fixable: false }, + { severity: "warning", code: "db_unavailable", scope: "project", unitId: "project", message: "DB unavailable", fixable: false }, + { severity: "warning", code: "state_file_missing", scope: "slice", unitId: "M016/S01", message: "slice warning", fixable: false }, + ] as const; + + const filtered = filterDoctorIssues([...issues], { scope: "M016", includeWarnings: true }); + assert.deepEqual( + filtered.map((issue) => issue.unitId), + ["environment", "project", "M016/S01"], + ); +}); + +test("checkEngineHealth reports db_unavailable when gsd.db exists but the DB is closed", async (t) => { + const base = mkdtempSync(join(tmpdir(), "gsd-doctor-db-unavailable-")); + t.after(() => rmSync(base, { recursive: true, force: true })); + + const gsdDir = join(base, ".gsd"); + mkdirSync(gsdDir, { recursive: true }); + writeFileSync(join(gsdDir, "gsd.db"), ""); + + const issues: any[] = []; + await checkEngineHealth(base, issues, []); + + const dbIssue = issues.find((issue) => issue.code === "db_unavailable"); + assert.ok(dbIssue, "doctor should surface degraded DB mode when a DB file exists"); + assert.equal(dbIssue.unitId, "project"); + assert.equal(dbIssue.file, ".gsd/gsd.db"); +}); diff --git a/src/resources/extensions/gsd/tests/execute-task-prompt-existing-artifact-guard.test.ts 
b/src/resources/extensions/gsd/tests/execute-task-prompt-existing-artifact-guard.test.ts new file mode 100644 index 000000000..f3acf4871 --- /dev/null +++ b/src/resources/extensions/gsd/tests/execute-task-prompt-existing-artifact-guard.test.ts @@ -0,0 +1,33 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const promptsDir = join(__dirname, "..", "prompts"); + +test("execute-task prompt requires reading existing artifacts before write", () => { + const prompt = readFileSync(join(promptsDir, "execute-task.md"), "utf-8"); + + assert.match( + prompt, + /Before any `Write` that creates an artifact or output file, check whether that path already exists\./, + "execute-task prompt should require an existence check before creating artifacts", + ); + assert.match( + prompt, + /If it does, read it first and decide whether the work is already done, should be extended, or truly needs replacement\./, + "execute-task prompt should require reading existing artifacts before replacement", + ); +}); + +test("guided resume prompt checks for pre-existing artifacts", () => { + const prompt = readFileSync(join(promptsDir, "guided-resume-task.md"), "utf-8"); + + assert.match( + prompt, + /Before you create any expected artifact or output file, check whether it already exists and read it first/i, + "guided resume prompt should guard pre-existing artifacts", + ); +}); diff --git a/src/resources/extensions/gsd/tests/false-degraded-mode-warning.test.ts b/src/resources/extensions/gsd/tests/false-degraded-mode-warning.test.ts new file mode 100644 index 000000000..510a88e9a --- /dev/null +++ b/src/resources/extensions/gsd/tests/false-degraded-mode-warning.test.ts @@ -0,0 +1,104 @@ +/** + * false-degraded-mode-warning.test.ts — Regression tests for #3922. + * + * Before this fix, deriveState() logged a "DB unavailable — degraded mode" + * warning even when the DB simply hadn't been opened yet (e.g. during + * before_agent_start context injection). The fix introduces wasDbOpenAttempted() + * to distinguish "not yet initialized" from "genuinely unavailable." + * + * Two aspects: + * 1. gsd-db: wasDbOpenAttempted() tracks whether openDatabase() was ever called. + * 2. state: the degraded-mode warning is gated behind wasDbOpenAttempted(). + */ + +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; +import { + openDatabase, + closeDatabase, + isDbAvailable, + wasDbOpenAttempted, +} from "../gsd-db.ts"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const stateSource = readFileSync(join(__dirname, "..", "state.ts"), "utf-8"); + +// ═══════════════════════════════════════════════════════════════════════════ +// 1. gsd-db: wasDbOpenAttempted flag +// ═══════════════════════════════════════════════════════════════════════════ + +describe("wasDbOpenAttempted (#3922)", () => { + + test("wasDbOpenAttempted returns true after openDatabase is called", () => { + // By this point in the test suite, openDatabase may or may not have been + // called by other tests. So we call it explicitly and verify it returns true. 
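+    // Assumed contract: the flag records that an open was attempted, not
+    // that it succeeded (the invalid-path test below relies on this).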
+ openDatabase(":memory:"); + assert.strictEqual(wasDbOpenAttempted(), true, + "wasDbOpenAttempted should be true after openDatabase call"); + closeDatabase(); + }); + + test("openDatabase sets the flag even if it fails on invalid path", () => { + // openDatabase with an unreachable path may fail, but the flag should + // still be set because the attempt was made. + try { openDatabase("/nonexistent/path/that/will/fail.db"); } catch { /* expected */ } + assert.strictEqual(wasDbOpenAttempted(), true, + "wasDbOpenAttempted should be true even after a failed open attempt"); + }); +}); + +// ═══════════════════════════════════════════════════════════════════════════ +// 2. state.ts: degraded-mode warning is gated behind wasDbOpenAttempted +// ═══════════════════════════════════════════════════════════════════════════ + +describe("degraded-mode warning guard (#3922)", () => { + + test("state.ts imports wasDbOpenAttempted from gsd-db", () => { + assert.ok( + stateSource.includes("wasDbOpenAttempted"), + "state.ts must import wasDbOpenAttempted to gate the degraded-mode warning", + ); + }); + + test("degraded-mode warning is inside a wasDbOpenAttempted() guard", () => { + // Find the degraded-mode warning string + const warningStr = 'DB unavailable — using filesystem state derivation (degraded mode)'; + const warningIdx = stateSource.indexOf(warningStr); + assert.ok(warningIdx > 0, "degraded-mode warning string must exist in state.ts"); + + // The wasDbOpenAttempted() check must appear BEFORE the warning, + // within the same else-branch (i.e. within a reasonable distance). + // Look backwards from the warning for the guard. + const searchWindow = stateSource.slice(Math.max(0, warningIdx - 300), warningIdx); + assert.ok( + searchWindow.includes("wasDbOpenAttempted()"), + "wasDbOpenAttempted() guard must appear shortly before the degraded-mode warning " + + "to prevent false warnings when DB has not been initialized yet", + ); + }); + + test("warning is NOT emitted unconditionally in the else branch", () => { + // The old code had `logWarning(...)` directly in the else branch. + // The fix wraps it in `if (wasDbOpenAttempted())`. + // Verify the logWarning call is inside a conditional, not bare. + const lines = stateSource.split("\n"); + for (let i = 0; i < lines.length; i++) { + if (lines[i]!.includes("DB unavailable") && lines[i]!.includes("degraded mode")) { + // This line has the warning. Check that the preceding non-empty line + // contains an if-condition (wasDbOpenAttempted), not a bare else. 
+ let prev = i - 1; + while (prev >= 0 && lines[prev]!.trim() === "") prev--; + const prevLine = lines[prev]!.trim(); + assert.ok( + prevLine.includes("wasDbOpenAttempted"), + `Line ${i + 1} emits degraded-mode warning — preceding line ${prev + 1} must ` + + `contain wasDbOpenAttempted guard, but found: "${prevLine}"`, + ); + break; + } + } + }); +}); diff --git a/src/resources/extensions/gsd/tests/file-lock.test.ts b/src/resources/extensions/gsd/tests/file-lock.test.ts new file mode 100644 index 000000000..b45b091d8 --- /dev/null +++ b/src/resources/extensions/gsd/tests/file-lock.test.ts @@ -0,0 +1,103 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { createRequire } from "node:module"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { withFileLock, withFileLockSync } from "../file-lock.ts"; + +const require = createRequire(import.meta.url); + +function hasProperLockfile(): boolean { + try { + require("proper-lockfile"); + return true; + } catch { + return false; + } +} + +test("withFileLockSync: executes callback when file does not exist", () => { + const dir = mkdtempSync(join(tmpdir(), "gsd-file-lock-test-")); + try { + const missingPath = join(dir, "missing.txt"); + let called = 0; + const result = withFileLockSync(missingPath, () => { + called++; + return "ok"; + }); + + assert.equal(result, "ok"); + assert.equal(called, 1, "callback should execute exactly once"); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test("withFileLock: executes callback when file does not exist", async () => { + const dir = mkdtempSync(join(tmpdir(), "gsd-file-lock-test-")); + try { + const missingPath = join(dir, "missing.txt"); + let called = 0; + const result = await withFileLock(missingPath, async () => { + called++; + return "ok"; + }); + + assert.equal(result, "ok"); + assert.equal(called, 1, "callback should execute exactly once"); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test("withFileLockSync: falls back to unlocked callback on ELOCKED", () => { + if (!hasProperLockfile() || process.platform === "win32") { + return; + } + + const lockfile = require("proper-lockfile"); + const dir = mkdtempSync(join(tmpdir(), "gsd-file-lock-test-")); + const filePath = join(dir, "locked.jsonl"); + writeFileSync(filePath, "{}\n", "utf-8"); + + const release = lockfile.lockSync(filePath, { retries: 0, stale: 10000 }); + try { + let called = 0; + const result = withFileLockSync(filePath, () => { + called++; + return "fallback-ok"; + }); + assert.equal(result, "fallback-ok"); + assert.equal(called, 1, "callback should run even when lock acquisition fails"); + } finally { + release(); + rmSync(dir, { recursive: true, force: true }); + } +}); + +test("withFileLock: falls back to unlocked callback on ELOCKED", async () => { + if (!hasProperLockfile() || process.platform === "win32") { + return; + } + + const lockfile = require("proper-lockfile"); + const dir = mkdtempSync(join(tmpdir(), "gsd-file-lock-test-")); + const filePath = join(dir, "locked.jsonl"); + writeFileSync(filePath, "{}\n", "utf-8"); + + const release = await lockfile.lock(filePath, { retries: 0, stale: 10000 }); + try { + let called = 0; + const result = await withFileLock(filePath, async () => { + called++; + return "fallback-ok"; + }); + assert.equal(result, "fallback-ok"); + assert.equal(called, 1, "callback should run even when lock acquisition fails"); + } 
finally { + await release(); + rmSync(dir, { recursive: true, force: true }); + } +}); diff --git a/src/resources/extensions/gsd/tests/forensics-stuck-loops.test.ts b/src/resources/extensions/gsd/tests/forensics-stuck-loops.test.ts index 555570bab..8b03e0cf7 100644 --- a/src/resources/extensions/gsd/tests/forensics-stuck-loops.test.ts +++ b/src/resources/extensions/gsd/tests/forensics-stuck-loops.test.ts @@ -101,3 +101,65 @@ test("#1943 detectStuckLoops ignores watchdog duplicates but flags real re-dispa assert.equal(anomalies.length, 1, `expected 1 anomaly (for the 3x dispatched task), got ${anomalies.length}`); assert.ok(anomalies[0].summary.includes("3 times")); }); + +test("#3760 detectStuckLoops ignores cross-session recovery re-dispatches", () => { + const anomalies: ForensicAnomaly[] = []; + + const units: UnitMetrics[] = [ + makeUnit({ + type: "plan-slice", + id: "M001/S02", + startedAt: 1000, + finishedAt: 2000, + autoSessionKey: "session-a", + }), + makeUnit({ + type: "plan-slice", + id: "M001/S02", + startedAt: 5000, + finishedAt: 6000, + autoSessionKey: "session-b", + }), + ]; + + detectStuckLoops(units, anomalies); + + assert.equal(anomalies.length, 0, "cross-session recovery should not be flagged as a stuck loop"); +}); + +test("#3760 detectStuckLoops still flags repeated dispatches within one auto session", () => { + const anomalies: ForensicAnomaly[] = []; + + const units: UnitMetrics[] = [ + makeUnit({ + type: "complete-slice", + id: "M011/S02", + startedAt: 1000, + finishedAt: 2000, + autoSessionKey: "session-a", + }), + makeUnit({ + type: "complete-slice", + id: "M011/S02", + startedAt: 5000, + finishedAt: 6000, + autoSessionKey: "session-a", + }), + makeUnit({ + type: "complete-slice", + id: "M011/S02", + startedAt: 9000, + finishedAt: 10000, + autoSessionKey: "session-b", + }), + ]; + + detectStuckLoops(units, anomalies); + + assert.equal(anomalies.length, 1, "within-session retries should still be flagged"); + assert.ok(anomalies[0].summary.includes("2 times"), `summary should reflect the worst same-session loop: ${anomalies[0].summary}`); + assert.ok( + anomalies[0].details.includes("Cross-session recovery runs are ignored"), + `details should explain the session-aware rule: ${anomalies[0].details}`, + ); +}); diff --git a/src/resources/extensions/gsd/tests/format-shortcut.test.ts b/src/resources/extensions/gsd/tests/format-shortcut.test.ts index b6c90e4b1..01471fad2 100644 --- a/src/resources/extensions/gsd/tests/format-shortcut.test.ts +++ b/src/resources/extensions/gsd/tests/format-shortcut.test.ts @@ -4,6 +4,7 @@ import test from 'node:test'; import assert from 'node:assert/strict'; import { formatShortcut } from '../files.ts'; +import { formattedShortcutPair, primaryShortcutCombo, fallbackShortcutCombo } from '../shortcut-defs.ts'; // ─── formatShortcut renders per-platform shortcuts ────────────────────── @@ -67,3 +68,33 @@ test('formatShortcut: passes through plain key names', () => { assert.strictEqual(formatShortcut('Escape'), 'Escape'); assert.strictEqual(formatShortcut('Enter'), 'Enter'); }); + +test("shortcut-defs: exposes canonical dashboard combos", () => { + assert.equal(primaryShortcutCombo("dashboard"), "Ctrl+Alt+G"); + assert.equal(fallbackShortcutCombo("dashboard"), "Ctrl+Shift+G"); +}); + +test("shortcut-defs: formats shortcut pair using platform symbols", () => { + const pair = formattedShortcutPair("notifications"); + if (process.platform === "darwin") { + assert.equal(pair, "⌃⌥N / ⌃⇧N"); + } else { + assert.equal(pair, "Ctrl+Alt+N / 
Ctrl+Shift+N"); + } +}); + +test("shortcut-defs: parallel shortcut omits fallback (hasFallback: false)", () => { + const pair = formattedShortcutPair("parallel"); + if (process.platform === "darwin") { + assert.equal(pair, "⌃⌥P", "parallel should only show primary combo"); + } else { + assert.equal(pair, "Ctrl+Alt+P", "parallel should only show primary combo"); + } + // Verify it does NOT contain the fallback separator + assert.ok(!pair.includes("/"), "parallel pair should not contain fallback separator"); +}); + +test("shortcut-defs: dashboard shortcut includes fallback (hasFallback: true)", () => { + const pair = formattedShortcutPair("dashboard"); + assert.ok(pair.includes("/"), "dashboard pair should contain fallback separator"); +}); diff --git a/src/resources/extensions/gsd/tests/gate-dispatch.test.ts b/src/resources/extensions/gsd/tests/gate-dispatch.test.ts index 3b18a2fbf..36fdbe2c9 100644 --- a/src/resources/extensions/gsd/tests/gate-dispatch.test.ts +++ b/src/resources/extensions/gsd/tests/gate-dispatch.test.ts @@ -186,4 +186,31 @@ describe("evaluating-gates phase", () => { insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q5", scope: "task", taskId: "T01" }); assert.equal(getPendingSliceGateCount("M001", "S01"), 1); }); + + test("Q8 (owned by complete-slice) does not block evaluating-gates phase", async () => { + // Regression: Q8 is stored with scope:"slice" but owned by the + // complete-slice turn. Before the gate registry landed, deriveState + // counted Q8 as a blocker for evaluating-gates while the gate-evaluate + // prompt silently dropped Q8 — an unrecoverable stall. After the + // registry change, deriveState filters by owner turn, so Q8 never + // blocks evaluating-gates. + planSlice(tmpDir); + await renderPlanFromDb(tmpDir, "M001", "S01"); + + insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q3", scope: "slice" }); + insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q4", scope: "slice" }); + insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q8", scope: "slice" }); + + saveGateResult({ milestoneId: "M001", sliceId: "S01", gateId: "Q3", verdict: "pass", rationale: "OK", findings: "" }); + saveGateResult({ milestoneId: "M001", sliceId: "S01", gateId: "Q4", verdict: "omitted", rationale: "N/A", findings: "" }); + // Q8 deliberately left pending — it's complete-slice's problem. + + invalidateStateCache(); + const state = await deriveState(tmpDir); + assert.equal( + state.phase, + "executing", + `pending Q8 must not stall evaluating-gates — got phase=${state.phase}`, + ); + }); }); diff --git a/src/resources/extensions/gsd/tests/gate-registry.test.ts b/src/resources/extensions/gsd/tests/gate-registry.test.ts new file mode 100644 index 000000000..3bb1d6c3c --- /dev/null +++ b/src/resources/extensions/gsd/tests/gate-registry.test.ts @@ -0,0 +1,140 @@ +/** + * Gate registry tests — enforce that every declared GateId has a registry + * entry, that every owner-turn bucket is non-empty, and that coverage + * assertions fail loudly instead of silently skipping unknown gates. + */ + +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; + +import { + GATE_REGISTRY, + assertGateCoverage, + getGateDefinition, + getGateIdsForTurn, + getGatesForTurn, + getOwnerTurn, + type OwnerTurn, +} from "../gate-registry.ts"; +import type { GateId } from "../types.ts"; + +/** Authoritative list of GateIds as declared in types.ts. 
*/ +const ALL_GATE_IDS: readonly GateId[] = [ + "Q3", "Q4", "Q5", "Q6", "Q7", "Q8", + "MV01", "MV02", "MV03", "MV04", +]; + +const ALL_OWNER_TURNS: readonly OwnerTurn[] = [ + "gate-evaluate", + "execute-task", + "complete-slice", + "validate-milestone", +]; + +describe("gate-registry", () => { + test("every declared GateId has a registry entry", () => { + for (const id of ALL_GATE_IDS) { + const def = GATE_REGISTRY[id]; + assert.ok(def, `missing registry entry for gate ${id}`); + assert.equal(def.id, id); + assert.ok(def.question.length > 0, `${id} missing question`); + assert.ok(def.guidance.length > 0, `${id} missing guidance`); + assert.ok(def.promptSection.length > 0, `${id} missing promptSection`); + } + }); + + test("registry contains no extra gate entries", () => { + const registryIds = new Set(Object.keys(GATE_REGISTRY)); + const declaredIds = new Set(ALL_GATE_IDS); + for (const id of registryIds) { + assert.ok(declaredIds.has(id), `registry has unknown gate ${id}`); + } + }); + + test("every owner turn owns at least one gate", () => { + for (const turn of ALL_OWNER_TURNS) { + const gates = getGatesForTurn(turn); + assert.ok( + gates.length > 0, + `owner turn "${turn}" has no gates — likely a registry mistake`, + ); + } + }); + + test("owner turn buckets are disjoint", () => { + const seen = new Set(); + for (const turn of ALL_OWNER_TURNS) { + for (const def of getGatesForTurn(turn)) { + assert.ok(!seen.has(def.id), `gate ${def.id} claimed by two turns`); + seen.add(def.id); + } + } + // Every gate should appear in exactly one bucket. + assert.equal(seen.size, ALL_GATE_IDS.length); + }); + + test("getOwnerTurn round-trips against GATE_REGISTRY", () => { + for (const id of ALL_GATE_IDS) { + const turn = getOwnerTurn(id); + const idsForTurn = getGateIdsForTurn(turn); + assert.ok(idsForTurn.has(id), `${id} not in ${turn} bucket`); + } + }); + + test("getGateDefinition returns undefined for unknown ids", () => { + assert.equal(getGateDefinition("Q99"), undefined); + assert.equal(getGateDefinition("not-a-gate"), undefined); + }); +}); + +describe("assertGateCoverage", () => { + test("throws when a row is owned by a different turn", () => { + // Q8 is owned by complete-slice, not gate-evaluate — this used to be + // silently dropped by the old `if (!meta) continue;` filter, causing + // the evaluating-gates phase to stall. + assert.throws( + () => assertGateCoverage([{ gate_id: "Q8" }], "gate-evaluate"), + (err: Error) => + err.message.includes("Q8") && err.message.includes("gate-evaluate"), + ); + }); + + test("throws when a row has an unknown gate id", () => { + assert.throws( + () => assertGateCoverage([{ gate_id: "Q999" as GateId }], "gate-evaluate", { requireAll: false }), + (err: Error) => err.message.includes("Q999"), + ); + }); + + test("throws when requireAll is true and an owned gate is missing", () => { + // gate-evaluate owns Q3 and Q4. Passing only Q3 should fail. + assert.throws( + () => assertGateCoverage([{ gate_id: "Q3" }], "gate-evaluate", { requireAll: true }), + (err: Error) => err.message.includes("Q4"), + ); + }); + + test("passes when requireAll is false and only a subset is pending", () => { + // execute-task owns Q5/Q6/Q7, but a task with no external dependencies + // may only have Q7 seeded. That's still valid coverage. 
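+    // Assumed semantics of requireAll: false, based on the cases above:
+    // rows are still validated for known ids and correct ownership; only
+    // the "every owned gate must be present" check is relaxed.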
+      assert.doesNotThrow(() =>
+        assertGateCoverage([{ gate_id: "Q7" }], "execute-task", { requireAll: false }),
+      );
+    });
+
+    test("passes when requireAll is true and every owned gate is pending", () => {
+      assert.doesNotThrow(() =>
+        assertGateCoverage(
+          [{ gate_id: "Q3" }, { gate_id: "Q4" }],
+          "gate-evaluate",
+          { requireAll: true },
+        ),
+      );
+    });
+
+    test("empty pending list passes when requireAll is false", () => {
+      assert.doesNotThrow(() =>
+        assertGateCoverage([], "complete-slice", { requireAll: false }),
+      );
+    });
+});
diff --git a/src/resources/extensions/gsd/tests/gsd-db.test.ts b/src/resources/extensions/gsd/tests/gsd-db.test.ts
index 0987b9ad3..097ea7151 100644
--- a/src/resources/extensions/gsd/tests/gsd-db.test.ts
+++ b/src/resources/extensions/gsd/tests/gsd-db.test.ts
@@ -15,6 +15,7 @@ import {
   getRequirementById,
   getActiveDecisions,
   getActiveRequirements,
+  getTask,
   transaction,
   _getAdapter,
   _resetProvider,
@@ -43,6 +44,16 @@ function cleanup(dbPath: string): void {
   }
 }
 
+// Temporarily overrides process.platform (restored in the finally block) so
+// platform-specific PRAGMA branches can be exercised on any host.
+function withPlatform<T>(platform: NodeJS.Platform, fn: () => T): T {
+  const original = process.platform;
+  Object.defineProperty(process, 'platform', { value: platform });
+  try {
+    return fn();
+  } finally {
+    Object.defineProperty(process, 'platform', { value: original });
+  }
+}
+
 // ═══════════════════════════════════════════════════════════════════════════
 // gsd-db tests
 // ═══════════════════════════════════════════════════════════════════════════
@@ -279,6 +290,26 @@ describe('gsd-db', () => {
     cleanup(dbPath);
   });
 
+  test('gsd-db: mmap stays disabled on darwin file-backed DBs', () => {
+    const darwinDbPath = tempDbPath();
+    withPlatform('darwin', () => {
+      openDatabase(darwinDbPath);
+      const adapter = _getAdapter()!;
+      const mmap = adapter.prepare('PRAGMA mmap_size').get();
+      assert.deepStrictEqual(mmap?.['mmap_size'], 0, 'darwin should leave mmap_size disabled');
+      cleanup(darwinDbPath);
+    });
+
+    const linuxDbPath = tempDbPath();
+    withPlatform('linux', () => {
+      openDatabase(linuxDbPath);
+      const adapter = _getAdapter()!;
+      const mmap = adapter.prepare('PRAGMA mmap_size').get();
+      assert.deepStrictEqual(mmap?.['mmap_size'], 67108864, 'non-darwin should still enable mmap_size');
+      cleanup(linuxDbPath);
+    });
+  });
+
   test('gsd-db: transaction rollback on error', () => {
     openDatabase(':memory:');
@@ -329,6 +360,79 @@ describe('gsd-db', () => {
     closeDatabase();
   });
 
+  test('gsd-db: recreates missing verification evidence dedup index after removing duplicate rows', () => {
+    const dbPath = tempDbPath();
+    openDatabase(dbPath);
+
+    let adapter = _getAdapter()!;
+    adapter.prepare("INSERT INTO milestones (id, created_at) VALUES (?, '')").run('M001');
+    adapter.prepare("INSERT INTO slices (milestone_id, id, created_at) VALUES (?, ?, '')").run('M001', 'S01');
+    adapter.prepare("INSERT INTO tasks (milestone_id, slice_id, id) VALUES (?, ?, ?)").run('M001', 'S01', 'T01');
+    adapter.exec('DROP INDEX IF EXISTS idx_verification_evidence_dedup');
+
+    const insertEvidence = adapter.prepare(
+      `INSERT INTO verification_evidence (
+        task_id, slice_id, milestone_id, command, exit_code, verdict, duration_ms, created_at
+      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
+    );
+    insertEvidence.run('T01', 'S01', 'M001', 'npm test', 1, 'fail', 125, '2026-04-12T00:00:00.000Z');
+    insertEvidence.run('T01', 'S01', 'M001', 'npm test', 1, 'fail', 125, '2026-04-12T00:00:01.000Z');
+    insertEvidence.run('T01', 'S01', 'M001', 'npm run lint', 0, 'pass', 90, '2026-04-12T00:00:02.000Z');
+
+    closeDatabase();
+
+    assert.equal(openDatabase(dbPath),
true, 'openDatabase should repair legacy duplicate evidence rows'); + + adapter = _getAdapter()!; + const countRow = adapter.prepare( + `SELECT count(*) as cnt + FROM verification_evidence + WHERE task_id = ? AND slice_id = ? AND milestone_id = ? AND command = ? AND verdict = ?`, + ).get('T01', 'S01', 'M001', 'npm test', 'fail'); + assert.equal(countRow?.['cnt'], 1, 'duplicate verification evidence rows should be deduplicated before index creation'); + + const indexRow = adapter.prepare( + "SELECT name FROM sqlite_master WHERE type = 'index' AND name = 'idx_verification_evidence_dedup'", + ).get(); + assert.equal(indexRow?.['name'], 'idx_verification_evidence_dedup', 'dedup index should be recreated on reopen'); + + cleanup(dbPath); + }); + + test('gsd-db: rowToTask tolerates legacy comma-separated task arrays', () => { + openDatabase(':memory:'); + + const adapter = _getAdapter()!; + adapter.prepare("INSERT INTO milestones (id, created_at) VALUES (?, '')").run('M001'); + adapter.prepare("INSERT INTO slices (milestone_id, id, created_at) VALUES (?, ?, '')").run('M001', 'S01'); + adapter.prepare( + `INSERT INTO tasks ( + milestone_id, slice_id, id, key_files, key_decisions, files, inputs, expected_output + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, + ).run( + 'M001', + 'S01', + 'T01', + '[]', + '[]', + 'tests/test_verify.py, config.yaml, configs/roster_2026-05-11.yaml', + 'tests/test_verify.py', + 'reports/summary.md, artifacts/output.json', + ); + + const task = getTask('M001', 'S01', 'T01'); + assert.ok(task, 'task should load successfully from DB'); + assert.deepEqual(task?.files, [ + 'tests/test_verify.py', + 'config.yaml', + 'configs/roster_2026-05-11.yaml', + ]); + assert.deepEqual(task?.inputs, ['tests/test_verify.py']); + assert.deepEqual(task?.expected_output, ['reports/summary.md', 'artifacts/output.json']); + + closeDatabase(); + }); + test('gsd-db: query wrappers return null/empty when DB unavailable', () => { // Ensure DB is closed closeDatabase(); @@ -347,15 +451,13 @@ describe('gsd-db', () => { assert.deepStrictEqual(ar, [], 'getActiveRequirements returns [] when DB closed'); }); - test('gsd-db: wasDbOpenAttempted tracks openDatabase calls', () => { - // wasDbOpenAttempted should return true once openDatabase has been called - // (previous tests in this suite already called openDatabase, so the flag is set) + test('gsd-db: closeDatabase resets wasDbOpenAttempted after an intentional close', () => { + openDatabase(':memory:'); assert.ok(wasDbOpenAttempted(), 'wasDbOpenAttempted should be true after openDatabase was called'); - // Verify the flag persists even after closeDatabase closeDatabase(); assert.ok(!isDbAvailable(), 'DB should not be available after close'); - assert.ok(wasDbOpenAttempted(), 'wasDbOpenAttempted should remain true after closeDatabase'); + assert.ok(!wasDbOpenAttempted(), 'wasDbOpenAttempted should reset after closeDatabase'); }); // ─── Final Report ────────────────────────────────────────────────────────── diff --git a/src/resources/extensions/gsd/tests/gsd-no-project-error.test.ts b/src/resources/extensions/gsd/tests/gsd-no-project-error.test.ts new file mode 100644 index 000000000..1ceaa5f2d --- /dev/null +++ b/src/resources/extensions/gsd/tests/gsd-no-project-error.test.ts @@ -0,0 +1,73 @@ +/** + * GSDNoProjectError — tests for friendly home-directory error handling. + * + * Verifies that GSDNoProjectError is thrown for blocked directories and + * that the dispatcher catches it with a user-friendly message. 
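+ *
+ * These are source-level checks (readFileSync plus string matching), so
+ * they stay fast and avoid constructing a real dispatcher.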
+ */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +const contextSrc = readFileSync(join(__dirname, "..", "commands", "context.ts"), "utf-8"); +const dispatcherSrc = readFileSync(join(__dirname, "..", "commands", "dispatcher.ts"), "utf-8"); + +// ─── GSDNoProjectError class ────────────────────────────────────────────── + +test("GSDNoProjectError class is exported from context.ts", () => { + assert.ok( + contextSrc.includes("export class GSDNoProjectError extends Error"), + "GSDNoProjectError should be an exported Error subclass", + ); +}); + +test("GSDNoProjectError sets name property", () => { + assert.ok( + contextSrc.includes('this.name = "GSDNoProjectError"'), + "GSDNoProjectError should set its name for instanceof checks", + ); +}); + +// ─── projectRoot blocked directory handling ─────────────────────────────── + +test("projectRoot uses validateDirectory and checks for blocked severity", () => { + assert.ok( + contextSrc.includes("validateDirectory(pathToCheck)"), + "projectRoot should call validateDirectory", + ); + assert.ok( + contextSrc.includes('result.severity === "blocked"'), + "projectRoot should check for blocked severity", + ); +}); + +test("projectRoot throws GSDNoProjectError on blocked directory", () => { + assert.ok( + contextSrc.includes("throw new GSDNoProjectError"), + "projectRoot should throw GSDNoProjectError when directory is blocked", + ); +}); + +// ─── Dispatcher catch ───────────────────────────────────────────────────── + +test("dispatcher catches GSDNoProjectError with user-friendly message", () => { + assert.ok( + dispatcherSrc.includes("err instanceof GSDNoProjectError"), + "dispatcher should catch GSDNoProjectError specifically", + ); + assert.ok( + dispatcherSrc.includes("cd"), + "error message should suggest cd-ing into a project directory", + ); +}); + +test("dispatcher re-throws non-GSDNoProjectError exceptions", () => { + assert.ok( + dispatcherSrc.includes("throw err"), + "dispatcher should re-throw unexpected errors", + ); +}); diff --git a/src/resources/extensions/gsd/tests/infra-errors-cooldown.test.ts b/src/resources/extensions/gsd/tests/infra-errors-cooldown.test.ts new file mode 100644 index 000000000..ebaa774a6 --- /dev/null +++ b/src/resources/extensions/gsd/tests/infra-errors-cooldown.test.ts @@ -0,0 +1,180 @@ +// gsd / infra-errors cooldown detection tests +// Copyright (c) 2026 Jeremy McSpadden + +import test, { describe } from "node:test"; +import assert from "node:assert/strict"; + +import { + isTransientCooldownError, + getCooldownRetryAfterMs, + MAX_COOLDOWN_RETRIES, + COOLDOWN_FALLBACK_WAIT_MS, +} from "../auto/infra-errors.js"; + +// ─── Constants ──────────────────────────────────────────────────────────────── + +describe("infra-errors cooldown constants", () => { + test("COOLDOWN_FALLBACK_WAIT_MS is a positive number greater than the 30s rate-limit backoff", () => { + assert.ok(typeof COOLDOWN_FALLBACK_WAIT_MS === "number"); + assert.ok(COOLDOWN_FALLBACK_WAIT_MS > 30_000, "should exceed the 30s rate-limit window"); + }); + + test("MAX_COOLDOWN_RETRIES is a positive integer", () => { + assert.ok(typeof MAX_COOLDOWN_RETRIES === "number"); + assert.ok(Number.isInteger(MAX_COOLDOWN_RETRIES)); + assert.ok(MAX_COOLDOWN_RETRIES > 0); + }); + + test("COOLDOWN_FALLBACK_WAIT_MS is 35_000", () => { + 
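+    // Pinning the exact value keeps tuning visible in review; 35s sits just
+    // above the 30s rate-limit window asserted in the constants test above.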
assert.equal(COOLDOWN_FALLBACK_WAIT_MS, 35_000); + }); + + test("MAX_COOLDOWN_RETRIES is 5", () => { + assert.equal(MAX_COOLDOWN_RETRIES, 5); + }); +}); + +// ─── isTransientCooldownError: structured detection ────────────────────────── + +describe("isTransientCooldownError — structured code detection", () => { + test("returns true for an object with code === AUTH_COOLDOWN", () => { + const err = { code: "AUTH_COOLDOWN", message: "credentials in cooldown" }; + assert.equal(isTransientCooldownError(err), true); + }); + + test("returns true for a real CredentialCooldownError-shaped error", () => { + // Simulate CredentialCooldownError without importing sdk.ts (leaf-module rule) + const err = Object.assign(new Error('All credentials for "anthropic" are in a cooldown window.'), { + code: "AUTH_COOLDOWN", + retryAfterMs: 30_000, + name: "CredentialCooldownError", + }); + assert.equal(isTransientCooldownError(err), true); + }); + + test("returns false for an object with a different code", () => { + const err = { code: "ENOSPC", message: "disk full" }; + assert.equal(isTransientCooldownError(err), false); + }); + + test("returns false for an object with no code property", () => { + const err = { message: "some random error" }; + assert.equal(isTransientCooldownError(err), false); + }); +}); + +// ─── isTransientCooldownError: message fallback ─────────────────────────────── + +describe("isTransientCooldownError — message fallback (cross-process)", () => { + test("returns true when message contains 'in a cooldown window'", () => { + const err = new Error('All credentials for "openai" are in a cooldown window. Please wait.'); + assert.equal(isTransientCooldownError(err), true); + }); + + test("returns true when message matches case-insensitively", () => { + const err = new Error("credentials IN A COOLDOWN WINDOW"); + assert.equal(isTransientCooldownError(err), true); + }); + + test("returns true for a plain string containing cooldown window phrase", () => { + assert.equal(isTransientCooldownError("all keys in a cooldown window"), true); + }); + + test("returns false for a generic error message", () => { + const err = new Error("rate limit exceeded"); + assert.equal(isTransientCooldownError(err), false); + }); + + test("returns false for an error message about auth failure without cooldown phrase", () => { + const err = new Error("Authentication failed: invalid API key"); + assert.equal(isTransientCooldownError(err), false); + }); +}); + +// ─── isTransientCooldownError: edge cases ──────────────────────────────────── + +describe("isTransientCooldownError — edge cases", () => { + test("returns false for null", () => { + assert.equal(isTransientCooldownError(null), false); + }); + + test("returns false for undefined", () => { + assert.equal(isTransientCooldownError(undefined), false); + }); + + test("returns false for a number", () => { + assert.equal(isTransientCooldownError(42), false); + }); + + test("returns false for an empty object", () => { + assert.equal(isTransientCooldownError({}), false); + }); + + test("returns false for an object with code === AUTH_COOLDOWN as a non-string", () => { + // code must be a string matching "AUTH_COOLDOWN" exactly + const err = { code: 42 }; + assert.equal(isTransientCooldownError(err), false); + }); +}); + +// ─── getCooldownRetryAfterMs: structured extraction ────────────────────────── + +describe("getCooldownRetryAfterMs — structured extraction", () => { + test("returns retryAfterMs when code is AUTH_COOLDOWN and retryAfterMs is set", () => { + const err = 
{ code: "AUTH_COOLDOWN", retryAfterMs: 30_000 }; + assert.equal(getCooldownRetryAfterMs(err), 30_000); + }); + + test("returns undefined when code is AUTH_COOLDOWN but retryAfterMs is absent", () => { + const err = { code: "AUTH_COOLDOWN" }; + assert.equal(getCooldownRetryAfterMs(err), undefined); + }); + + test("returns 0 when retryAfterMs is explicitly 0", () => { + const err = { code: "AUTH_COOLDOWN", retryAfterMs: 0 }; + assert.equal(getCooldownRetryAfterMs(err), 0); + }); + + test("returns undefined for an error with a different code even if retryAfterMs is set", () => { + const err = { code: "ENOSPC", retryAfterMs: 5_000 }; + assert.equal(getCooldownRetryAfterMs(err), undefined); + }); + + test("returns undefined for a plain Error with no code property", () => { + const err = new Error("something went wrong"); + assert.equal(getCooldownRetryAfterMs(err), undefined); + }); + + test("returns retryAfterMs from a full CredentialCooldownError-shaped object", () => { + const err = Object.assign(new Error('All credentials for "anthropic" are in a cooldown window.'), { + code: "AUTH_COOLDOWN", + retryAfterMs: 15_000, + name: "CredentialCooldownError", + }); + assert.equal(getCooldownRetryAfterMs(err), 15_000); + }); +}); + +// ─── getCooldownRetryAfterMs: edge cases ───────────────────────────────────── + +describe("getCooldownRetryAfterMs — edge cases", () => { + test("returns undefined for null", () => { + assert.equal(getCooldownRetryAfterMs(null), undefined); + }); + + test("returns undefined for undefined", () => { + assert.equal(getCooldownRetryAfterMs(undefined), undefined); + }); + + test("returns undefined for a plain string", () => { + assert.equal(getCooldownRetryAfterMs("AUTH_COOLDOWN"), undefined); + }); + + test("returns undefined for an empty object", () => { + assert.equal(getCooldownRetryAfterMs({}), undefined); + }); + + test("returns undefined for a number", () => { + assert.equal(getCooldownRetryAfterMs(42), undefined); + }); +}); diff --git a/src/resources/extensions/gsd/tests/integration-prepared-discussion.test.ts b/src/resources/extensions/gsd/tests/integration-prepared-discussion.test.ts deleted file mode 100644 index ea4aa4f50..000000000 --- a/src/resources/extensions/gsd/tests/integration-prepared-discussion.test.ts +++ /dev/null @@ -1,525 +0,0 @@ -/** - * Integration tests for the prepared discussion system. - * - * Exercises the full preparation pipeline against the real GSD-2 codebase: - * - runPreparation() produces valid briefs - * - TypeScript is detected as primary language - * - Module structure includes top-level directories - * - Completes within R112 timing requirement (<60s) - * - prepareAndBuildDiscussPrompt() uses discuss-prepared template when enabled - * - Fallback to standard prompt when preparation is disabled - */ - -import test from "node:test"; -import assert from "node:assert/strict"; -import { join } from "node:path"; -import { existsSync } from "node:fs"; -import { - runPreparation, - formatCodebaseBrief, - formatPriorContextBrief, - formatEcosystemBrief, - type PreparationUIContext, - type PreparationPreferences, - type PreparationResult, -} from "../preparation.ts"; -import { validateEnhancedContext } from "../prompt-validation.ts"; -import { getLastPreparationResult, clearPreparationResult } from "../guided-flow.ts"; - -// ─── Test Helpers ─────────────────────────────────────────────────────────────── - -/** - * Mock UI context that captures notifications for testing. - * Follows the pattern from preparation.test.ts. 
- */ -function createMockUI(): PreparationUIContext & { notifications: Array<{ message: string; type?: string }> } { - const notifications: Array<{ message: string; type?: string }> = []; - return { - notifications, - notify(message: string, type?: "info" | "warning" | "error" | "success") { - notifications.push({ message, type }); - }, - }; -} - -/** - * Get the GSD extension source directory for integration testing. - * This is the real codebase we'll analyze. - */ -function getGsdExtensionDir(): string { - // Navigate from tests/ up to gsd/ directory - return join(import.meta.dirname, ".."); -} - -/** - * Get the GSD-2 project root for full codebase analysis. - */ -function getProjectRoot(): string { - // Navigate from tests/ up to the project root - // tests/ -> gsd/ -> extensions/ -> resources/ -> src/ -> gsd-2/ - return join(import.meta.dirname, "..", "..", "..", "..", ".."); -} - -// ─── R111 Validation: runPreparation against real codebase ────────────────────── - -test("R111: runPreparation() produces valid codebase brief for GSD extension", async (t) => { - const dir = getGsdExtensionDir(); - const ui = createMockUI(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, // Skip web research to avoid API key requirement - discuss_depth: "standard", - }; - - const result = await runPreparation(dir, ui, prefs); - - // Verify preparation completed successfully - assert.equal(result.enabled, true, "preparation should be enabled"); - assert.ok(result.codebase, "should have codebase brief"); - assert.ok(result.codebaseBrief, "should have formatted codebase brief"); - - // Verify TypeScript is detected as primary language - assert.equal( - result.codebase.techStack.primaryLanguage, - "javascript/typescript", - "should detect TypeScript as primary language", - ); - - // Verify module structure includes top-level directories - const topLevelDirs = result.codebase.moduleStructure.topLevelDirs; - assert.ok(topLevelDirs.length > 0, "should detect top-level directories"); - - // Common directories in the GSD extension - const expectedDirs = ["tests", "prompts", "templates", "migrate"]; - const foundExpected = expectedDirs.filter(d => topLevelDirs.includes(d)); - assert.ok( - foundExpected.length >= 2, - `should detect common directories, found: ${topLevelDirs.join(", ")}`, - ); - - // Verify sampled files exist - assert.ok(result.codebase.sampledFiles.length > 0, "should sample source files"); -}); - -test("R111: runPreparation() produces valid prior context brief", async (t) => { - const dir = getGsdExtensionDir(); - const ui = createMockUI(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - const result = await runPreparation(dir, ui, prefs); - - // Verify prior context brief structure - assert.ok(result.priorContext, "should have prior context"); - assert.ok(result.priorContextBrief, "should have formatted prior context brief"); - - // Prior context aggregates decisions, requirements, knowledge, summaries - assert.ok("decisions" in result.priorContext, "should have decisions"); - assert.ok("requirements" in result.priorContext, "should have requirements"); - assert.ok("knowledge" in result.priorContext, "should have knowledge"); - assert.ok("summaries" in result.priorContext, "should have summaries"); -}); - -test("R111: runPreparation() produces valid ecosystem brief (skipped without API key)", async (t) => { - const dir = getGsdExtensionDir(); - const ui = createMockUI(); - 
const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, // Explicitly disable - }; - - const result = await runPreparation(dir, ui, prefs); - - // Verify ecosystem brief structure - assert.ok(result.ecosystem, "should have ecosystem brief"); - assert.ok(result.ecosystemBrief, "should have formatted ecosystem brief"); - assert.equal(result.ecosystem.available, false, "ecosystem should be unavailable when web research disabled"); - assert.ok(result.ecosystem.skippedReason, "should have skip reason"); -}); - -test("R112: runPreparation() completes within 60s requirement", async (t) => { - const dir = getGsdExtensionDir(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - discuss_depth: "standard", - }; - - const startTime = performance.now(); - const result = await runPreparation(dir, null, prefs); - const elapsed = performance.now() - startTime; - - // R112 requirement: preparation must complete within 60 seconds - assert.ok(result.durationMs < 60000, `should complete within 60s, took ${result.durationMs}ms`); - assert.ok(elapsed < 60000, `wall-clock time should be under 60s, was ${elapsed}ms`); - - // Should be much faster for a local directory analysis - assert.ok(result.durationMs < 10000, `should typically complete within 10s, took ${result.durationMs}ms`); -}); - -// ─── Codebase Pattern Detection ───────────────────────────────────────────────── - -test("runPreparation() detects code patterns from GSD extension", async (t) => { - const dir = getGsdExtensionDir(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - const result = await runPreparation(dir, null, prefs); - - // The GSD extension uses async/await extensively - assert.ok( - result.codebase.patterns.asyncStyle === "async/await" || result.codebase.patterns.asyncStyle === "mixed", - `should detect async/await or mixed, got ${result.codebase.patterns.asyncStyle}`, - ); - - // The GSD extension uses try/catch for error handling - assert.ok( - result.codebase.patterns.errorHandling === "try/catch" || result.codebase.patterns.errorHandling === "mixed", - `should detect try/catch or mixed, got ${result.codebase.patterns.errorHandling}`, - ); - - // TypeScript uses camelCase or mixed naming - assert.ok( - result.codebase.patterns.namingConvention === "camelCase" || result.codebase.patterns.namingConvention === "mixed", - `should detect camelCase or mixed, got ${result.codebase.patterns.namingConvention}`, - ); - - // Evidence should be populated - assert.ok(result.codebase.patterns.evidence.asyncStyle.length > 0, "should have async style evidence"); -}); - -test("runPreparation() samples TypeScript files from src/ or project root", async (t) => { - const dir = getGsdExtensionDir(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - const result = await runPreparation(dir, null, prefs); - - // Should sample TypeScript files - const tsFiles = result.codebase.sampledFiles.filter( - f => f.endsWith(".ts") || f.endsWith(".tsx"), - ); - assert.ok(tsFiles.length > 0, "should sample TypeScript files"); - - // Should exclude test files - const testFiles = result.codebase.sampledFiles.filter( - f => f.includes(".test.") || f.includes(".spec."), - ); - assert.equal(testFiles.length, 0, "should not sample test files"); -}); - -// ─── Brief Formatting ─────────────────────────────────────────────────────────── - 
-test("formatCodebaseBrief() produces LLM-readable markdown", async (t) => { - const dir = getGsdExtensionDir(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - const result = await runPreparation(dir, null, prefs); - const formatted = formatCodebaseBrief(result.codebase); - - // Should contain expected sections - assert.ok(formatted.includes("## Tech Stack"), "should have Tech Stack section"); - assert.ok(formatted.includes("## Module Structure"), "should have Module Structure section"); - assert.ok(formatted.includes("## Code Patterns"), "should have Code Patterns section"); - - // Should contain detected tech - assert.ok(formatted.includes("javascript/typescript"), "should include detected language"); - - // Should be within character limit - assert.ok(formatted.length <= 3000, `should cap at 3000 chars, got ${formatted.length}`); -}); - -test("formatPriorContextBrief() produces structured prior context output", async (t) => { - const dir = getGsdExtensionDir(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - const result = await runPreparation(dir, null, prefs); - const formatted = formatPriorContextBrief(result.priorContext); - - // Should contain expected sections - assert.ok(formatted.includes("## Prior Decisions"), "should have Prior Decisions section"); - assert.ok(formatted.includes("## Prior Requirements"), "should have Prior Requirements section"); - assert.ok(formatted.includes("## Prior Knowledge"), "should have Prior Knowledge section"); - assert.ok(formatted.includes("## Prior Milestone Summaries"), "should have Prior Milestone Summaries section"); - - // Should be within character limit - assert.ok(formatted.length <= 6000, `should cap at 6000 chars, got ${formatted.length}`); -}); - -test("formatEcosystemBrief() returns simplified message (research happens during discussion)", async (t) => { - const dir = getGsdExtensionDir(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - const result = await runPreparation(dir, null, prefs); - const formatted = formatEcosystemBrief(result.ecosystem); - - // Should contain section header - assert.ok(formatted.includes("## Ecosystem Research"), "should have Ecosystem Research section"); - - // Should indicate research happens during discussion - assert.ok(formatted.includes("during the discussion"), "should mention research happens during discussion"); - assert.ok(formatted.includes("web search tools"), "should mention web search tools"); - - // Should be within character limit - assert.ok(formatted.length <= 4000, `should cap at 4000 chars, got ${formatted.length}`); -}); - -// ─── Preparation Result Storage ───────────────────────────────────────────────── - -test("getLastPreparationResult() returns null initially", async (t) => { - // Clear any existing state - clearPreparationResult(); - - const result = getLastPreparationResult(); - assert.equal(result, null, "should return null when no preparation has run"); -}); - -test("clearPreparationResult() clears stored result", async (t) => { - // This test verifies the clear function works - // We can't easily test the set behavior without running the full guided-flow - clearPreparationResult(); - const result = getLastPreparationResult(); - assert.equal(result, null, "should be null after clear"); -}); - -// ─── TUI Progress Notifications ───────────────────────────────────────────────── - 
-test("runPreparation() emits TUI progress notifications", async (t) => { - const dir = getGsdExtensionDir(); - const ui = createMockUI(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - await runPreparation(dir, ui, prefs); - - // Should have notifications for each phase - assert.ok(ui.notifications.length > 0, "should have notifications"); - - // Verify codebase analysis notifications - assert.ok( - ui.notifications.some(n => n.message.includes("Analyzing codebase")), - "should show codebase analysis start", - ); - assert.ok( - ui.notifications.some(n => n.message.includes("✓ Analyzed codebase")), - "should show codebase analysis complete", - ); - - // Verify prior context notifications - assert.ok( - ui.notifications.some(n => n.message.includes("Reviewing prior context")), - "should show prior context start", - ); - assert.ok( - ui.notifications.some(n => n.message.includes("✓ Reviewed prior context")), - "should show prior context complete", - ); -}); - -test("runPreparation() works in silent mode (no UI)", async (t) => { - const dir = getGsdExtensionDir(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - // Pass null for UI - const result = await runPreparation(dir, null, prefs); - - // Should complete without error - assert.equal(result.enabled, true, "should work without UI"); - assert.ok(result.codebase, "should have codebase"); - assert.ok(result.priorContext, "should have priorContext"); - assert.ok(result.durationMs > 0, "should have duration"); -}); - -// ─── Preference-Controlled Behavior ───────────────────────────────────────────── - -test("runPreparation() returns early when discuss_preparation is false", async (t) => { - const dir = getGsdExtensionDir(); - const ui = createMockUI(); - const prefs: PreparationPreferences = { - discuss_preparation: false, - }; - - const result = await runPreparation(dir, ui, prefs); - - assert.equal(result.enabled, false, "should indicate preparation disabled"); - assert.equal(result.codebaseBrief, "", "should have empty codebase brief"); - assert.equal(result.priorContextBrief, "", "should have empty prior context brief"); - assert.equal(result.ecosystemBrief, "", "should have empty ecosystem brief"); - assert.equal(ui.notifications.length, 0, "should not show any notifications"); -}); - -test("runPreparation() ecosystem research always returns unavailable (happens during discussion)", async (t) => { - const dir = getGsdExtensionDir(); - const ui = createMockUI(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: true, // Even with this enabled, ecosystem research returns unavailable - }; - - const result = await runPreparation(dir, ui, prefs); - - assert.equal(result.enabled, true); - assert.equal(result.ecosystemResearchPerformed, false, "should not perform ecosystem research from preparation"); - assert.equal(result.ecosystem.available, false); - assert.ok( - result.ecosystem.skippedReason?.includes("during the discussion"), - "should indicate research happens during discussion", - ); - - // Should NOT have ecosystem research notifications (no longer part of preparation) - assert.ok( - !ui.notifications.some(n => n.message.includes("Researching ecosystem")), - "should not show ecosystem research notification", - ); -}); - -// ─── validateEnhancedContext Integration ──────────────────────────────────────── - -test("validateEnhancedContext() validates required sections", 
async (t) => { - // Test with valid enhanced context - const validContext = `# M001 — Test Milestone - -## Scope - -This milestone covers X, Y, Z. - -## Architectural Decisions - -### Decision 1: Use TypeScript - -We will use TypeScript for type safety. - -## Acceptance Criteria - -- [ ] Feature A works -- [ ] Feature B works -`; - - const validResult = validateEnhancedContext(validContext); - assert.equal(validResult.valid, true, "should validate complete context"); - assert.deepEqual(validResult.missing, [], "should have no missing sections"); - - // Test with missing sections - const invalidContext = `# M001 — Test Milestone - -## Scope - -This milestone covers X, Y, Z. -`; - - const invalidResult = validateEnhancedContext(invalidContext); - assert.equal(invalidResult.valid, false, "should reject incomplete context"); - assert.ok(invalidResult.missing.length > 0, "should list missing sections"); - assert.ok( - invalidResult.missing.some(m => m.includes("Architectural Decisions")), - "should report missing Architectural Decisions", - ); - assert.ok( - invalidResult.missing.some(m => m.includes("Acceptance Criteria")), - "should report missing Acceptance Criteria", - ); -}); - -test("validateEnhancedContext() requires decision entries in Architectural Decisions", async (t) => { - // Empty architectural decisions section - const emptyDecisions = `# M001 — Test Milestone - -## Scope - -This milestone covers X, Y, Z. - -## Architectural Decisions - -(No decisions yet) - -## Acceptance Criteria - -- [ ] Feature A works -`; - - const result = validateEnhancedContext(emptyDecisions); - assert.equal(result.valid, false, "should reject empty decisions section"); - assert.ok( - result.missing.some(m => m.includes("decision entry")), - "should report missing decision entry", - ); -}); - -// ─── Full Pipeline Integration ────────────────────────────────────────────────── - -test("Full pipeline: preparation produces consistent results across runs", async (t) => { - const dir = getGsdExtensionDir(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - // Run preparation twice - const result1 = await runPreparation(dir, null, prefs); - const result2 = await runPreparation(dir, null, prefs); - - // Results should be consistent (same codebase, same analysis) - assert.equal( - result1.codebase.techStack.primaryLanguage, - result2.codebase.techStack.primaryLanguage, - "primary language should be consistent", - ); - - assert.deepEqual( - result1.codebase.moduleStructure.topLevelDirs.sort(), - result2.codebase.moduleStructure.topLevelDirs.sort(), - "top-level directories should be consistent", - ); - - assert.equal( - result1.codebase.patterns.asyncStyle, - result2.codebase.patterns.asyncStyle, - "async style should be consistent", - ); -}); - -test("Full pipeline: preparation handles empty .gsd directory gracefully", async (t) => { - // The GSD extension directory may or may not have a .gsd subdirectory - // Either way, preparation should not crash - const dir = getGsdExtensionDir(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - let result: PreparationResult | undefined; - let error: unknown; - - try { - result = await runPreparation(dir, null, prefs); - } catch (e) { - error = e; - } - - assert.equal(error, undefined, "should not throw"); - assert.ok(result, "should return result"); - assert.equal(result!.enabled, true, "should be enabled"); - - // Prior context should gracefully 
handle missing files - assert.ok(result!.priorContext, "should have prior context even if files missing"); -}); diff --git a/src/resources/extensions/gsd/tests/integration/auto-worktree-milestone-merge.test.ts b/src/resources/extensions/gsd/tests/integration/auto-worktree-milestone-merge.test.ts index 48f5897d9..dd2742957 100644 --- a/src/resources/extensions/gsd/tests/integration/auto-worktree-milestone-merge.test.ts +++ b/src/resources/extensions/gsd/tests/integration/auto-worktree-milestone-merge.test.ts @@ -12,7 +12,7 @@ import { describe, test, afterEach } from "node:test"; import assert from "node:assert/strict"; -import { mkdtempSync, mkdirSync, writeFileSync, rmSync, existsSync, realpathSync, readFileSync } from "node:fs"; +import { mkdtempSync, mkdirSync, writeFileSync, rmSync, existsSync, realpathSync, readFileSync, symlinkSync, unlinkSync } from "node:fs"; import { join } from "node:path"; import { tmpdir } from "node:os"; import { execSync } from "node:child_process"; @@ -44,6 +44,27 @@ function createTempRepo(): string { return dir; } +function createTempRepoWithExternalGsd(): { repo: string; externalState: string } { + const realTmp = realpathSync(tmpdir()); + const repo = realpathSync(mkdtempSync(join(realTmp, "wt-ms-merge-ext-test-"))); + const externalState = realpathSync(mkdtempSync(join(realTmp, "wt-ms-merge-ext-state-"))); + + run("git init", repo); + run("git config user.email test@test.com", repo); + run("git config user.name Test", repo); + + mkdirSync(join(externalState, "worktrees"), { recursive: true }); + symlinkSync(externalState, join(repo, ".gsd")); + + writeFileSync(join(repo, "README.md"), "# test\n"); + writeFileSync(join(externalState, "STATE.md"), "# State\n"); + run("git add .", repo); + run("git commit -m init", repo); + run("git branch -M main", repo); + + return { repo, externalState }; +} + /** Minimal roadmap content for mergeMilestoneToMain. */ function makeRoadmap(milestoneId: string, title: string, slices: Array<{ id: string; title: string }>): string { const sliceLines = slices.map(s => `- [x] **${s.id}: ${s.title}**`).join("\n"); @@ -87,6 +108,12 @@ describe("auto-worktree-milestone-merge", { timeout: 300_000 }, () => { return d; } + function freshRepoWithExternalGsd(): { repo: string; externalState: string } { + const { repo, externalState } = createTempRepoWithExternalGsd(); + tempDirs.push(repo, externalState); + return { repo, externalState }; + } + afterEach(() => { process.chdir(savedCwd); for (const d of tempDirs) { @@ -638,6 +665,44 @@ describe("auto-worktree-milestone-merge", { timeout: 300_000 }, () => { "#1906: codeFilesChanged must be false when only .gsd/ files were merged"); }); + test("#2156: mergeMilestoneToMain removes external-state worktrees using the milestone branch name", () => { + const { repo, externalState } = freshRepoWithExternalGsd(); + const wtPath = createAutoWorktree(repo, "M215"); + + addSliceToMilestone(repo, wtPath, "M215", "S01", "External cleanup", [ + { file: "external-cleanup.ts", content: "export const externalCleanup = true;\n", message: "add external cleanup" }, + ]); + + const realWtPath = realpathSync(wtPath); + assert.ok( + realWtPath.startsWith(externalState), + `worktree should be registered under external .gsd state, got ${realWtPath}`, + ); + + // Recreate the exact divergence from #1852: local .gsd/ is replaced with a + // stale real directory, so worktreePath() no longer matches git's record. 
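+    // (Orientation: createTempRepoWithExternalGsd wired this repo up with
+    // `symlinkSync(externalState, join(repo, ".gsd"))`, so replacing that
+    // symlink with a plain directory below is precisely what makes the locally
+    // resolved .gsd path diverge from the worktree path git has on record.)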
+ unlinkSync(join(repo, ".gsd")); + mkdirSync(join(repo, ".gsd", "worktrees", "M215"), { recursive: true }); + writeFileSync(join(repo, ".gsd", "STATE.md"), "# Local stale state\n"); + writeFileSync(join(repo, ".gsd", "worktrees", "M215", "stale.txt"), "stale local artifact\n"); + + const roadmap = makeRoadmap("M215", "External cleanup", [ + { id: "S01", title: "External cleanup" }, + ]); + + mergeMilestoneToMain(repo, "M215", roadmap); + + assert.ok( + !run("git worktree list", repo).includes("M215"), + "merged milestone worktree should be removed from git worktree list", + ); + assert.ok(!existsSync(realWtPath), "real external worktree directory should be removed"); + assert.ok( + !run("git branch", repo).includes("milestone/M215"), + "milestone branch should be deleted after merge cleanup", + ); + }); + test("#2912: MERGE_HEAD cleaned up after squash-merge conflict", () => { const repo = freshRepo(); const wtPath = createAutoWorktree(repo, "M291"); diff --git a/src/resources/extensions/gsd/tests/integration/git-service.test.ts b/src/resources/extensions/gsd/tests/integration/git-service.test.ts index f88901431..b99c2e8ca 100644 --- a/src/resources/extensions/gsd/tests/integration/git-service.test.ts +++ b/src/resources/extensions/gsd/tests/integration/git-service.test.ts @@ -248,23 +248,25 @@ describe('git-service', async () => { assert.deepStrictEqual( RUNTIME_EXCLUSION_PATHS.length, - 13, - "exactly 13 runtime exclusion paths" + 15, + "exactly 15 runtime exclusion paths" ); const expectedPaths = [ ".gsd/activity/", + ".gsd/forensics/", ".gsd/runtime/", ".gsd/worktrees/", + ".gsd/parallel/", ".gsd/auto.lock", ".gsd/metrics.json", - ".gsd/completed-units.json", + ".gsd/completed-units*.json", + ".gsd/state-manifest.json", ".gsd/STATE.md", - ".gsd/gsd.db", - ".gsd/gsd.db-shm", - ".gsd/gsd.db-wal", + ".gsd/gsd.db*", ".gsd/journal/", ".gsd/doctor-history.jsonl", + ".gsd/event-log.jsonl", ".gsd/DISCUSSION-MANIFEST.json", ]; diff --git a/src/resources/extensions/gsd/tests/integration/test-isolation.ts b/src/resources/extensions/gsd/tests/integration/test-isolation.ts deleted file mode 100644 index bc8270222..000000000 --- a/src/resources/extensions/gsd/tests/integration/test-isolation.ts +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Test isolation utilities for integration tests. - * - * Integration tests often call `mergeMilestoneToMain` and other functions that - * load preferences. If the user's global ~/.gsd/preferences.md has - * `git.main_branch: master`, tests fail because test repos use `main`. - * - * These utilities isolate tests from the user's global environment. - */ - -import { mkdtempSync, rmSync, realpathSync } from "node:fs"; -import { tmpdir } from "node:os"; -import { join } from "node:path"; - -import { _resetServiceCache } from "../../worktree.ts"; -import { _clearGsdRootCache } from "../../paths.ts"; - -let originalHome: string | undefined; -let fakeHome: string | null = null; - -/** - * Isolate the test environment from user's global preferences. - * Creates a fake HOME directory so loadEffectiveGSDPreferences() returns - * empty global preferences instead of the user's ~/.gsd/preferences.md. - * - * Call this in a test.before() hook. - */ -export function isolateFromGlobalPreferences(): void { - originalHome = process.env.HOME; - fakeHome = realpathSync(mkdtempSync(join(tmpdir(), "gsd-test-home-"))); - process.env.HOME = fakeHome; - _clearGsdRootCache(); - _resetServiceCache(); -} - -/** - * Restore the original HOME and clean up the fake home directory. 
- * - * Call this in a test.after() hook. - */ -export function restoreGlobalPreferences(): void { - if (originalHome !== undefined) { - process.env.HOME = originalHome; - } else { - delete process.env.HOME; - } - _clearGsdRootCache(); - _resetServiceCache(); - if (fakeHome) { - rmSync(fakeHome, { recursive: true, force: true }); - fakeHome = null; - } -} diff --git a/src/resources/extensions/gsd/tests/interactive-routing-bypass.test.ts b/src/resources/extensions/gsd/tests/interactive-routing-bypass.test.ts new file mode 100644 index 000000000..f209ecc8d --- /dev/null +++ b/src/resources/extensions/gsd/tests/interactive-routing-bypass.test.ts @@ -0,0 +1,207 @@ +// GSD Extension — Interactive Routing Bypass Tests +// Verifies that dynamic routing is skipped for interactive (guided-flow) dispatches +// and that model downgrade notifications always fire (#3962). +// Copyright (c) 2026 Jeremy McSpadden + +import test, { describe } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +// ─── Source-level structural tests ────────────────────────────────────────── + +const modelSelectionSrc = readFileSync( + join(__dirname, "..", "auto-model-selection.ts"), + "utf-8", +); + +const guidedFlowSrc = readFileSync( + join(__dirname, "..", "guided-flow.ts"), + "utf-8", +); + +const autoStartSrc = readFileSync( + join(__dirname, "..", "auto-start.ts"), + "utf-8", +); + +describe("interactive routing bypass (#3962)", () => { + test("selectAndApplyModel accepts isAutoMode parameter", () => { + // The function signature should include isAutoMode with a default of true + assert.ok( + modelSelectionSrc.includes("isAutoMode"), + "selectAndApplyModel should have isAutoMode parameter", + ); + assert.ok( + modelSelectionSrc.includes("isAutoMode = true"), + "isAutoMode should default to true (auto-mode behavior preserved)", + ); + }); + + test("routing is disabled when isAutoMode is false", () => { + // The code should disable routing when not in auto-mode + assert.ok( + modelSelectionSrc.includes("if (!isAutoMode)"), + "should check isAutoMode flag to disable routing", + ); + assert.ok( + modelSelectionSrc.includes("routingConfig.enabled = false"), + "should set routingConfig.enabled = false for interactive mode", + ); + }); + + test("resolvePreferredModelConfig skips routing synthesis when isAutoMode is false", () => { + // resolvePreferredModelConfig should accept isAutoMode and bail early + // before synthesizing a routing ceiling from tier_models (#3962 codex review) + assert.ok( + modelSelectionSrc.includes("function resolvePreferredModelConfig"), + "resolvePreferredModelConfig should exist", + ); + // The function should check isAutoMode before routing synthesis + const fnIdx = modelSelectionSrc.indexOf("function resolvePreferredModelConfig"); + const fnBody = modelSelectionSrc.slice(fnIdx, fnIdx + 600); + assert.ok( + fnBody.includes("isAutoMode"), + "resolvePreferredModelConfig should accept isAutoMode parameter", + ); + assert.ok( + fnBody.includes("if (!isAutoMode) return undefined"), + "should return undefined (skip routing synthesis) when not in auto-mode", + ); + }); + + test("selectAndApplyModel threads isAutoMode to resolvePreferredModelConfig", () => { + // The call to resolvePreferredModelConfig inside selectAndApplyModel + // should pass isAutoMode as the third argument + const callSite = 
"resolvePreferredModelConfig(unitType, autoModeStartModel, isAutoMode)"; + assert.ok( + modelSelectionSrc.includes(callSite), + "selectAndApplyModel should pass isAutoMode to resolvePreferredModelConfig", + ); + }); + + test("guided-flow passes isAutoMode=false", () => { + // guided-flow.ts should explicitly pass isAutoMode as false + assert.ok( + guidedFlowSrc.includes("/* isAutoMode */ false"), + "guided-flow should pass isAutoMode=false to selectAndApplyModel", + ); + }); + + test("auto/phases.ts does NOT pass isAutoMode=false", () => { + // auto/phases.ts should use the default (true) — it's auto-mode + const phasesSrc = readFileSync( + join(__dirname, "..", "auto", "phases.ts"), + "utf-8", + ); + assert.ok( + !phasesSrc.includes("isAutoMode"), + "auto/phases.ts should use default isAutoMode=true (not pass it explicitly)", + ); + }); +}); + +describe("model downgrade notifications always visible (#3962)", () => { + test("downgrade notification is not gated by verbose flag", () => { + // The downgrade notification block should NOT be wrapped in `if (verbose)` + // Find the downgrade block and verify it's not behind a verbose check + const downgradeBlock = "if (routingResult.wasDowngraded)"; + const downgradeIdx = modelSelectionSrc.indexOf(downgradeBlock); + assert.ok(downgradeIdx > 0, "downgrade block should exist"); + + // Extract the code between wasDowngraded check and the next routing label assignment + const afterDowngrade = modelSelectionSrc.slice( + downgradeIdx, + modelSelectionSrc.indexOf("routingTierLabel =", downgradeIdx), + ); + + // The notification calls should NOT be wrapped in `if (verbose)` + assert.ok( + !afterDowngrade.includes("if (verbose)"), + "downgrade notifications should not be gated by verbose flag", + ); + + // But the notification calls should exist + assert.ok( + afterDowngrade.includes('ctx.ui.notify('), + "downgrade notifications should still fire", + ); + }); + + test("tier escalation notification is not gated by verbose flag", () => { + // Extract the escalation block: from "if (escalated)" to its closing + // and verify the notification is present but `if (verbose)` is not. 
+ const escalatedIdx = modelSelectionSrc.indexOf("if (escalated)"); + assert.ok(escalatedIdx > 0, "escalation block should exist"); + + // Get the block from "if (escalated)" to the next closing brace pattern + const block = modelSelectionSrc.slice(escalatedIdx, escalatedIdx + 400); + assert.ok( + block.includes("Tier escalation:"), + "escalation block should contain the notification", + ); + assert.ok( + !block.includes("if (verbose)"), + "escalation block should not gate notification behind verbose flag", + ); + }); +}); + +describe("auto-mode start routing banner (#3962)", () => { + test("auto-start shows dynamic routing status on startup", () => { + assert.ok( + autoStartSrc.includes("Dynamic routing:"), + "auto-start should display routing status banner", + ); + assert.ok( + autoStartSrc.includes("resolveDynamicRoutingConfig"), + "auto-start should import resolveDynamicRoutingConfig", + ); + }); + + test("banner shows different messages for enabled vs disabled routing", () => { + assert.ok( + autoStartSrc.includes("Dynamic routing: enabled"), + "should show message when routing is enabled", + ); + assert.ok( + autoStartSrc.includes("Dynamic routing: disabled"), + "should show message when routing is disabled", + ); + }); + + test("banner shows the ceiling model", () => { + assert.ok( + autoStartSrc.includes("startModelLabel"), + "banner should reference the start/ceiling model", + ); + }); + + test("banner accounts for flat-rate provider suppression", () => { + // The banner should check isFlatRateProvider to accurately reflect + // whether routing will actually be active at dispatch time (#3962 codex review) + assert.ok( + autoStartSrc.includes("isFlatRateProvider"), + "banner should check flat-rate provider status", + ); + assert.ok( + autoStartSrc.includes("effectivelyEnabled"), + "banner should compute effective routing state, not just raw config", + ); + }); + + test("banner uses effective ceiling from tier_models.heavy when configured", () => { + // The actual ceiling may come from tier_models.heavy, not the start model + assert.ok( + autoStartSrc.includes("tier_models?.heavy"), + "banner should check tier_models.heavy for the effective ceiling", + ); + assert.ok( + autoStartSrc.includes("effectiveCeiling"), + "banner should compute the effective ceiling model", + ); + }); +}); diff --git a/src/resources/extensions/gsd/tests/key-manager.test.ts b/src/resources/extensions/gsd/tests/key-manager.test.ts index 785c34945..a7614b092 100644 --- a/src/resources/extensions/gsd/tests/key-manager.test.ts +++ b/src/resources/extensions/gsd/tests/key-manager.test.ts @@ -427,3 +427,66 @@ test("formatDoctorFindings shows findings with appropriate icons", () => { assert.ok(output.includes("1 warning")); assert.ok(output.includes("1 fixed")); }); + +// ─── Regression #3891 — alibaba-coding-plan missing from PROVIDER_REGISTRY ─────── +// +// Before this fix, `alibaba-coding-plan` was not in PROVIDER_REGISTRY, causing +// `/gsd keys add alibaba-coding-plan` to silently fail (provider not found). +// alibaba-dashscope is the new standalone provider added in the same PR. 
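+//
+// For orientation, the registry entries exercised below look roughly like
+// (shape inferred from the assertions; the authoritative definition lives in
+// the key-manager source):
+//
+//   { id: "alibaba-coding-plan", category: "llm", envVar: "ALIBABA_API_KEY" }
+//   { id: "alibaba-dashscope",   category: "llm", envVar: "DASHSCOPE_API_KEY" }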
+ +test("regression #3891 — alibaba-coding-plan is in PROVIDER_REGISTRY", () => { + const provider = findProvider("alibaba-coding-plan"); + assert.ok(provider, "alibaba-coding-plan must be in PROVIDER_REGISTRY for /gsd keys add to work"); + assert.equal(provider.id, "alibaba-coding-plan"); + assert.equal(provider.category, "llm"); + assert.equal(provider.envVar, "ALIBABA_API_KEY"); +}); + +test("alibaba-dashscope is in PROVIDER_REGISTRY", () => { + const provider = findProvider("alibaba-dashscope"); + assert.ok(provider, "alibaba-dashscope must be in PROVIDER_REGISTRY for /gsd keys add to work"); + assert.equal(provider.id, "alibaba-dashscope"); + assert.equal(provider.category, "llm"); + assert.equal(provider.envVar, "DASHSCOPE_API_KEY"); +}); + +test("alibaba-coding-plan and alibaba-dashscope are separate providers (different env vars)", () => { + const codingPlan = findProvider("alibaba-coding-plan"); + const dashscope = findProvider("alibaba-dashscope"); + assert.ok(codingPlan, "alibaba-coding-plan must exist"); + assert.ok(dashscope, "alibaba-dashscope must exist"); + assert.notEqual( + codingPlan.envVar, + dashscope.envVar, + "alibaba-coding-plan and alibaba-dashscope must use different env vars", + ); +}); + +test("getAllKeyStatuses includes alibaba-coding-plan", () => { + const auth = makeAuth(); + const statuses = getAllKeyStatuses(auth); + const found = statuses.find((s) => s.provider.id === "alibaba-coding-plan"); + assert.ok(found, "getAllKeyStatuses must include alibaba-coding-plan"); +}); + +test("getAllKeyStatuses includes alibaba-dashscope", () => { + const auth = makeAuth(); + const statuses = getAllKeyStatuses(auth); + const found = statuses.find((s) => s.provider.id === "alibaba-dashscope"); + assert.ok(found, "getAllKeyStatuses must include alibaba-dashscope"); +}); + +test("getAllKeyStatuses detects DASHSCOPE_API_KEY for alibaba-dashscope (failure path: missing key shows not configured)", () => { + const saved = process.env.DASHSCOPE_API_KEY; + delete process.env.DASHSCOPE_API_KEY; + try { + const auth = makeAuth(); + const statuses = getAllKeyStatuses(auth); + const found = statuses.find((s) => s.provider.id === "alibaba-dashscope"); + assert.ok(found); + assert.equal(found.configured, false); + assert.equal(found.source, "none"); + } finally { + if (saved !== undefined) process.env.DASHSCOPE_API_KEY = saved; + } +}); diff --git a/src/resources/extensions/gsd/tests/mcp-project-config.test.ts b/src/resources/extensions/gsd/tests/mcp-project-config.test.ts index 0c2cdba5c..7638a7e74 100644 --- a/src/resources/extensions/gsd/tests/mcp-project-config.test.ts +++ b/src/resources/extensions/gsd/tests/mcp-project-config.test.ts @@ -26,8 +26,12 @@ test("ensureProjectWorkflowMcpConfig creates .mcp.json with the workflow server" assert.equal(typeof server?.command, "string"); assert.equal(Array.isArray(server?.args), true); assert.equal(server?.env?.GSD_WORKFLOW_PROJECT_ROOT, projectRoot); - assert.match(server?.env?.GSD_WORKFLOW_EXECUTORS_MODULE ?? "", /workflow-tool-executors\.js$/); - assert.match(server?.env?.GSD_WORKFLOW_WRITE_GATE_MODULE ?? "", /write-gate\.js$/); + assert.match(server?.env?.GSD_WORKFLOW_EXECUTORS_MODULE ?? "", /workflow-tool-executors\.(js|ts)$/); + assert.match(server?.env?.GSD_WORKFLOW_WRITE_GATE_MODULE ?? "", /write-gate\.(js|ts)$/); + if ((server?.env?.GSD_WORKFLOW_EXECUTORS_MODULE ?? "").endsWith(".ts")) { + assert.match(server?.env?.NODE_OPTIONS ?? "", /--experimental-strip-types/); + assert.match(server?.env?.NODE_OPTIONS ?? 
"", /resolve-ts\.mjs/); + } } finally { rmSync(projectRoot, { recursive: true, force: true }); } diff --git a/src/resources/extensions/gsd/tests/memory-pressure-stuck-state.test.ts b/src/resources/extensions/gsd/tests/memory-pressure-stuck-state.test.ts new file mode 100644 index 000000000..94fe029cc --- /dev/null +++ b/src/resources/extensions/gsd/tests/memory-pressure-stuck-state.test.ts @@ -0,0 +1,54 @@ +/** + * Regression tests for memory pressure monitoring (#3331) and + * stuck detection persistence (#3704) in auto/loop.ts. + */ + +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const loopSource = readFileSync(join(__dirname, "..", "auto", "loop.ts"), "utf-8"); + +describe("memory pressure monitoring (#3331)", () => { + test("checkMemoryPressure function exists", () => { + assert.match(loopSource, /function checkMemoryPressure/); + }); + + test("MEMORY_PRESSURE_THRESHOLD constant is defined", () => { + assert.match(loopSource, /MEMORY_PRESSURE_THRESHOLD\s*=\s*0\.\d+/); + }); + + test("memory check runs every MEMORY_CHECK_INTERVAL iterations", () => { + assert.match(loopSource, /iteration\s*%\s*MEMORY_CHECK_INTERVAL\s*===\s*0/); + }); + + test("memory pressure triggers graceful stopAuto", () => { + assert.match(loopSource, /mem\.pressured/); + assert.match(loopSource, /Stopping gracefully to prevent OOM/); + }); +}); + +describe("stuck detection persistence (#3704)", () => { + test("loadStuckState function exists", () => { + assert.match(loopSource, /function loadStuckState/); + }); + + test("saveStuckState function exists", () => { + assert.match(loopSource, /function saveStuckState/); + }); + + test("loopState initialized from persisted state", () => { + assert.match(loopSource, /loadStuckState\(s\.basePath\)/); + }); + + test("stuck state saved after each iteration", () => { + assert.match(loopSource, /saveStuckState\(s\.basePath,\s*loopState\)/); + }); + + test("stuck state file path uses runtime directory", () => { + assert.match(loopSource, /stuck-state\.json/); + }); +}); diff --git a/src/resources/extensions/gsd/tests/model-isolation.test.ts b/src/resources/extensions/gsd/tests/model-isolation.test.ts index 6dd107b12..8b86a20b8 100644 --- a/src/resources/extensions/gsd/tests/model-isolation.test.ts +++ b/src/resources/extensions/gsd/tests/model-isolation.test.ts @@ -1,6 +1,6 @@ /** * Tests for model config isolation between concurrent instances (#650, #1065) - * and GSD preferences override of settings.json defaults (#3517). + * and session-scoped model precedence behavior. */ import { describe, it, beforeEach, afterEach } from "node:test"; @@ -157,75 +157,60 @@ describe("session model recovery on error (#1065)", () => { }); }); -// ─── GSD Preferences override settings.json (#3517) ───────────────────────── +// ─── Manual session model override precedence ─────────────────────────────── -describe("GSD preferences override settings.json for session model (#3517)", () => { - it("preferredModel takes priority over ctx.model when both are available", () => { - // Simulates auto-start.ts logic: preferredModel ?? 
ctx.model snapshot - const preferredModel = { provider: "openai-codex", id: "gpt-5.4" }; - const ctxModel = { provider: "claude-code", id: "claude-sonnet-4-6" }; +describe("manual session model override precedence", () => { + it("manual session override takes priority over preferences and ctx.model", () => { + const manualSessionOverride = { provider: "openai-codex", id: "gpt-5.4" }; + const preferredModel = { provider: "anthropic", id: "claude-sonnet-4-6" }; + const ctxModel = { provider: "claude-code", id: "claude-opus-4-6" }; - const startModelSnapshot = preferredModel + const startModelSnapshot = manualSessionOverride + ?? preferredModel ?? { provider: ctxModel.provider, id: ctxModel.id }; - assert.equal(startModelSnapshot.provider, "openai-codex", - "preferredModel provider should win over ctx.model"); - assert.equal(startModelSnapshot.id, "gpt-5.4", - "preferredModel id should win over ctx.model"); + assert.equal(startModelSnapshot.provider, "openai-codex"); + assert.equal(startModelSnapshot.id, "gpt-5.4"); }); - it("falls back to ctx.model when no GSD preferences are configured", () => { + it("falls back to preferences when no manual override is active", () => { + const manualSessionOverride: { provider: string; id: string } | undefined = undefined; + const preferredModel = { provider: "anthropic", id: "claude-sonnet-4-6" }; + const ctxModel = { provider: "claude-code", id: "claude-opus-4-6" }; + + const startModelSnapshot = manualSessionOverride + ?? preferredModel + ?? { provider: ctxModel.provider, id: ctxModel.id }; + + assert.equal(startModelSnapshot.provider, "anthropic"); + assert.equal(startModelSnapshot.id, "claude-sonnet-4-6"); + }); + + it("falls back to ctx.model when no manual override or preferences are configured", () => { + const manualSessionOverride: { provider: string; id: string } | undefined = undefined; const preferredModel: { provider: string; id: string } | undefined = undefined; - const ctxModel = { provider: "claude-code", id: "claude-sonnet-4-6" }; + const ctxModel = { provider: "claude-code", id: "claude-opus-4-6" }; - const startModelSnapshot = preferredModel + const startModelSnapshot = manualSessionOverride + ?? preferredModel ?? { provider: ctxModel.provider, id: ctxModel.id }; - assert.equal(startModelSnapshot.provider, "claude-code", - "should fall back to ctx.model provider when no preferences"); - assert.equal(startModelSnapshot.id, "claude-sonnet-4-6", - "should fall back to ctx.model id when no preferences"); + assert.equal(startModelSnapshot.provider, "claude-code"); + assert.equal(startModelSnapshot.id, "claude-opus-4-6"); }); - it("handles null ctx.model with no preferences gracefully", () => { + it("handles null ctx.model with no override or preferences gracefully", () => { + const manualSessionOverride: { provider: string; id: string } | undefined = undefined; const preferredModel: { provider: string; id: string } | undefined = undefined; // Use a function to prevent TS from narrowing to `never` in the ternary function getCtxModel(): { provider: string; id: string } | null { return null; } const ctxModel = getCtxModel(); - const startModelSnapshot = preferredModel + const startModelSnapshot = manualSessionOverride + ?? preferredModel ?? (ctxModel ? 
{ provider: ctxModel.provider, id: ctxModel.id } : null); assert.equal(startModelSnapshot, null, - "should be null when neither preferences nor ctx.model exist"); - }); - - it("bare model ID uses session provider when available", () => { - // Simulates: PREFERENCES.md has "gpt-5.4" (no provider), session is openai-codex - const preferredModel = { provider: "openai-codex", id: "gpt-5.4" }; // from resolveDefaultSessionModel("openai-codex") - const ctxModel = { provider: "openai-codex", id: "claude-sonnet-4-6" }; - - const startModelSnapshot = preferredModel - ?? { provider: ctxModel.provider, id: ctxModel.id }; - - assert.equal(startModelSnapshot.provider, "openai-codex"); - assert.equal(startModelSnapshot.id, "gpt-5.4", - "bare model ID from preferences should still override ctx.model"); - }); - - it("stale settings.json does not leak when preferences are set", () => { - // Scenario: settings.json has claude-code, PREFERENCES.md has openai-codex - const settingsJsonDefault = { provider: "claude-code", id: "claude-sonnet-4-6" }; - const preferencesModel = { provider: "openai-codex", id: "gpt-5.4" }; - - // auto-start.ts captures preferredModel first, which preempts settingsJsonDefault - const startModelSnapshot = preferencesModel ?? settingsJsonDefault; - - assert.equal(startModelSnapshot.provider, "openai-codex", - "PREFERENCES.md must override stale settings.json provider"); - assert.equal(startModelSnapshot.id, "gpt-5.4", - "PREFERENCES.md must override stale settings.json model"); - assert.notEqual(startModelSnapshot.provider, settingsJsonDefault.provider, - "settings.json provider must NOT leak through"); + "should be null when no model source is available"); }); }); diff --git a/src/resources/extensions/gsd/tests/model-router.test.ts b/src/resources/extensions/gsd/tests/model-router.test.ts index c81242215..d12a71df9 100644 --- a/src/resources/extensions/gsd/tests/model-router.test.ts +++ b/src/resources/extensions/gsd/tests/model-router.test.ts @@ -287,9 +287,9 @@ test("resolveModelForComplexity falls back to tier-only when capability_routing assert.ok(!result.selectionMethod || result.selectionMethod === "tier-only"); }); -test("MODEL_CAPABILITY_PROFILES has entries for core models", () => { +test("MODEL_CAPABILITY_PROFILES has entries for all tier-mapped models", () => { const profiledModels = Object.keys(MODEL_CAPABILITY_PROFILES); - assert.ok(profiledModels.length >= 9, `Expected ≥9 profiles, got ${profiledModels.length}`); + assert.ok(profiledModels.length >= 30, `Expected ≥30 profiles, got ${profiledModels.length}`); assert.ok(MODEL_CAPABILITY_PROFILES["claude-opus-4-6"]); assert.ok(MODEL_CAPABILITY_PROFILES["claude-haiku-4-5"]); }); diff --git a/src/resources/extensions/gsd/tests/notification-store.test.ts b/src/resources/extensions/gsd/tests/notification-store.test.ts index 8f13fb873..106dd617f 100644 --- a/src/resources/extensions/gsd/tests/notification-store.test.ts +++ b/src/resources/extensions/gsd/tests/notification-store.test.ts @@ -16,6 +16,7 @@ import { getLineCount, suppressPersistence, unsuppressPersistence, + onNotificationStoreChange, _resetNotificationStore, } from "../notification-store.js"; @@ -187,6 +188,23 @@ describe("notification-store", () => { assert.ok(!entries.some((e) => e.message === "suppressed")); }); + test("appendNotification suppresses identical messages within the dedup window", (t) => { + initNotificationStore(tmp); + let now = 1_000; + t.mock.method(Date, "now", () => now); + + appendNotification("same", "warning"); + now += 1_000; + 
appendNotification("same", "warning"); + now += 31_000; + appendNotification("same", "warning"); + + const entries = readNotifications(); + assert.equal(entries.length, 2); + assert.equal(entries[0].message, "same"); + assert.equal(entries[1].message, "same"); + }); + test("suppressPersistence is ref-counted", () => { initNotificationStore(tmp); suppressPersistence(); @@ -279,4 +297,21 @@ describe("notification-store", () => { rmSync(lockPath, { force: true }); }); + + test("listeners are notified on append, markAllRead, and clear", () => { + initNotificationStore(tmp); + let calls = 0; + const unsubscribe = onNotificationStoreChange(() => { calls++; }); + + appendNotification("msg1", "info"); + assert.equal(calls, 1, "append should emit one change"); + + markAllRead(); + assert.equal(calls, 2, "markAllRead should emit one change when state changes"); + + clearNotifications(); + assert.equal(calls, 3, "clear should emit one change"); + + unsubscribe(); + }); }); diff --git a/src/resources/extensions/gsd/tests/notification-widget.test.ts b/src/resources/extensions/gsd/tests/notification-widget.test.ts new file mode 100644 index 000000000..ef74f03fe --- /dev/null +++ b/src/resources/extensions/gsd/tests/notification-widget.test.ts @@ -0,0 +1,26 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { initNotificationStore, appendNotification, _resetNotificationStore } from "../notification-store.js"; +import { buildNotificationWidgetLines } from "../notification-widget.js"; + +test("buildNotificationWidgetLines shows unread count with shortcut pair", () => { + const tmp = mkdtempSync(join(tmpdir(), "gsd-notification-widget-")); + try { + mkdirSync(join(tmp, ".gsd"), { recursive: true }); + _resetNotificationStore(); + initNotificationStore(tmp); + appendNotification("Need attention", "warning"); + + const lines = buildNotificationWidgetLines(); + assert.equal(lines.length, 1); + assert.match(lines[0]!, /Notifications:\s+1 unread/); + assert.match(lines[0]!, /\(.+\/.+\)/); + } finally { + _resetNotificationStore(); + rmSync(tmp, { recursive: true, force: true }); + } +}); diff --git a/src/resources/extensions/gsd/tests/notifications-handler.test.ts b/src/resources/extensions/gsd/tests/notifications-handler.test.ts new file mode 100644 index 000000000..fc503f7cc --- /dev/null +++ b/src/resources/extensions/gsd/tests/notifications-handler.test.ts @@ -0,0 +1,90 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { mkdirSync, rmSync } from "node:fs"; + +import { handleNotificationsCommand } from "../commands/handlers/notifications-handler.ts"; +import { + _resetNotificationStore, + appendNotification, + initNotificationStore, +} from "../notification-store.ts"; + +function makeTempDir(prefix: string): string { + const dir = join( + tmpdir(), + `gsd-notifications-handler-test-${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`, + ); + mkdirSync(dir, { recursive: true }); + mkdirSync(join(dir, ".gsd"), { recursive: true }); + return dir; +} + +function cleanup(dir: string): void { + try { + rmSync(dir, { recursive: true, force: true }); + } catch { + // best-effort + } +} + +test("notifications command falls back to text output when overlay returns undefined", async (t) => { + const base = makeTempDir("overlay-fallback"); + 
initNotificationStore(base); + appendNotification("Build complete", "success"); + + t.after(() => { + _resetNotificationStore(); + cleanup(base); + }); + + const notices: Array<{ message: string; level?: string }> = []; + await handleNotificationsCommand( + "", + { + hasUI: true, + ui: { + custom: async () => undefined, + notify: (message: string, level?: string) => { + notices.push({ message, level }); + }, + }, + } as any, + {} as any, + ); + + assert.equal(notices.length, 1, "text fallback should be emitted when overlay cannot render"); + assert.match(notices[0].message, /Recent notifications:/); +}); + +test("notifications tail caps inline output and hints to open overlay", async (t) => { + const base = makeTempDir("tail-cap"); + initNotificationStore(base); + for (let i = 0; i < 55; i++) { + appendNotification(`notification-${i + 1}`, "info"); + } + + t.after(() => { + _resetNotificationStore(); + cleanup(base); + }); + + const notices: Array<{ message: string; level?: string }> = []; + await handleNotificationsCommand( + "tail 200", + { + hasUI: true, + ui: { + notify: (message: string, level?: string) => { + notices.push({ message, level }); + }, + }, + } as any, + {} as any, + ); + + assert.equal(notices.length, 1); + assert.match(notices[0].message, /Last 40 notification\(s\):/); + assert.match(notices[0].message, /\.\.\. and \d+ more \(open \/gsd notifications to browse all\)/); +}); diff --git a/src/resources/extensions/gsd/tests/parallel-monitor-overlay.test.ts b/src/resources/extensions/gsd/tests/parallel-monitor-overlay.test.ts index cc1d19ac6..1c34df459 100644 --- a/src/resources/extensions/gsd/tests/parallel-monitor-overlay.test.ts +++ b/src/resources/extensions/gsd/tests/parallel-monitor-overlay.test.ts @@ -56,6 +56,7 @@ describe("parallel-monitor-overlay", () => { overlay2.handleInput("q"); assert.ok(closed, "pressing q should trigger onClose"); overlay2.dispose(); + }); it("ParallelMonitorOverlay clamps scrollOffset during render", async () => { diff --git a/src/resources/extensions/gsd/tests/park-db-sync.test.ts b/src/resources/extensions/gsd/tests/park-db-sync.test.ts index 0580337e2..684f7904d 100644 --- a/src/resources/extensions/gsd/tests/park-db-sync.test.ts +++ b/src/resources/extensions/gsd/tests/park-db-sync.test.ts @@ -69,6 +69,24 @@ test("unparkMilestone updates DB status to 'active' (#2694)", () => { } }); +test("unparkMilestone repairs parked DB state when PARKED.md is missing (#3707)", () => { + const base = createBase(); + try { + openDatabase(":memory:"); + insertMilestone({ id: "M001", title: "Test", status: "parked" }); + + const unparked = unparkMilestone(base, "M001"); + + assert.ok(unparked, "unparkMilestone should recover DB-only parked state"); + assert.equal(getMilestone("M001")!.status, "active", "DB status should be repaired to active"); + + closeDatabase(); + } finally { + closeDatabase(); + rmSync(base, { recursive: true, force: true }); + } +}); + test("park/unpark are safe when DB is not available (#2694 guard)", () => { const base = createBase(); try { diff --git a/src/resources/extensions/gsd/tests/plan-milestone-artifact-verification.test.ts b/src/resources/extensions/gsd/tests/plan-milestone-artifact-verification.test.ts new file mode 100644 index 000000000..eb2d90533 --- /dev/null +++ b/src/resources/extensions/gsd/tests/plan-milestone-artifact-verification.test.ts @@ -0,0 +1,62 @@ +import { test } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs"; 
+import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { verifyExpectedArtifact } from "../auto-recovery.ts"; + +function createFixtureBase(): string { + const base = mkdtempSync(join(tmpdir(), "gsd-plan-milestone-artifact-")); + mkdirSync(join(base, ".gsd", "milestones"), { recursive: true }); + return base; +} + +function writeRoadmap(base: string, milestoneId: string, content: string): void { + const milestoneDir = join(base, ".gsd", "milestones", milestoneId); + mkdirSync(milestoneDir, { recursive: true }); + writeFileSync(join(milestoneDir, `${milestoneId}-ROADMAP.md`), content, "utf-8"); +} + +test("#3405: plan-milestone roadmap stub does not count as a verified artifact", () => { + const base = createFixtureBase(); + try { + writeRoadmap(base, "M001", [ + "# M001: Placeholder", + "", + "**Vision:** Stub only.", + "", + "## Slices", + "", + "_TBD_", + "", + ].join("\n")); + + const result = verifyExpectedArtifact("plan-milestone", "M001", base); + assert.equal(result, false, "zero-slice roadmap stubs must fail verification"); + } finally { + rmSync(base, { recursive: true, force: true }); + } +}); + +test("#3405: plan-milestone roadmap with real slices still passes artifact verification", () => { + const base = createFixtureBase(); + try { + writeRoadmap(base, "M001", [ + "# M001: Real roadmap", + "", + "**Vision:** Real work.", + "", + "## Slices", + "", + "- [ ] **S01: First slice** `risk:low` `depends:[]`", + " > After this: a real slice exists.", + "", + ].join("\n")); + + const result = verifyExpectedArtifact("plan-milestone", "M001", base); + assert.equal(result, true, "real roadmap slices should keep passing verification"); + } finally { + rmSync(base, { recursive: true, force: true }); + } +}); diff --git a/src/resources/extensions/gsd/tests/post-unit-state-rebuild.test.ts b/src/resources/extensions/gsd/tests/post-unit-state-rebuild.test.ts new file mode 100644 index 000000000..43e73388d --- /dev/null +++ b/src/resources/extensions/gsd/tests/post-unit-state-rebuild.test.ts @@ -0,0 +1,34 @@ +/** + * Regression test for #3869: normal post-unit flow should rebuild STATE.md + * before syncing worktree state back to the project root. 
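+ * Rebuilding first matters because the sync copies STATE.md back to the root;
+ * without the rebuild it would propagate a stale pre-unit snapshot.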
+ */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +const source = readFileSync(join(import.meta.dirname, "..", "auto-post-unit.ts"), "utf-8"); + +test("auto-post-unit imports rebuildState", () => { + assert.ok( + source.includes('import { rebuildState } from "./doctor.js";'), + "auto-post-unit.ts should import rebuildState from doctor.ts", + ); +}); + +test("postUnitPreVerification rebuilds STATE.md before worktree sync", () => { + const fnStart = source.indexOf("export async function postUnitPreVerification"); + assert.ok(fnStart > 0, "postUnitPreVerification should exist"); + + const section = source.slice(fnStart, fnStart + 8000); + const rebuildIdx = section.indexOf('await runSafely("postUnit", "state-rebuild"'); + const syncIdx = section.indexOf('await runSafely("postUnit", "worktree-sync"'); + + assert.ok(rebuildIdx > 0, "postUnitPreVerification should rebuild STATE.md after unit completion"); + assert.ok(syncIdx > 0, "postUnitPreVerification should sync worktree state back to the project root"); + assert.ok( + rebuildIdx < syncIdx, + "STATE.md rebuild should happen before worktree sync so synced state is fresh", + ); +}); diff --git a/src/resources/extensions/gsd/tests/pre-exec-backtick-strip.test.ts b/src/resources/extensions/gsd/tests/pre-exec-backtick-strip.test.ts index ffdeae7c8..1f1ac2d35 100644 --- a/src/resources/extensions/gsd/tests/pre-exec-backtick-strip.test.ts +++ b/src/resources/extensions/gsd/tests/pre-exec-backtick-strip.test.ts @@ -12,7 +12,7 @@ import { describe, it } from 'node:test' import assert from 'node:assert/strict' import { normalizeFilePath, checkFilePathConsistency } from '../pre-execution-checks.ts' -import { readFileSync } from 'node:fs' +import { mkdirSync, readFileSync, rmSync, writeFileSync } from 'node:fs' import { resolve } from 'node:path' const src = readFileSync( @@ -25,6 +25,11 @@ describe('normalizeFilePath backtick stripping (#3649)', () => { assert.equal(normalizeFilePath('`src/foo.ts`'), 'src/foo.ts') }) + it('strips doubled backticks and trailing notes from file paths', () => { + assert.equal(normalizeFilePath('``src/foo.ts`` - current state'), 'src/foo.ts') + assert.equal(normalizeFilePath('``src/foo.ts`` (current state)'), 'src/foo.ts') + }) + it('strips backticks even when mixed with other normalization', () => { assert.equal(normalizeFilePath('`./src//bar.ts`'), 'src/bar.ts') }) @@ -66,3 +71,45 @@ describe('checkFilePathConsistency checks task.inputs not task.files (#3626)', ( ) }) }) + +describe('checkFilePathConsistency handles doubled-backtick annotations (#3892)', () => { + it('accepts existing files when task.inputs include doubled-backtick notes', () => { + const task = { + milestone_id: 'M001', + slice_id: 'S01', + id: 'T01', + title: 'Test Task', + status: 'pending', + one_liner: '', + narrative: '', + verification_result: '', + duration: '', + completed_at: null, + blocker_discovered: false, + deviations: '', + known_issues: '', + key_files: [], + key_decisions: [], + full_summary_md: '', + description: '', + estimate: '', + files: [], + verify: '', + inputs: ['``src/foo.ts`` (current state)'], + expected_output: [], + observability_impact: '', + full_plan_md: '', + sequence: 0, + } + + const tmp = resolve(process.cwd(), '.tmp-pre-exec-3892') + try { + mkdirSync(resolve(tmp, 'src'), { recursive: true }) + writeFileSync(resolve(tmp, 'src', 'foo.ts'), '// ok') + const results = checkFilePathConsistency([task as any], tmp) 
+      assert.deepEqual(results, [])
+    } finally {
+      rmSync(tmp, { recursive: true, force: true })
+    }
+  })
+})
diff --git a/src/resources/extensions/gsd/tests/pre-execution-checks.test.ts b/src/resources/extensions/gsd/tests/pre-execution-checks.test.ts
index 79ac6a692..091896fdb 100644
--- a/src/resources/extensions/gsd/tests/pre-execution-checks.test.ts
+++ b/src/resources/extensions/gsd/tests/pre-execution-checks.test.ts
@@ -1107,6 +1107,38 @@ describe("checkTaskOrdering false positive regression (#3677)", () => {
     assert.equal(results[0].target, "`later.ts` — needed first");
     assert.ok(results[0].message.includes("sequence violation"));
   });
+
+  test("existing on-disk files do not trigger ordering violations just because a later task modifies them", () => {
+    const tempDir = join(tmpdir(), `pre-exec-ordering-existing-file-${Date.now()}`);
+    const existingFile = "frontend/src/__tests__/ProcurementPage29.test.tsx";
+
+    mkdirSync(join(tempDir, "frontend", "src", "__tests__"), { recursive: true });
+    writeFileSync(join(tempDir, existingFile), "// existing file");
+
+    try {
+      const tasks = [
+        createTask({
+          id: "T01",
+          sequence: 0,
+          files: [],
+          inputs: ["`frontend/src/__tests__/ProcurementPage29.test.tsx` — contains matchMedia stub to remove"],
+          expected_output: [],
+        }),
+        createTask({
+          id: "T03",
+          sequence: 2,
+          files: [],
+          inputs: [],
+          expected_output: ["frontend/src/__tests__/ProcurementPage29.test.tsx"],
+        }),
+      ];
+
+      const results = checkTaskOrdering(tasks, tempDir);
+      assert.equal(results.length, 0, "Pre-existing files should not be treated as created by later tasks");
+    } finally {
+      rmSync(tempDir, { recursive: true, force: true });
+    }
+  });
 });
 
 // ─── checkFilePathConsistency additional edge cases ──────────────────────────
@@ -1175,6 +1207,23 @@ describe("checkFilePathConsistency additional edge cases", () => {
     assert.equal(results![0].blocking, true);
   });
 
+  test("multi-word prose inputs are ignored by path consistency checks", () => {
+    const tasks = [
+      createTask({
+        id: "T01",
+        files: [],
+        inputs: [
+          "Current WIZARD_PRODUCTS enum",
+          "Existing test patterns in wizard.test.ts",
+        ],
+        expected_output: [],
+      }),
+    ];
+
+    const results = checkFilePathConsistency(tasks, "/tmp");
+    assert.equal(results.length, 0, "Prose planning hints should not be treated as missing file paths");
+  });
+
   test("empty inputs array produces no results", () => {
     // A task with no inputs and only files should produce zero results from
     // consistency check — files are not checked (#3626).
diff --git a/src/resources/extensions/gsd/tests/preferences-formatting.test.ts b/src/resources/extensions/gsd/tests/preferences-formatting.test.ts
new file mode 100644
index 000000000..f14a7a16e
--- /dev/null
+++ b/src/resources/extensions/gsd/tests/preferences-formatting.test.ts
@@ -0,0 +1,87 @@
+/**
+ * Tests for formatSkillRef — pure formatting logic for skill references
+ * in the system prompt. Moved from preferences-skills.ts to preferences-types.ts
+ * to break the preferences ↔ preferences-skills circular dependency.
+ */
+
+import { describe, test } from "node:test";
+import assert from "node:assert/strict";
+
+import { formatSkillRef } from "../preferences-types.ts";
+import type { SkillResolution } from "../preferences-types.ts";
+
+function makeResolutions(entries: [string, Partial<SkillResolution>][]): Map<string, SkillResolution> {
+  const map = new Map<string, SkillResolution>();
+  for (const [key, partial] of entries) {
+    map.set(key, {
+      original: partial.original ?? key,
+      resolvedPath: partial.resolvedPath ?? null,
+      method: partial.method ??
"unresolved", + }); + } + return map; +} + +describe("formatSkillRef", () => { + test("marks unresolved references with a warning", () => { + const resolutions = makeResolutions([ + ["my-skill", { method: "unresolved" }], + ]); + const result = formatSkillRef("my-skill", resolutions); + assert.match(result, /my-skill/); + assert.match(result, /not found/); + }); + + test("marks unknown references (not in map) with a warning", () => { + const resolutions = new Map(); + const result = formatSkillRef("unknown-skill", resolutions); + assert.match(result, /unknown-skill/); + assert.match(result, /not found/); + }); + + test("returns bare ref for absolute-path resolution", () => { + const resolutions = makeResolutions([ + ["/home/user/skills/SKILL.md", { + method: "absolute-path", + resolvedPath: "/home/user/skills/SKILL.md", + }], + ]); + const result = formatSkillRef("/home/user/skills/SKILL.md", resolutions); + assert.equal(result, "/home/user/skills/SKILL.md"); + }); + + test("returns bare ref for absolute-dir resolution", () => { + const resolutions = makeResolutions([ + ["/home/user/skills/my-skill", { + method: "absolute-dir", + resolvedPath: "/home/user/skills/my-skill/SKILL.md", + }], + ]); + const result = formatSkillRef("/home/user/skills/my-skill", resolutions); + assert.equal(result, "/home/user/skills/my-skill"); + }); + + test("shows resolved path for user-skill resolution", () => { + const resolutions = makeResolutions([ + ["code-review", { + method: "user-skill", + resolvedPath: "/home/user/.claude/skills/code-review/SKILL.md", + }], + ]); + const result = formatSkillRef("code-review", resolutions); + assert.match(result, /code-review/); + assert.match(result, /\.claude\/skills\/code-review\/SKILL\.md/); + }); + + test("shows resolved path for project-skill resolution", () => { + const resolutions = makeResolutions([ + ["lint-fix", { + method: "project-skill", + resolvedPath: "/repo/.gsd/skills/lint-fix/SKILL.md", + }], + ]); + const result = formatSkillRef("lint-fix", resolutions); + assert.match(result, /lint-fix/); + assert.match(result, /\.gsd\/skills\/lint-fix\/SKILL\.md/); + }); +}); diff --git a/src/resources/extensions/gsd/tests/preferences.test.ts b/src/resources/extensions/gsd/tests/preferences.test.ts index 7e5f4177e..fa08a857e 100644 --- a/src/resources/extensions/gsd/tests/preferences.test.ts +++ b/src/resources/extensions/gsd/tests/preferences.test.ts @@ -10,10 +10,14 @@ import test from "node:test"; import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; import { validatePreferences, applyModeDefaults, getIsolationMode, + loadEffectiveGSDPreferences, parsePreferencesMarkdown, _resetParseWarningFlag, } from "../preferences.ts"; @@ -501,6 +505,55 @@ test("experimental.rtk parses correctly from preferences markdown", () => { assert.equal(prefs!.experimental?.rtk, true); }); +test("loadEffectiveGSDPreferences preserves experimental prefs across global+project merge", () => { + const originalCwd = process.cwd(); + const originalGsdHome = process.env.GSD_HOME; + const tempProject = mkdtempSync(join(tmpdir(), "gsd-prefs-project-")); + const tempGsdHome = mkdtempSync(join(tmpdir(), "gsd-prefs-home-")); + + try { + mkdirSync(join(tempProject, ".gsd"), { recursive: true }); + + writeFileSync( + join(tempGsdHome, "preferences.md"), + [ + "---", + "version: 1", + "experimental:", + " rtk: true", + "---", + ].join("\n"), + "utf-8", + ); + + 
writeFileSync( + join(tempProject, ".gsd", "PREFERENCES.md"), + [ + "---", + "version: 1", + "git:", + " isolation: none", + "---", + ].join("\n"), + "utf-8", + ); + + process.env.GSD_HOME = tempGsdHome; + process.chdir(tempProject); + + const loaded = loadEffectiveGSDPreferences(); + assert.notEqual(loaded, null); + assert.equal(loaded!.preferences.experimental?.rtk, true); + assert.equal(loaded!.preferences.git?.isolation, "none"); + } finally { + process.chdir(originalCwd); + if (originalGsdHome === undefined) delete process.env.GSD_HOME; + else process.env.GSD_HOME = originalGsdHome; + rmSync(tempProject, { recursive: true, force: true }); + rmSync(tempGsdHome, { recursive: true, force: true }); + } +}); + test("experimental.rtk defaults to off in new project preferences", () => { // No experimental key → feature is disabled const content = "---\nversion: 1\n---\n"; diff --git a/src/resources/extensions/gsd/tests/preparation.test.ts b/src/resources/extensions/gsd/tests/preparation.test.ts deleted file mode 100644 index 569efed10..000000000 --- a/src/resources/extensions/gsd/tests/preparation.test.ts +++ /dev/null @@ -1,1211 +0,0 @@ -/** - * Unit tests for GSD Preparation — codebase analysis and brief generation. - * - * Exercises the pure preparation functions: - * - analyzeCodebase() with various project layouts - * - formatCodebaseBrief() output format and truncation - * - Pattern extraction from sampled files - */ - -import test from "node:test"; -import assert from "node:assert/strict"; -import { mkdirSync, writeFileSync, rmSync, existsSync } from "node:fs"; -import { join } from "node:path"; -import { tmpdir } from "node:os"; -import { - analyzeCodebase, - formatCodebaseBrief, - aggregatePriorContext, - formatPriorContextBrief, - researchEcosystem, - formatEcosystemBrief, - runPreparation, - type CodebaseBrief, - type PriorContextBrief, - type EcosystemBrief, - type EcosystemFinding, - type PreparationUIContext, - type PreparationPreferences, - type PreparationResult, -} from "../preparation.ts"; -import { PROJECT_FILES } from "../detection.ts"; - -// ─── Test Helpers ─────────────────────────────────────────────────────────────── - -function makeTempDir(prefix: string): string { - const dir = join( - tmpdir(), - `gsd-preparation-test-${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`, - ); - mkdirSync(dir, { recursive: true }); - return dir; -} - -function cleanup(dir: string): void { - try { - rmSync(dir, { recursive: true, force: true }); - } catch { - // best-effort - } -} - -// ─── analyzeCodebase ──────────────────────────────────────────────────────────── - -test("analyzeCodebase: empty directory returns valid brief structure", async (t) => { - const dir = makeTempDir("empty"); - t.after(() => cleanup(dir)); - - const brief = await analyzeCodebase(dir); - - assert.ok(brief, "should return a brief"); - assert.ok(brief.techStack, "should have techStack"); - assert.ok(brief.moduleStructure, "should have moduleStructure"); - assert.ok(brief.patterns, "should have patterns"); - assert.ok(Array.isArray(brief.sampledFiles), "should have sampledFiles array"); - assert.equal(brief.sampledFiles.length, 0, "empty dir should have no sampled files"); -}); - -test("analyzeCodebase: detects package.json in PROJECT_FILES", async (t) => { - const dir = makeTempDir("pkg-json"); - t.after(() => cleanup(dir)); - - writeFileSync( - join(dir, "package.json"), - JSON.stringify({ name: "test-project", scripts: { test: "jest" } }), - "utf-8", - ); - - const brief = await 
analyzeCodebase(dir); - - assert.ok( - brief.techStack.detectedFiles.includes("package.json"), - "should detect package.json", - ); - assert.equal(brief.techStack.primaryLanguage, "javascript/typescript"); -}); - -test("analyzeCodebase: detects module structure from src/ directory", async (t) => { - const dir = makeTempDir("module-struct"); - t.after(() => cleanup(dir)); - - // Create src directory with subdirs - mkdirSync(join(dir, "src", "components"), { recursive: true }); - mkdirSync(join(dir, "src", "utils"), { recursive: true }); - mkdirSync(join(dir, "src", "hooks"), { recursive: true }); - mkdirSync(join(dir, "test"), { recursive: true }); - - const brief = await analyzeCodebase(dir); - - assert.ok( - brief.moduleStructure.topLevelDirs.includes("src"), - "should detect src as top-level dir", - ); - assert.ok( - brief.moduleStructure.topLevelDirs.includes("test"), - "should detect test as top-level dir", - ); - assert.ok( - brief.moduleStructure.srcSubdirs.includes("components"), - "should detect components subdir", - ); - assert.ok( - brief.moduleStructure.srcSubdirs.includes("utils"), - "should detect utils subdir", - ); - assert.ok( - brief.moduleStructure.srcSubdirs.includes("hooks"), - "should detect hooks subdir", - ); -}); - -test("analyzeCodebase: samples TypeScript files from src/", async (t) => { - const dir = makeTempDir("sample-ts"); - t.after(() => cleanup(dir)); - - // Create src directory with TypeScript files - mkdirSync(join(dir, "src"), { recursive: true }); - writeFileSync( - join(dir, "src", "index.ts"), - `export async function main() { await fetch('/api'); }`, - "utf-8", - ); - writeFileSync( - join(dir, "src", "utils.ts"), - `export function helper() { try { return 1; } catch (e) { throw e; } }`, - "utf-8", - ); - - const brief = await analyzeCodebase(dir); - - assert.ok(brief.sampledFiles.length > 0, "should sample at least one file"); - assert.ok( - brief.sampledFiles.some((f) => f.startsWith("src/")), - "should prefer src/ files", - ); -}); - -test("analyzeCodebase: excludes test files from sampling", async (t) => { - const dir = makeTempDir("exclude-tests"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, "src"), { recursive: true }); - writeFileSync(join(dir, "src", "index.ts"), `export const x = 1;`, "utf-8"); - writeFileSync( - join(dir, "src", "index.test.ts"), - `import test from 'node:test'; test('x', () => {});`, - "utf-8", - ); - writeFileSync( - join(dir, "src", "utils.spec.ts"), - `describe('utils', () => { it('works', () => {}); });`, - "utf-8", - ); - - const brief = await analyzeCodebase(dir); - - // Should only have index.ts, not test/spec files - for (const file of brief.sampledFiles) { - assert.ok(!file.endsWith(".test.ts"), `should not sample ${file}`); - assert.ok(!file.endsWith(".spec.ts"), `should not sample ${file}`); - } -}); - -test("analyzeCodebase: excludes node_modules from sampling", async (t) => { - const dir = makeTempDir("exclude-nm"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, "src"), { recursive: true }); - mkdirSync(join(dir, "node_modules", "some-pkg"), { recursive: true }); - writeFileSync(join(dir, "src", "index.ts"), `export const x = 1;`, "utf-8"); - writeFileSync( - join(dir, "node_modules", "some-pkg", "index.js"), - `module.exports = {};`, - "utf-8", - ); - - const brief = await analyzeCodebase(dir); - - for (const file of brief.sampledFiles) { - assert.ok(!file.includes("node_modules"), `should not sample ${file}`); - } -}); - -test("analyzeCodebase: extracts async/await pattern", async (t) => { 
- const dir = makeTempDir("async-await"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, "src"), { recursive: true }); - writeFileSync( - join(dir, "src", "api.ts"), - ` -export async function fetchData() { - const res = await fetch('/api'); - const data = await res.json(); - return data; -} - -export async function saveData(data: any) { - await fetch('/api', { method: 'POST', body: JSON.stringify(data) }); -} - `, - "utf-8", - ); - - const brief = await analyzeCodebase(dir); - - assert.equal( - brief.patterns.asyncStyle, - "async/await", - "should detect async/await as primary style", - ); -}); - -test("analyzeCodebase: extracts try/catch error handling", async (t) => { - const dir = makeTempDir("try-catch"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, "src"), { recursive: true }); - writeFileSync( - join(dir, "src", "handler.ts"), - ` -export function handleError() { - try { - doSomething(); - } catch (error) { - console.error(error); - } -} - -export function anotherHandler() { - try { - doOther(); - } catch (e) { - throw new Error('wrapped'); - } -} - `, - "utf-8", - ); - - const brief = await analyzeCodebase(dir); - - assert.equal( - brief.patterns.errorHandling, - "try/catch", - "should detect try/catch as primary error handling", - ); -}); - -test("analyzeCodebase: extracts camelCase naming convention", async (t) => { - const dir = makeTempDir("camel-case"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, "src"), { recursive: true }); - writeFileSync( - join(dir, "src", "utils.ts"), - ` -export function getUserById(userId: string) { - return fetchUser(userId); -} - -export function calculateTotalPrice(itemPrices: number[]) { - return itemPrices.reduce((a, b) => a + b, 0); -} - -export function formatDisplayName(firstName: string, lastName: string) { - return \`\${firstName} \${lastName}\`; -} - `, - "utf-8", - ); - - const brief = await analyzeCodebase(dir); - - // camelCase should be detected (getUserById, userId, fetchUser, etc.) 
- assert.ok( - brief.patterns.namingConvention === "camelCase" || brief.patterns.namingConvention === "mixed", - `should detect camelCase or mixed, got ${brief.patterns.namingConvention}`, - ); -}); - -test("analyzeCodebase: gracefully handles empty directories", async (t) => { - const dir = makeTempDir("empty-src"); - t.after(() => cleanup(dir)); - - // Create empty src directory - mkdirSync(join(dir, "src"), { recursive: true }); - - const brief = await analyzeCodebase(dir); - - // Should not throw, should return valid structure - assert.ok(brief.patterns, "should have patterns"); - assert.equal(brief.patterns.asyncStyle, "unknown", "should return unknown for empty"); - assert.equal(brief.patterns.errorHandling, "unknown", "should return unknown for empty"); - assert.equal(brief.patterns.namingConvention, "unknown", "should return unknown for empty"); -}); - -test("analyzeCodebase: returns unknown for unrecognized language patterns (Ruby)", async (t) => { - // Ruby is detected by LANGUAGE_MAP but not in LANGUAGE_PATTERNS registry - // This tests the graceful fallback behavior: naming convention still works, - // but language-specific patterns (async/error) should return "unknown" - const dir = makeTempDir("ruby-project"); - t.after(() => cleanup(dir)); - - // Create a Ruby project with Gemfile (detected as "ruby" in LANGUAGE_MAP) - writeFileSync(join(dir, "Gemfile"), `source "https://rubygems.org"\ngem "rails"`, "utf-8"); - - // Add a Ruby file with patterns that would match JS/TS regexes incorrectly - mkdirSync(join(dir, "lib"), { recursive: true }); - writeFileSync( - join(dir, "lib", "service.rb"), - ` -class UserService - def fetch_user(user_id) - user = User.find(user_id) - user - rescue ActiveRecord::RecordNotFound => e - Rails.logger.error("User not found: #{e.message}") - nil - end - - def async_task(&block) - # Ruby doesn't have async/await but has yield and blocks - Thread.new { yield } - end -end - `, - "utf-8", - ); - - const brief = await analyzeCodebase(dir); - - // Language should be detected as Ruby - assert.equal(brief.techStack.primaryLanguage, "ruby", "should detect ruby from Gemfile"); - - // Language-specific patterns should return "unknown" (not JS/TS patterns) - assert.equal( - brief.patterns.asyncStyle, - "unknown", - "should return unknown for async style in unrecognized language", - ); - assert.equal( - brief.patterns.errorHandling, - "unknown", - "should return unknown for error handling in unrecognized language", - ); - - // But naming convention detection should still work (it's universal) - // The Ruby code uses snake_case (fetch_user, user_id) and camelCase (UserService) - assert.ok( - brief.patterns.namingConvention !== "unknown", - "naming convention should still be detected for unrecognized languages", - ); - - // Evidence should explain why patterns aren't available - assert.ok( - brief.patterns.evidence.asyncStyle.some((e) => e.includes("not in pattern registry")), - "evidence should explain async style is not available", - ); - assert.ok( - brief.patterns.evidence.errorHandling.some((e) => e.includes("not in pattern registry")), - "evidence should explain error handling is not available", - ); -}); - -// ─── formatCodebaseBrief ──────────────────────────────────────────────────────── - -test("formatCodebaseBrief: produces markdown output", async (t) => { - const brief: CodebaseBrief = { - techStack: { - primaryLanguage: "javascript/typescript", - detectedFiles: ["package.json", "tsconfig.json"], - packageManager: "npm", - isMonorepo: false, - 
hasTests: true, - hasCI: true, - }, - moduleStructure: { - topLevelDirs: ["src", "test"], - srcSubdirs: ["components", "utils"], - totalFilesSampled: 5, - }, - patterns: { - asyncStyle: "async/await", - errorHandling: "try/catch", - namingConvention: "camelCase", - evidence: { - asyncStyle: ["src/api.ts: async/await (5 occurrences)"], - errorHandling: ["src/handler.ts: try/catch (3 occurrences)"], - namingConvention: ["camelCase: 50 occurrences"], - }, - fileCounts: { - asyncAwait: 3, - promises: 0, - callbacks: 0, - tryCatch: 2, - errorCallbacks: 0, - resultTypes: 0, - }, - }, - sampledFiles: ["src/index.ts", "src/utils.ts"], - }; - - const formatted = formatCodebaseBrief(brief); - - assert.ok(formatted.includes("## Tech Stack"), "should have Tech Stack section"); - assert.ok(formatted.includes("## Module Structure"), "should have Module Structure section"); - assert.ok(formatted.includes("## Code Patterns"), "should have Code Patterns section"); - assert.ok(formatted.includes("javascript/typescript"), "should include language"); - assert.ok(formatted.includes("npm"), "should include package manager"); - assert.ok(formatted.includes("async/await"), "should include async style"); - assert.ok(formatted.includes("try/catch"), "should include error handling"); - assert.ok(formatted.includes("camelCase"), "should include naming convention"); - assert.ok(formatted.includes("3 async/await files"), "should include file counts for async style"); - assert.ok(formatted.includes("2 try/catch files"), "should include file counts for error handling"); -}); - -test("formatCodebaseBrief: caps output at 3000 chars", async (t) => { - // Create a brief with many files to exceed the limit - const manyFiles = Array.from({ length: 100 }, (_, i) => `file-${i}.ts`); - - const brief: CodebaseBrief = { - techStack: { - primaryLanguage: "javascript/typescript", - detectedFiles: manyFiles, - packageManager: "npm", - isMonorepo: false, - hasTests: true, - hasCI: true, - }, - moduleStructure: { - topLevelDirs: Array.from({ length: 50 }, (_, i) => `dir-${i}`), - srcSubdirs: Array.from({ length: 50 }, (_, i) => `subdir-${i}`), - totalFilesSampled: 100, - }, - patterns: { - asyncStyle: "async/await", - errorHandling: "try/catch", - namingConvention: "camelCase", - evidence: { - asyncStyle: manyFiles.map((f) => `${f}: async/await (10 occurrences)`), - errorHandling: manyFiles.map((f) => `${f}: try/catch (5 occurrences)`), - namingConvention: ["camelCase: 500 occurrences"], - }, - fileCounts: { - asyncAwait: 50, - promises: 10, - callbacks: 5, - tryCatch: 30, - errorCallbacks: 5, - resultTypes: 0, - }, - }, - sampledFiles: manyFiles, - }; - - const formatted = formatCodebaseBrief(brief); - - assert.ok( - formatted.length <= 3000, - `should cap at 3000 chars, got ${formatted.length}`, - ); - if (formatted.length === 3000) { - assert.ok(formatted.endsWith("..."), "should end with ellipsis when truncated"); - } -}); - -test("formatCodebaseBrief: handles minimal brief", async (t) => { - const brief: CodebaseBrief = { - techStack: { - primaryLanguage: undefined, - detectedFiles: [], - packageManager: undefined, - isMonorepo: false, - hasTests: false, - hasCI: false, - }, - moduleStructure: { - topLevelDirs: [], - srcSubdirs: [], - totalFilesSampled: 0, - }, - patterns: { - asyncStyle: "unknown", - errorHandling: "unknown", - namingConvention: "unknown", - evidence: { - asyncStyle: [], - errorHandling: [], - namingConvention: [], - }, - fileCounts: { - asyncAwait: 0, - promises: 0, - callbacks: 0, - tryCatch: 0, - 
errorCallbacks: 0, - resultTypes: 0, - }, - }, - sampledFiles: [], - }; - - const formatted = formatCodebaseBrief(brief); - - assert.ok(formatted.includes("## Tech Stack"), "should still have sections"); - assert.ok(formatted.includes("**Monorepo:** No"), "should show monorepo status"); - assert.ok(formatted.includes("unknown"), "should show unknown patterns"); -}); - -// ─── Integration: Brief includes PROJECT_FILES markers ────────────────────────── - -test("analyzeCodebase: brief includes detected files from PROJECT_FILES", async (t) => { - const dir = makeTempDir("project-files"); - t.after(() => cleanup(dir)); - - // Create several PROJECT_FILES markers - writeFileSync(join(dir, "package.json"), '{"name": "test"}', "utf-8"); - writeFileSync(join(dir, "tsconfig.json"), '{}', "utf-8"); - mkdirSync(join(dir, ".github", "workflows"), { recursive: true }); - writeFileSync( - join(dir, ".github", "workflows", "ci.yml"), - "name: CI", - "utf-8", - ); - - const brief = await analyzeCodebase(dir); - - assert.ok( - brief.techStack.detectedFiles.includes("package.json"), - "should detect package.json", - ); - assert.ok( - brief.techStack.hasCI, - "should detect CI from .github/workflows", - ); -}); - -test("analyzeCodebase: brief includes sampled file patterns", async (t) => { - const dir = makeTempDir("sampled-patterns"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, "src"), { recursive: true }); - - // Write files with distinct patterns - writeFileSync( - join(dir, "src", "async-heavy.ts"), - ` -async function one() { await fetch('/a'); } -async function two() { await fetch('/b'); } -async function three() { await fetch('/c'); } - `, - "utf-8", - ); - - const brief = await analyzeCodebase(dir); - - assert.ok(brief.sampledFiles.length > 0, "should have sampled files"); - assert.ok( - brief.patterns.evidence.asyncStyle.length > 0, - "should have async style evidence", - ); -}); - -// ─── aggregatePriorContext ────────────────────────────────────────────────────── - -test("aggregatePriorContext: handles missing files gracefully", async (t) => { - const dir = makeTempDir("no-gsd"); - t.after(() => cleanup(dir)); - - // Create .gsd directory but no files - mkdirSync(join(dir, ".gsd"), { recursive: true }); - - const brief = await aggregatePriorContext(dir); - - assert.equal(brief.decisions.totalCount, 0, "should have no decisions"); - assert.equal(brief.requirements.totalCount, 0, "should have no requirements"); - assert.equal(brief.knowledge, "No prior knowledge recorded.", "should indicate no knowledge"); - assert.equal(brief.summaries, "No prior milestone summaries.", "should indicate no summaries"); -}); - -test("aggregatePriorContext: handles completely empty directory", async (t) => { - const dir = makeTempDir("empty-project"); - t.after(() => cleanup(dir)); - - const brief = await aggregatePriorContext(dir); - - assert.equal(brief.decisions.totalCount, 0); - assert.equal(brief.requirements.totalCount, 0); - assert.equal(brief.knowledge, "No prior knowledge recorded."); - assert.equal(brief.summaries, "No prior milestone summaries."); -}); - -test("aggregatePriorContext: parses DECISIONS.md and groups by scope", async (t) => { - const dir = makeTempDir("decisions"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, ".gsd"), { recursive: true }); - writeFileSync( - join(dir, ".gsd", "DECISIONS.md"), - `# Decisions Register - -| # | When | Scope | Decision | Choice | Rationale | Revisable? 
| Made By | -|---|------|-------|----------|--------|-----------|------------|---------| -| D001 | M001/S01 | pattern | Async style | async/await | Modern standard | Yes | agent | -| D002 | M001/S02 | architecture | Data layer | SQLite | Simple, embedded | No | human | -| D003 | M001/S03 | pattern | Error handling | try/catch | Consistency | Yes | agent | -`, - "utf-8", - ); - - const brief = await aggregatePriorContext(dir); - - assert.equal(brief.decisions.totalCount, 3, "should parse all decisions"); - assert.equal(brief.decisions.byScope.get("pattern")?.length, 2, "should group pattern scope"); - assert.equal(brief.decisions.byScope.get("architecture")?.length, 1, "should group architecture scope"); - - const patternDecisions = brief.decisions.byScope.get("pattern")!; - assert.equal(patternDecisions[0].id, "D001"); - assert.equal(patternDecisions[0].decision, "Async style"); - assert.equal(patternDecisions[0].choice, "async/await"); -}); - -test("aggregatePriorContext: parses REQUIREMENTS.md and groups by status", async (t) => { - const dir = makeTempDir("requirements"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, ".gsd"), { recursive: true }); - writeFileSync( - join(dir, ".gsd", "REQUIREMENTS.md"), - `# Requirements - -## Active - -### R001 — First requirement -- Status: active -- Description: Something active - -### R002 — Second requirement -- Status: active -- Description: Also active - -## Validated - -### R003 — Validated requirement -- Status: validated -- Description: This was validated - -## Deferred - -### R004 — Deferred requirement -- Status: deferred -- Description: Postponed for later -`, - "utf-8", - ); - - const brief = await aggregatePriorContext(dir); - - assert.equal(brief.requirements.totalCount, 4, "should parse all requirements"); - assert.equal(brief.requirements.active.length, 2, "should have 2 active"); - assert.equal(brief.requirements.validated.length, 1, "should have 1 validated"); - assert.equal(brief.requirements.deferred.length, 1, "should have 1 deferred"); - - assert.equal(brief.requirements.active[0].id, "R001"); - assert.equal(brief.requirements.active[0].description, "First requirement"); -}); - -test("aggregatePriorContext: loads KNOWLEDGE.md content", async (t) => { - const dir = makeTempDir("knowledge"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, ".gsd"), { recursive: true }); - writeFileSync( - join(dir, ".gsd", "KNOWLEDGE.md"), - `# Knowledge Base - -## Rules - -| # | Scope | Rule | Why | Added | -|---|-------|------|-----|-------| -| K001 | global | Always use TypeScript | Type safety | manual | - -## Patterns - -**Pattern X:** Do this for better Y. -`, - "utf-8", - ); - - const brief = await aggregatePriorContext(dir); - - assert.ok(brief.knowledge.includes("Rules"), "should include knowledge content"); - assert.ok(brief.knowledge.includes("TypeScript"), "should include rule text"); -}); - -test("aggregatePriorContext: truncates oversized content without cutting mid-section", async (t) => { - const dir = makeTempDir("large-knowledge"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, ".gsd"), { recursive: true }); - - // Create large knowledge file - const largeContent = `# Knowledge Base - -## Section One - -${"Lorem ipsum dolor sit amet. ".repeat(100)} - -## Section Two - -${"More content here. ".repeat(100)} - -## Section Three - -${"Even more content. 
".repeat(100)} -`; - - writeFileSync(join(dir, ".gsd", "KNOWLEDGE.md"), largeContent, "utf-8"); - - const brief = await aggregatePriorContext(dir); - - assert.ok(brief.knowledge.length <= 2000, "should truncate to 2K chars"); - assert.ok(brief.knowledge.includes("[truncated]"), "should indicate truncation"); - // Should try to preserve section boundaries - assert.ok( - brief.knowledge.includes("## Section"), - "should keep section headings intact", - ); -}); - -test("aggregatePriorContext: loads milestone summaries", async (t) => { - const dir = makeTempDir("milestones"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, ".gsd", "milestones", "M001"), { recursive: true }); - mkdirSync(join(dir, ".gsd", "milestones", "M002"), { recursive: true }); - - writeFileSync( - join(dir, ".gsd", "milestones", "M001", "MILESTONE-SUMMARY.md"), - `# M001 — First Milestone - -**Implemented core functionality and established patterns.** - -## What Happened -Did stuff. -`, - "utf-8", - ); - - writeFileSync( - join(dir, ".gsd", "milestones", "M002", "MILESTONE-SUMMARY.md"), - `# M002 — Second Milestone - -**Extended the system with new features.** - -## What Happened -Did more stuff. -`, - "utf-8", - ); - - const brief = await aggregatePriorContext(dir); - - assert.ok(brief.summaries.includes("M001"), "should include M001 summary"); - assert.ok(brief.summaries.includes("M002"), "should include M002 summary"); - assert.ok( - brief.summaries.includes("core functionality"), - "should extract one-liner from M001", - ); - assert.ok( - brief.summaries.includes("new features"), - "should extract one-liner from M002", - ); -}); - -// ─── formatPriorContextBrief ──────────────────────────────────────────────────── - -test("formatPriorContextBrief: produces markdown with all sections", async (t) => { - const brief: PriorContextBrief = { - decisions: { - byScope: new Map([ - [ - "pattern", - [ - { id: "D001", scope: "pattern", decision: "Async", choice: "await", rationale: "Modern" }, - ], - ], - [ - "architecture", - [ - { id: "D002", scope: "architecture", decision: "DB", choice: "SQLite", rationale: "Simple" }, - ], - ], - ]), - totalCount: 2, - }, - requirements: { - active: [{ id: "R001", description: "Core feature", status: "active" }], - validated: [], - deferred: [], - totalCount: 1, - }, - knowledge: "Some knowledge here.", - summaries: "### M001\nDid things.", - }; - - const formatted = formatPriorContextBrief(brief); - - assert.ok(formatted.includes("## Prior Decisions"), "should have decisions section"); - assert.ok(formatted.includes("## Prior Requirements"), "should have requirements section"); - assert.ok(formatted.includes("## Prior Knowledge"), "should have knowledge section"); - assert.ok(formatted.includes("## Prior Milestone Summaries"), "should have summaries section"); - assert.ok(formatted.includes("D001"), "should include decision ID"); - assert.ok(formatted.includes("R001"), "should include requirement ID"); - assert.ok(formatted.includes("pattern"), "should include scope heading"); -}); - -test("formatPriorContextBrief: handles empty brief", async (t) => { - const brief: PriorContextBrief = { - decisions: { - byScope: new Map(), - totalCount: 0, - }, - requirements: { - active: [], - validated: [], - deferred: [], - totalCount: 0, - }, - knowledge: "No prior knowledge recorded.", - summaries: "No prior milestone summaries.", - }; - - const formatted = formatPriorContextBrief(brief); - - assert.ok(formatted.includes("No prior decisions recorded"), "should indicate no decisions"); - 
assert.ok(formatted.includes("No prior requirements recorded"), "should indicate no requirements"); - assert.ok(formatted.includes("No prior knowledge recorded"), "should indicate no knowledge"); - assert.ok(formatted.includes("No prior milestone summaries"), "should indicate no summaries"); -}); - -test("formatPriorContextBrief: caps total output at 6K chars", async (t) => { - // Create a brief with lots of content - const manyDecisions: Array<{ - id: string; - scope: string; - decision: string; - choice: string; - rationale: string; - }> = []; - for (let i = 0; i < 100; i++) { - manyDecisions.push({ - id: `D${String(i).padStart(3, "0")}`, - scope: "pattern", - decision: `Decision number ${i} with some extra text for length`, - choice: `Choice ${i} with more text to make it longer`, - rationale: `Rationale ${i}`, - }); - } - - const manyRequirements: Array<{ - id: string; - description: string; - status: "active"; - }> = []; - for (let i = 0; i < 100; i++) { - manyRequirements.push({ - id: `R${String(i).padStart(3, "0")}`, - description: `Requirement ${i} with a long description that takes up space`, - status: "active", - }); - } - - const brief: PriorContextBrief = { - decisions: { - byScope: new Map([["pattern", manyDecisions]]), - totalCount: 100, - }, - requirements: { - active: manyRequirements, - validated: [], - deferred: [], - totalCount: 100, - }, - knowledge: "A ".repeat(1000), - summaries: "B ".repeat(1000), - }; - - const formatted = formatPriorContextBrief(brief); - - assert.ok(formatted.length <= 6000, `should cap at 6000 chars, got ${formatted.length}`); -}); - -// ─── researchEcosystem ────────────────────────────────────────────────────────── -// Note: Ecosystem research now always returns available: false from the preparation -// phase. Research happens during the discussion using web search tools. - -test("researchEcosystem: always returns available: false (research happens during discussion)", async (t) => { - const dir = makeTempDir("ecosystem-disabled"); - t.after(() => cleanup(dir)); - - const brief = await researchEcosystem(["Next.js", "TypeScript"], dir); - - assert.equal(brief.available, false, "should indicate research not available from preparation"); - assert.ok(brief.skippedReason, "should have skipped reason"); - assert.ok( - brief.skippedReason!.includes("during the discussion"), - "should explain research happens during discussion", - ); - assert.deepEqual(brief.queries, [], "should have empty queries"); - assert.deepEqual(brief.findings, [], "should have empty findings"); -}); - -test("researchEcosystem: returns consistent result regardless of tech stack", async (t) => { - const dir = makeTempDir("ecosystem-consistent"); - t.after(() => cleanup(dir)); - - // With tech stack - const briefWithTech = await researchEcosystem(["React", "Next.js"], dir); - // Without tech stack - const briefEmpty = await researchEcosystem([], dir); - - // Both should return the same unavailable result - assert.equal(briefWithTech.available, false); - assert.equal(briefEmpty.available, false); - assert.deepEqual(briefWithTech.queries, []); - assert.deepEqual(briefEmpty.queries, []); -}); - -// ─── formatEcosystemBrief ─��───────────────────────────────────────────────────── -// Note: formatEcosystemBrief now returns a simple fixed message since ecosystem -// research always returns unavailable from the preparation phase. 
- -test("formatEcosystemBrief: returns simplified message for discussion-phase research", async (t) => { - const brief: EcosystemBrief = { - available: false, - queries: [], - findings: [], - skippedReason: "Ecosystem research is performed during the discussion using web search tools, not during preparation.", - }; - - const formatted = formatEcosystemBrief(brief); - - assert.ok(formatted.includes("## Ecosystem Research"), "should have section header"); - assert.ok(formatted.includes("during the discussion"), "should mention discussion phase"); - assert.ok(formatted.includes("web search tools"), "should mention web search tools"); -}); - -test("formatEcosystemBrief: returns consistent output regardless of brief content", async (t) => { - // Even if a brief has findings (which shouldn't happen from preparation), - // the function returns the simplified message - const briefWithFindings: EcosystemBrief = { - available: true, - queries: ["test query"], - findings: [{ query: "test", title: "Test", snippet: "test", url: "https://example.com" }], - provider: "tavily", - }; - - const briefEmpty: EcosystemBrief = { - available: false, - queries: [], - findings: [], - skippedReason: "Test reason", - }; - - const formatted1 = formatEcosystemBrief(briefWithFindings); - const formatted2 = formatEcosystemBrief(briefEmpty); - - // Both should return the same simplified message - assert.equal(formatted1, formatted2, "should return consistent output"); - assert.ok(formatted1.includes("## Ecosystem Research"), "should have section header"); -}); - - -// ─── runPreparation (Orchestrator) ────────────────────────────────────────────── - -/** - * Mock UI context that captures notifications for testing. - */ -function createMockUI(): PreparationUIContext & { notifications: Array<{ message: string; type?: string }> } { - const notifications: Array<{ message: string; type?: string }> = []; - return { - notifications, - notify(message: string, type?: "info" | "warning" | "error" | "success") { - notifications.push({ message, type }); - }, - }; -} - -test("runPreparation: returns complete result with all briefs populated", async (t) => { - const dir = makeTempDir("runprep-full"); - t.after(() => cleanup(dir)); - - // Set up a minimal project - mkdirSync(join(dir, "src"), { recursive: true }); - mkdirSync(join(dir, ".gsd"), { recursive: true }); - writeFileSync(join(dir, "package.json"), '{"name": "test-project"}', "utf-8"); - writeFileSync(join(dir, "src", "index.ts"), 'export const x = 1;', "utf-8"); - - const ui = createMockUI(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, // Skip web research to avoid API key requirement - discuss_depth: "standard", - }; - - const result = await runPreparation(dir, ui, prefs); - - // Check result structure - assert.equal(result.enabled, true, "should be enabled"); - assert.ok(result.codebase, "should have codebase"); - assert.ok(result.priorContext, "should have priorContext"); - assert.ok(result.ecosystem, "should have ecosystem"); - assert.ok(typeof result.codebaseBrief === "string", "should have codebaseBrief"); - assert.ok(typeof result.priorContextBrief === "string", "should have priorContextBrief"); - assert.ok(typeof result.ecosystemBrief === "string", "should have ecosystemBrief"); - assert.ok(result.durationMs > 0, "should have positive duration"); - assert.equal(result.ecosystemResearchPerformed, false, "should not have performed ecosystem research"); - - // Check TUI progress notifications - 
assert.ok(ui.notifications.length > 0, "should have notifications"); - assert.ok( - ui.notifications.some((n) => n.message.includes("Analyzing codebase")), - "should show codebase analysis start", - ); - assert.ok( - ui.notifications.some((n) => n.message.includes("✓ Analyzed codebase")), - "should show codebase analysis complete", - ); - assert.ok( - ui.notifications.some((n) => n.message.includes("Reviewing prior context")), - "should show prior context start", - ); - assert.ok( - ui.notifications.some((n) => n.message.includes("✓ Reviewed prior context")), - "should show prior context complete", - ); -}); - -test("runPreparation: returns early when discuss_preparation is false", async (t) => { - const dir = makeTempDir("runprep-disabled"); - t.after(() => cleanup(dir)); - - const ui = createMockUI(); - const prefs: PreparationPreferences = { - discuss_preparation: false, - }; - - const result = await runPreparation(dir, ui, prefs); - - assert.equal(result.enabled, false, "should indicate preparation disabled"); - assert.equal(result.codebaseBrief, "", "should have empty codebase brief"); - assert.equal(result.priorContextBrief, "", "should have empty prior context brief"); - assert.equal(result.ecosystemBrief, "", "should have empty ecosystem brief"); - assert.equal(ui.notifications.length, 0, "should not show any notifications"); - assert.ok(result.durationMs >= 0, "should have non-negative duration"); -}); - -test("runPreparation: ecosystem research always returns unavailable (happens during discussion)", async (t) => { - const dir = makeTempDir("runprep-no-ecosystem"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, ".gsd"), { recursive: true }); - writeFileSync(join(dir, "package.json"), '{"name": "test"}', "utf-8"); - - const ui = createMockUI(); - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: true, // Even with this enabled, ecosystem research returns unavailable - }; - - const result = await runPreparation(dir, ui, prefs); - - assert.equal(result.enabled, true); - assert.equal(result.ecosystemResearchPerformed, false, "should not perform ecosystem research from preparation"); - assert.equal(result.ecosystem.available, false); - assert.ok( - result.ecosystem.skippedReason?.includes("during the discussion"), - "should indicate research happens during discussion", - ); - - // Should NOT have ecosystem research notifications (no longer part of preparation) - assert.ok( - !ui.notifications.some((n) => n.message.includes("Researching ecosystem")), - "should not show ecosystem research notification", - ); -}); - -test("runPreparation: works without UI context (silent mode)", async (t) => { - const dir = makeTempDir("runprep-silent"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, ".gsd"), { recursive: true }); - writeFileSync(join(dir, "package.json"), '{"name": "test"}', "utf-8"); - - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - }; - - // Pass null for UI to test silent mode - const result = await runPreparation(dir, null, prefs); - - assert.equal(result.enabled, true, "should work without UI"); - assert.ok(result.codebase, "should have codebase"); - assert.ok(result.priorContext, "should have priorContext"); - assert.ok(result.durationMs > 0, "should have duration"); -}); - -test("runPreparation: completes within 60s requirement (R112)", async (t) => { - const dir = makeTempDir("runprep-timing"); - t.after(() => cleanup(dir)); - - // Create a project with some content 
to analyze - mkdirSync(join(dir, "src"), { recursive: true }); - mkdirSync(join(dir, ".gsd"), { recursive: true }); - writeFileSync(join(dir, "package.json"), '{"name": "test"}', "utf-8"); - writeFileSync(join(dir, "tsconfig.json"), '{}', "utf-8"); - - for (let i = 0; i < 10; i++) { - writeFileSync( - join(dir, "src", `file${i}.ts`), - `export async function fn${i}() { await Promise.resolve(); }\n`.repeat(50), - "utf-8", - ); - } - - const prefs: PreparationPreferences = { - discuss_preparation: true, - discuss_web_research: false, - discuss_depth: "standard", - }; - - const startTime = performance.now(); - const result = await runPreparation(dir, null, prefs); - const elapsed = performance.now() - startTime; - - assert.ok(result.durationMs < 60000, `should complete within 60s, took ${result.durationMs}ms`); - assert.ok(elapsed < 60000, `elapsed time should be under 60s, was ${elapsed}ms`); -}); - -test("runPreparation: does not throw on any input", async (t) => { - const dir = makeTempDir("runprep-robust"); - t.after(() => cleanup(dir)); - - // Test with completely empty directory - const prefs: PreparationPreferences = {}; - - let result: PreparationResult | undefined; - let error: unknown; - - try { - result = await runPreparation(dir, null, prefs); - } catch (e) { - error = e; - } - - assert.equal(error, undefined, "should not throw"); - assert.ok(result, "should return result"); - assert.equal(result!.enabled, true, "should be enabled by default"); -}); - -test("runPreparation: detects framework from config files in codebase brief", async (t) => { - const dir = makeTempDir("runprep-framework"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, ".gsd"), { recursive: true }); - writeFileSync(join(dir, "package.json"), '{"name": "test"}', "utf-8"); - writeFileSync(join(dir, "next.config.mjs"), 'export default {};', "utf-8"); - - const prefs: PreparationPreferences = { - discuss_preparation: true, - }; - - const result = await runPreparation(dir, null, prefs); - - // Should detect Next.js config file in codebase analysis - assert.ok( - result.codebase.techStack.detectedFiles.includes("next.config.mjs"), - "should detect next.config.mjs in codebase brief", - ); - // Ecosystem queries are always empty from preparation (research happens during discussion) - assert.deepEqual(result.ecosystem.queries, [], "ecosystem queries should be empty from preparation"); -}); - -test("runPreparation: default preferences enable preparation and web research", async (t) => { - const dir = makeTempDir("runprep-defaults"); - t.after(() => cleanup(dir)); - - mkdirSync(join(dir, ".gsd"), { recursive: true }); - - const ui = createMockUI(); - const prefs: PreparationPreferences = {}; // All defaults - - const result = await runPreparation(dir, ui, prefs); - - // With defaults, preparation should be enabled - assert.equal(result.enabled, true, "should be enabled by default"); - // Notifications should be shown - assert.ok(ui.notifications.length > 0, "should show notifications"); -}); diff --git a/src/resources/extensions/gsd/tests/projection-regression.test.ts b/src/resources/extensions/gsd/tests/projection-regression.test.ts index 90a06e7b9..f22f4d607 100644 --- a/src/resources/extensions/gsd/tests/projection-regression.test.ts +++ b/src/resources/extensions/gsd/tests/projection-regression.test.ts @@ -5,7 +5,7 @@ import test from 'node:test'; import assert from 'node:assert/strict'; -import { renderPlanContent, renderRoadmapContent } from '../workflow-projections.ts'; +import { renderPlanContent, 
renderRoadmapContent, renderSummaryContent } from '../workflow-projections.ts'; import type { SliceRow, TaskRow } from '../gsd-db.ts'; // ─── Helpers ───────────────────────────────────────────────────────────── @@ -172,3 +172,98 @@ test('renderRoadmapContent: slice with status "pending" shows ⬜', () => { assert.ok(content.includes('⬜'), 'pending slice should show ⬜'); }); + +// ─── renderSummaryContent: double-frontmatter regression ───────────────── + +test('renderSummaryContent: uses full_summary_md as-is when it contains frontmatter', () => { + const existingSummary = [ + '---', + 'id: T01', + 'parent: S01', + 'milestone: M001', + 'key_files:', + ' - src/thing.ts', + 'verification_result: passed', + 'completed_at: 2026-01-01T00:00:00Z', + 'blocker_discovered: false', + '---', + '', + '# T01: Did the thing', + '', + '**One-liner summary**', + '', + '## What Happened', + '', + 'Narrative content here.', + '', + '## Deviations', + '', + 'None.', + '', + ].join('\n'); + + const task = makeTaskRow({ + id: 'T01', + status: 'complete', + title: 'Did the thing', + one_liner: 'One-liner summary', + narrative: 'Narrative content here.', + full_summary_md: existingSummary, + }); + + const result = renderSummaryContent(task, 'S01', 'M001'); + + // Must NOT produce double frontmatter + const frontmatterCount = (result.match(/^---$/gm) || []).length; + assert.equal(frontmatterCount, 2, `Expected exactly 2 frontmatter delimiters (one block), got ${frontmatterCount}`); + + // Must NOT produce double H1 heading + const h1Count = (result.match(/^# T01:/gm) || []).length; + assert.equal(h1Count, 1, `Expected exactly 1 H1 heading, got ${h1Count}`); + + // Content should match the full_summary_md exactly + assert.equal(result, existingSummary); +}); + +test('renderSummaryContent: synthesizes from DB columns when full_summary_md is empty', () => { + const task = makeTaskRow({ + id: 'T01', + status: 'complete', + title: 'Did the thing', + one_liner: 'One-liner summary', + narrative: 'Built the feature.', + full_summary_md: '', + deviations: 'Deviated slightly.', + known_issues: 'None.', + }); + + const result = renderSummaryContent(task, 'S01', 'M001'); + + // Should have exactly one frontmatter block + const frontmatterCount = (result.match(/^---$/gm) || []).length; + assert.equal(frontmatterCount, 2, 'Should have one frontmatter block (2 delimiters)'); + + // Should contain synthesized sections + assert.ok(result.includes('## What Happened'), 'Should have What Happened section'); + assert.ok(result.includes('Built the feature.'), 'Should use narrative for content'); + assert.ok(result.includes('## Deviations'), 'Should have Deviations section'); + assert.ok(result.includes('Deviated slightly.'), 'Should include deviation text'); +}); + +test('renderSummaryContent: synthesizes when full_summary_md has no frontmatter', () => { + const task = makeTaskRow({ + id: 'T02', + status: 'complete', + title: 'Partial summary', + narrative: 'Did some work.', + full_summary_md: 'Just a plain text summary with no frontmatter.', + }); + + const result = renderSummaryContent(task, 'S01', 'M001'); + + // Should synthesize with proper frontmatter since the stored md lacks it + assert.ok(result.startsWith('---'), 'Should start with frontmatter'); + assert.ok(result.includes('id: T02'), 'Should have task ID in frontmatter'); + assert.ok(result.includes('## What Happened'), 'Should have What Happened section'); + assert.ok(result.includes('Did some work.'), 'Should use narrative'); +}); diff --git 
a/src/resources/extensions/gsd/tests/prompt-builder.test.ts b/src/resources/extensions/gsd/tests/prompt-builder.test.ts deleted file mode 100644 index 811357307..000000000 --- a/src/resources/extensions/gsd/tests/prompt-builder.test.ts +++ /dev/null @@ -1,669 +0,0 @@ -/** - * Prompt Builder Tests — Comprehensive tests for S02 components. - * - * Tests cover: - * 1. Template validation (context-enhanced.md, discuss-prepared.md) - * 2. Prompt loading and variable substitution - * 3. Enhanced context validation (R109) - * 4. Integration tests for format functions and prompt injection - */ - -import test, { describe } from "node:test"; -import assert from "node:assert/strict"; -import { readFileSync, existsSync } from "node:fs"; -import { join } from "node:path"; - -// ─── Template Paths ───────────────────────────────────────────────────────────── - -const templatesDir = join(process.cwd(), "src/resources/extensions/gsd/templates"); -const promptsDir = join(process.cwd(), "src/resources/extensions/gsd/prompts"); - -const contextEnhancedPath = join(templatesDir, "context-enhanced.md"); -const contextPath = join(templatesDir, "context.md"); -const discussPreparedPath = join(promptsDir, "discuss-prepared.md"); - -// ─── Template Tests ───────────────────────────────────────────────────────────── - -describe("Template: context-enhanced.md", () => { - test("file exists", () => { - assert.ok(existsSync(contextEnhancedPath), "context-enhanced.md should exist"); - }); - - test("contains all original context.md sections", () => { - const contextEnhanced = readFileSync(contextEnhancedPath, "utf-8"); - const originalContext = readFileSync(contextPath, "utf-8"); - - // Extract section headers from original context.md - const originalSections = originalContext.match(/^## .+$/gm) ?? 
[]; - - // Each original section should be present in context-enhanced.md - for (const section of originalSections) { - assert.ok( - contextEnhanced.includes(section), - `context-enhanced.md should contain original section: ${section}`, - ); - } - }); - - test("contains new structured sections for prepared discussions", () => { - const contextEnhanced = readFileSync(contextEnhancedPath, "utf-8"); - - // New sections required by R108 - const newSections = [ - "## Codebase Brief", - "## Architectural Decisions", - "## Interface Contracts", - "## Error Handling Strategy", - "## Testing Requirements", - "## Acceptance Criteria", - "## Ecosystem Notes", - ]; - - for (const section of newSections) { - assert.ok( - contextEnhanced.includes(section), - `context-enhanced.md should contain new section: ${section}`, - ); - } - }); - - test("Codebase Brief has sub-sections", () => { - const contextEnhanced = readFileSync(contextEnhancedPath, "utf-8"); - - assert.ok( - contextEnhanced.includes("### Technology Stack"), - "Codebase Brief should have Technology Stack sub-section", - ); - assert.ok( - contextEnhanced.includes("### Key Modules"), - "Codebase Brief should have Key Modules sub-section", - ); - assert.ok( - contextEnhanced.includes("### Patterns in Use"), - "Codebase Brief should have Patterns in Use sub-section", - ); - }); - - test("Architectural Decisions has structured format guidance", () => { - const contextEnhanced = readFileSync(contextEnhancedPath, "utf-8"); - - // Check for decision structure markers - assert.ok( - contextEnhanced.includes("**Decision:**"), - "Architectural Decisions should have Decision marker", - ); - assert.ok( - contextEnhanced.includes("**Rationale:**"), - "Architectural Decisions should have Rationale marker", - ); - assert.ok( - contextEnhanced.includes("**Evidence:**"), - "Architectural Decisions should have Evidence marker", - ); - assert.ok( - contextEnhanced.includes("**Alternatives Considered:**"), - "Architectural Decisions should have Alternatives Considered marker", - ); - }); -}); - -describe("Template: discuss-prepared.md", () => { - test("file exists", () => { - assert.ok(existsSync(discussPreparedPath), "discuss-prepared.md should exist"); - }); - - test("contains all three brief placeholders", () => { - const discussPrepared = readFileSync(discussPreparedPath, "utf-8"); - - assert.ok( - discussPrepared.includes("{{codebaseBrief}}"), - "discuss-prepared.md should contain {{codebaseBrief}} placeholder", - ); - assert.ok( - discussPrepared.includes("{{priorContextBrief}}"), - "discuss-prepared.md should contain {{priorContextBrief}} placeholder", - ); - assert.ok( - discussPrepared.includes("{{ecosystemBrief}}"), - "discuss-prepared.md should contain {{ecosystemBrief}} placeholder", - ); - }); - - test("contains 4-layer protocol markers", () => { - const discussPrepared = readFileSync(discussPreparedPath, "utf-8"); - - // Check for all four layer headings - assert.ok( - discussPrepared.includes("## Layer 1 — Scope"), - "discuss-prepared.md should contain Layer 1 (Scope)", - ); - assert.ok( - discussPrepared.includes("## Layer 2 — Architecture"), - "discuss-prepared.md should contain Layer 2 (Architecture)", - ); - assert.ok( - discussPrepared.includes("## Layer 3 — Error States"), - "discuss-prepared.md should contain Layer 3 (Error States)", - ); - assert.ok( - discussPrepared.includes("## Layer 4 — Quality Bar"), - "discuss-prepared.md should contain Layer 4 (Quality Bar)", - ); - }); - - test("contains gate question IDs for all layers", () => { - 
const discussPrepared = readFileSync(discussPreparedPath, "utf-8");
-
-    assert.ok(
-      discussPrepared.includes("layer1_scope_gate"),
-      "discuss-prepared.md should contain layer1_scope_gate question ID",
-    );
-    assert.ok(
-      discussPrepared.includes("layer2_architecture_gate"),
-      "discuss-prepared.md should contain layer2_architecture_gate question ID",
-    );
-    assert.ok(
-      discussPrepared.includes("layer3_error_gate"),
-      "discuss-prepared.md should contain layer3_error_gate question ID",
-    );
-    assert.ok(
-      discussPrepared.includes("layer4_quality_gate"),
-      "discuss-prepared.md should contain layer4_quality_gate question ID",
-    );
-  });
-
-  test("contains context-enhanced template guidance", () => {
-    const discussPrepared = readFileSync(discussPreparedPath, "utf-8");
-
-    assert.ok(
-      discussPrepared.includes("context-enhanced"),
-      "discuss-prepared.md should reference context-enhanced template",
-    );
-  });
-});
-
-// ─── Prompt Loading Tests ───────────────────────────────────────────────────────
-
-describe("Prompt Loading", () => {
-  // Dynamic import to work with the module's warm cache
-  test("loadPrompt substitutes all variables correctly", async () => {
-    const { loadPrompt } = await import("../prompt-loader.ts");
-
-    const result = loadPrompt("discuss-prepared", {
-      preamble: "Test preamble",
-      codebaseBrief: "Test codebase brief content",
-      priorContextBrief: "Test prior context brief content",
-      ecosystemBrief: "Test ecosystem brief content",
-      milestoneId: "M001",
-      contextPath: ".gsd/milestones/M001/M001-CONTEXT.md",
-      roadmapPath: ".gsd/milestones/M001/M001-ROADMAP.md",
-      inlinedTemplates: "Test templates",
-      commitInstruction: "Test commit instruction",
-      multiMilestoneCommitInstruction: "Test multi-milestone commit",
-    });
-
-    assert.ok(result.includes("Test codebase brief content"), "codebaseBrief should be substituted");
-    assert.ok(result.includes("Test prior context brief content"), "priorContextBrief should be substituted");
-    assert.ok(result.includes("Test ecosystem brief content"), "ecosystemBrief should be substituted");
-    assert.ok(!result.includes("{{codebaseBrief}}"), "placeholder should not remain");
-  });
-
-  test("loadPrompt throws GSDError for missing variables", async () => {
-    const { loadPrompt } = await import("../prompt-loader.ts");
-    const { GSDError, GSD_PARSE_ERROR } = await import("../errors.ts");
-
-    assert.throws(
-      () => loadPrompt("discuss-prepared", {}), // Missing required variables
-      (err: unknown) => {
-        assert.ok(err instanceof GSDError, "should throw GSDError");
-        assert.equal((err as InstanceType<typeof GSDError>).code, GSD_PARSE_ERROR, "should have GSD_PARSE_ERROR code");
-        return true;
-      },
-    );
-  });
-
-  test("brief content with {{...}} patterns does not cause false variable errors", async () => {
-    const { loadPrompt } = await import("../prompt-loader.ts");
-
-    // Content that contains template-like patterns but should not be treated as variables
-    const briefWithPatterns = `
-## Tech Stack
-- Framework: Uses \`{{slot}}\` placeholder syntax in templates
-- Pattern: The codebase has \`{{variableName}}\` markers
-`;
-
-    // This should NOT throw, because {{slot}} and {{variableName}} are inside
-    // the brief value, not undeclared placeholders in the template itself.
- const result = loadPrompt("discuss-prepared", { - preamble: "Test", - codebaseBrief: briefWithPatterns, - priorContextBrief: "Test brief", - ecosystemBrief: "Test brief", - milestoneId: "M001", - contextPath: ".gsd/milestones/M001/M001-CONTEXT.md", - roadmapPath: ".gsd/milestones/M001/M001-ROADMAP.md", - inlinedTemplates: "Test templates", - commitInstruction: "Test commit instruction", - multiMilestoneCommitInstruction: "Test multi-milestone commit", - }); - - assert.ok(result.includes("{{slot}}"), "template-like patterns in content should be preserved"); - assert.ok(result.includes("{{variableName}}"), "template-like patterns in content should be preserved"); - }); -}); - -// ─── Validation Tests ─────────────────────────────────────────────────────────── - -describe("Enhanced Context Validation", () => { - test("valid enhanced context passes validation", async () => { - const { validateEnhancedContext } = await import("../prompt-validation.ts"); - - const validContent = ` -# M001: Test Milestone - -## Why This Milestone - -This is why we need this milestone. - -## Architectural Decisions - -### Decision 1 - -**Decision:** Use TypeScript -**Rationale:** Type safety - -## Acceptance Criteria - -- Criterion 1 -- Criterion 2 -`; - - const result = validateEnhancedContext(validContent); - assert.equal(result.valid, true, "valid content should pass validation"); - assert.equal(result.missing.length, 0, "no missing sections"); - }); - - test("missing scope section fails", async () => { - const { validateEnhancedContext } = await import("../prompt-validation.ts"); - - const contentMissingScope = ` -# M001: Test Milestone - -## Architectural Decisions - -### Decision 1 - -**Decision:** Use TypeScript - -## Acceptance Criteria - -- Criterion 1 -`; - - const result = validateEnhancedContext(contentMissingScope); - assert.equal(result.valid, false, "should fail validation"); - assert.ok( - result.missing.some((m) => m.includes("Scope") || m.includes("Why This Milestone")), - "should report missing scope section", - ); - }); - - test("missing architectural decisions section fails", async () => { - const { validateEnhancedContext } = await import("../prompt-validation.ts"); - - const contentMissingDecisions = ` -# M001: Test Milestone - -## Why This Milestone - -This is why we need this milestone. - -## Acceptance Criteria - -- Criterion 1 -`; - - const result = validateEnhancedContext(contentMissingDecisions); - assert.equal(result.valid, false, "should fail validation"); - assert.ok( - result.missing.includes("Architectural Decisions"), - "should report missing architectural decisions section", - ); - }); - - test("missing acceptance criteria section fails", async () => { - const { validateEnhancedContext } = await import("../prompt-validation.ts"); - - const contentMissingCriteria = ` -# M001: Test Milestone - -## Why This Milestone - -This is why we need this milestone. - -## Architectural Decisions - -### Decision 1 - -**Decision:** Use TypeScript -`; - - const result = validateEnhancedContext(contentMissingCriteria); - assert.equal(result.valid, false, "should fail validation"); - assert.ok( - result.missing.includes("Acceptance Criteria"), - "should report missing acceptance criteria section", - ); - }); - - test("empty architectural decisions section (no entries) fails", async () => { - const { validateEnhancedContext } = await import("../prompt-validation.ts"); - - const contentEmptyDecisions = ` -# M001: Test Milestone - -## Why This Milestone - -This is why we need this milestone. 
- -## Architectural Decisions - -No decisions yet. - -## Acceptance Criteria - -- Criterion 1 -`; - - const result = validateEnhancedContext(contentEmptyDecisions); - assert.equal(result.valid, false, "should fail validation"); - assert.ok( - result.missing.some((m) => m.includes("decision entry")), - "should report missing decision entry", - ); - }); - - test("alternative scope headers are accepted", async () => { - const { validateEnhancedContext } = await import("../prompt-validation.ts"); - - // Test with ## Scope - const withScope = ` -## Scope - -### In Scope -- Item 1 - -## Architectural Decisions - -### Decision 1 -**Decision:** Test - -## Acceptance Criteria - -- Criterion 1 -`; - assert.equal(validateEnhancedContext(withScope).valid, true, "## Scope should be accepted"); - - // Test with ## Milestone Scope - const withMilestoneScope = ` -## Milestone Scope - -This is the scope. - -## Architectural Decisions - -### Decision 1 -**Decision:** Test - -## Acceptance Criteria - -- Criterion 1 -`; - assert.equal( - validateEnhancedContext(withMilestoneScope).valid, - true, - "## Milestone Scope should be accepted", - ); - }); - - test("alternative acceptance criteria headers are accepted", async () => { - const { validateEnhancedContext } = await import("../prompt-validation.ts"); - - const withFinalIntegrated = ` -## Why This Milestone - -Test - -## Architectural Decisions - -### Decision 1 -**Decision:** Test - -## Final Integrated Acceptance - -- Criterion 1 -`; - assert.equal( - validateEnhancedContext(withFinalIntegrated).valid, - true, - "## Final Integrated Acceptance should be accepted", - ); - }); - - test("inline decision format is accepted", async () => { - const { validateEnhancedContext } = await import("../prompt-validation.ts"); - - const withInlineDecision = ` -## Why This Milestone - -Test - -## Architectural Decisions - -**Decision:** Use React for the frontend - -## Acceptance Criteria - -- Criterion 1 -`; - assert.equal( - validateEnhancedContext(withInlineDecision).valid, - true, - "**Decision marker format should be accepted", - ); - }); -}); - -// ─── Integration Tests ────────────────────────────────────────────────────────── - -describe("Integration: Format Functions", () => { - test("formatCodebaseBrief produces non-empty output", async () => { - const { formatCodebaseBrief } = await import("../preparation.ts"); - - const brief = { - techStack: { - primaryLanguage: "TypeScript", - detectedFiles: ["package.json", "tsconfig.json"], - packageManager: "npm", - isMonorepo: false, - hasTests: true, - hasCI: true, - }, - moduleStructure: { - topLevelDirs: ["src", "tests"], - srcSubdirs: ["components", "utils"], - totalFilesSampled: 5, - }, - patterns: { - asyncStyle: "async/await" as const, - errorHandling: "try/catch" as const, - namingConvention: "camelCase" as const, - evidence: { - asyncStyle: ["src/foo.ts: async/await (5 occurrences)"], - errorHandling: ["src/bar.ts: try/catch (3 occurrences)"], - namingConvention: ["camelCase: 50 occurrences"], - }, - fileCounts: { - asyncAwait: 3, - promises: 0, - callbacks: 0, - tryCatch: 2, - errorCallbacks: 0, - resultTypes: 0, - }, - }, - sampledFiles: ["src/index.ts", "src/utils.ts"], - }; - - const formatted = formatCodebaseBrief(brief); - assert.ok(formatted.length > 0, "formatted brief should not be empty"); - assert.ok(formatted.includes("TypeScript"), "should include primary language"); - assert.ok(formatted.includes("async/await"), "should include async style"); - }); - - test("formatPriorContextBrief produces 
non-empty output", async () => { - const { formatPriorContextBrief } = await import("../preparation.ts"); - - const brief = { - decisions: { - byScope: new Map([ - ["architecture", [{ id: "D001", scope: "architecture", decision: "Use SQLite", choice: "SQLite", rationale: "Simplicity" }]], - ]), - totalCount: 1, - }, - requirements: { - active: [{ id: "R001", description: "Test requirement", status: "active" as const }], - validated: [], - deferred: [], - totalCount: 1, - }, - knowledge: "Some knowledge entry", - summaries: "M001 completed X and Y", - }; - - const formatted = formatPriorContextBrief(brief); - assert.ok(formatted.length > 0, "formatted brief should not be empty"); - assert.ok(formatted.includes("Prior Decisions"), "should include decisions section"); - assert.ok(formatted.includes("D001"), "should include decision ID"); - }); - - test("formatEcosystemBrief returns simplified message (research happens during discussion)", async () => { - const { formatEcosystemBrief } = await import("../preparation.ts"); - - // formatEcosystemBrief now returns a fixed message regardless of brief content - // because ecosystem research happens during the discussion, not preparation - const briefWithFindings = { - available: true, - queries: ["Next.js best practices 2024"], - findings: [ - { - query: "Next.js best practices 2024", - title: "Server Components Guide", - url: "https://example.com/guide", - snippet: "Use Server Components for data fetching", - }, - ], - provider: "tavily", - }; - - const formatted = formatEcosystemBrief(briefWithFindings); - assert.ok(formatted.length > 0, "formatted brief should not be empty"); - assert.ok(formatted.includes("Ecosystem Research"), "should include research heading"); - assert.ok(formatted.includes("during the discussion"), "should mention research happens during discussion"); - }); - - test("formatEcosystemBrief returns same output for any brief state", async () => { - const { formatEcosystemBrief } = await import("../preparation.ts"); - - const briefUnavailable = { - available: false, - queries: [], - findings: [], - skippedReason: "No API key configured", - }; - - const briefAvailable = { - available: true, - queries: ["test"], - findings: [], - provider: "tavily", - }; - - const formatted1 = formatEcosystemBrief(briefUnavailable); - const formatted2 = formatEcosystemBrief(briefAvailable); - - // Both should return the same simplified message - assert.equal(formatted1, formatted2, "should return consistent output regardless of brief state"); - assert.ok(formatted1.includes("web search tools"), "should mention web search tools"); - }); - - test("formatted briefs can be injected into prompt without errors", async () => { - const { loadPrompt } = await import("../prompt-loader.ts"); - const { formatCodebaseBrief, formatPriorContextBrief, formatEcosystemBrief } = await import("../preparation.ts"); - - // Create realistic briefs - const codebaseBrief = formatCodebaseBrief({ - techStack: { - primaryLanguage: "TypeScript", - detectedFiles: ["package.json"], - packageManager: "npm", - isMonorepo: false, - hasTests: true, - hasCI: false, - }, - moduleStructure: { - topLevelDirs: ["src"], - srcSubdirs: [], - totalFilesSampled: 1, - }, - patterns: { - asyncStyle: "async/await" as const, - errorHandling: "try/catch" as const, - namingConvention: "camelCase" as const, - evidence: { asyncStyle: [], errorHandling: [], namingConvention: [] }, - fileCounts: { - asyncAwait: 0, - promises: 0, - callbacks: 0, - tryCatch: 0, - errorCallbacks: 0, - resultTypes: 0, - 
}, - }, - sampledFiles: [], - }); - - const priorContextBrief = formatPriorContextBrief({ - decisions: { byScope: new Map(), totalCount: 0 }, - requirements: { active: [], validated: [], deferred: [], totalCount: 0 }, - knowledge: "No prior knowledge recorded.", - summaries: "No prior milestone summaries.", - }); - - const ecosystemBrief = formatEcosystemBrief({ - available: false, - queries: [], - findings: [], - skippedReason: "Preparation disabled", - }); - - // Should not throw when injecting formatted briefs - const result = loadPrompt("discuss-prepared", { - preamble: "Test preamble", - codebaseBrief, - priorContextBrief, - ecosystemBrief, - milestoneId: "M001", - contextPath: ".gsd/milestones/M001/M001-CONTEXT.md", - roadmapPath: ".gsd/milestones/M001/M001-ROADMAP.md", - inlinedTemplates: "Test templates", - commitInstruction: "Do not commit", - multiMilestoneCommitInstruction: "Do not commit", - }); - - assert.ok(result.includes("TypeScript"), "codebase brief should be present"); - assert.ok(result.includes("Prior Decisions"), "prior context brief should be present"); - // formatEcosystemBrief now returns a fixed message about research during discussion - assert.ok(result.includes("during the discussion"), "ecosystem brief should be present"); - }); -}); diff --git a/src/resources/extensions/gsd/tests/prompt-contracts.test.ts b/src/resources/extensions/gsd/tests/prompt-contracts.test.ts index 1b19d356c..7124ec494 100644 --- a/src/resources/extensions/gsd/tests/prompt-contracts.test.ts +++ b/src/resources/extensions/gsd/tests/prompt-contracts.test.ts @@ -42,9 +42,19 @@ test("system prompt references CODEBASE.md and /gsd codebase", () => { assert.match(prompt, /auto-refreshes it when tracked files change/i); }); +test("system prompt hard rules forbid fabricating user responses", () => { + const prompt = readPrompt("system"); + assert.match(prompt, /never fabricate, simulate, or role-play user responses/i); + assert.match(prompt, /never generate markers like `?\[User\]`?, `?\[Human\]`?, `?User:`?/i); + assert.match(prompt, /ask one question round \(1-3 questions\), then stop and wait for the user's actual response/i); + assert.match(prompt, /ask_user_questions.*only valid structured user input/i); +}); + test("discuss prompt allows implementation questions when they materially matter", () => { const prompt = readPrompt("discuss"); assert.match(prompt, /Lead with experience, but ask implementation when it materially matters/i); + assert.match(prompt, /Never fabricate, simulate, or role-play user responses/i); + assert.match(prompt, /Ask one question round \(1-3 questions\) per turn, then stop and wait for the user's actual response/i); assert.match(prompt, /one gate, not two/i); assert.doesNotMatch(prompt, /Questions must be about the experience, not the implementation/i); }); @@ -56,6 +66,8 @@ test("guided discussion prompts avoid wrap-up prompts after every round", () => assert.match(slicePrompt, /Do \*\*not\*\* ask a meta "ready to wrap up\?" question after every round/i); assert.doesNotMatch(milestonePrompt, /I think I have a solid picture of this milestone\. Ready to wrap up/i); assert.doesNotMatch(slicePrompt, /I think I have a solid picture of this slice\. 
Ready to wrap up/i);
+ assert.match(milestonePrompt, /Never fabricate or simulate user input/i);
+ assert.match(slicePrompt, /Never fabricate or simulate user input/i);
});

test("guided milestone discussion scopes depth verification to the milestone id", () => {
@@ -64,6 +76,13 @@ test("guided milestone discussion scopes depth verification to the milestone id"
assert.doesNotMatch(prompt, /depth_verification_confirm" — this enables the write-gate downstream/i, "legacy global depth gate wording should be gone");
});

+test("queue prompt requires waiting for user response between rounds", () => {
+ const prompt = readPrompt("queue");
+ assert.match(prompt, /Never fabricate or simulate user input during this discussion/i);
+ assert.match(prompt, /Ask 1-3 questions per round, then wait for the user's response before asking the next round\./i);
+ assert.doesNotMatch(prompt, /treat that as permission to continue/i);
+});
+
test("guided-resume-task prompt preserves recovery state until work is superseded", () => {
const prompt = readPrompt("guided-resume-task");
assert.match(prompt, /Do \*\*not\*\* delete the continue file immediately/i);
diff --git a/src/resources/extensions/gsd/tests/prompt-loader-working-directory.test.ts b/src/resources/extensions/gsd/tests/prompt-loader-working-directory.test.ts
new file mode 100644
index 000000000..c0de3c4ec
--- /dev/null
+++ b/src/resources/extensions/gsd/tests/prompt-loader-working-directory.test.ts
@@ -0,0 +1,19 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+
+import { loadPrompt } from "../prompt-loader.ts";
+
+test("loadPrompt normalizes workingDirectory backslashes for bash-friendly prompts (#4048)", () => {
+ const prompt = loadPrompt("research-milestone", {
+ milestoneId: "M001",
+ milestoneTitle: "Windows path fix",
+ workingDirectory: "C:\\Dev\\NB\\TR",
+ inlinedContext: "context",
+ skillActivation: "skill activation",
+ skillDiscoveryMode: "off",
+ skillDiscoveryInstructions: " disabled",
+ });
+
+ assert.match(prompt, /Your working directory is `C:\/Dev\/NB\/TR`/);
+ assert.doesNotMatch(prompt, /C:\\Dev\\NB\\TR/);
+});
diff --git a/src/resources/extensions/gsd/tests/prompt-system-gate-coverage.test.ts b/src/resources/extensions/gsd/tests/prompt-system-gate-coverage.test.ts
new file mode 100644
index 000000000..fad37ed7d
--- /dev/null
+++ b/src/resources/extensions/gsd/tests/prompt-system-gate-coverage.test.ts
@@ -0,0 +1,208 @@
+/**
+ * Prompt-system gate coverage tests.
+ *
+ * These tests pin the invariants the plan file documents:
+ * 1. Every pending slice-scoped gate is routed to exactly one owner turn.
+ * Q8 (owned by complete-slice) MUST NOT leak into gate-evaluate and
+ * get silently dropped the way it used to before the registry landed.
+ * 2. getPendingGatesForTurn filters by the registry's owner turn, not
+ * just the DB scope column.
+ * 3. Output validators recognize artifacts that contain the required
+ * gate section headings, and flag ones that don't.
+ * 4. The milestone validation output validator enforces all four
+ * MV01-MV04 section headings.
+ *
+ * They also assert the VALIDATION.md renderer still produces headings
+ * matching the registry's promptSection strings, so future renderer
+ * edits that drift from the registry fail the suite loudly.
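+ *
+ * Illustrative registry shape these invariants lean on (a sketch inferred
+ * from the assertions in this file — gate-registry.ts is the source of
+ * truth):
+ *   GATE_REGISTRY.Q8 → { ownerTurn: "complete-slice", promptSection: "Operational Readiness" }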
+ */ + +import { describe, test, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { + openDatabase, + closeDatabase, + insertMilestone, + insertSlice, + insertTask, + insertGateRow, + getPendingGates, + getPendingGatesForTurn, +} from "../gsd-db.ts"; +import { + GATE_REGISTRY, + getGatesForTurn, + type OwnerTurn, +} from "../gate-registry.ts"; +import { + validateSliceSummaryOutput, + validateTaskSummaryOutput, + validateMilestoneValidationOutput, + validateGateSections, +} from "../prompt-validation.ts"; + +function setupTestDb(): string { + const tmpDir = mkdtempSync(join(tmpdir(), "prompt-gate-coverage-")); + const dbPath = join(tmpDir, "gsd.db"); + openDatabase(dbPath); + insertMilestone({ id: "M001", title: "Test", status: "active" }); + insertSlice({ + milestoneId: "M001", + id: "S01", + title: "Test Slice", + status: "pending", + risk: "medium", + depends: [], + }); + insertTask({ + id: "T01", + sliceId: "S01", + milestoneId: "M001", + title: "Test Task", + status: "pending", + }); + return tmpDir; +} + +describe("getPendingGatesForTurn routes by owner turn, not scope column", () => { + let tmpDir: string; + beforeEach(() => { + tmpDir = setupTestDb(); + }); + afterEach(() => { + closeDatabase(); + rmSync(tmpDir, { recursive: true, force: true }); + }); + + test("Q8 stored as scope:'slice' is owned by complete-slice, not gate-evaluate", () => { + // Seed the three slice-scoped gates plan-slice writes today. + insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q3", scope: "slice" }); + insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q4", scope: "slice" }); + insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q8", scope: "slice" }); + + // getPendingGates(..., "slice") returns all three (unchanged). + const allSlicePending = getPendingGates("M001", "S01", "slice"); + assert.equal(allSlicePending.length, 3); + + // But the turn-aware helper routes them correctly. 
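+ // (Expected routing, per the registry: Q3/Q4 → gate-evaluate, Q8 → complete-slice.)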
+ const gateEval = getPendingGatesForTurn("M001", "S01", "gate-evaluate");
+ assert.deepEqual(gateEval.map((g) => g.gate_id).sort(), ["Q3", "Q4"]);
+
+ const completeSlice = getPendingGatesForTurn("M001", "S01", "complete-slice");
+ assert.deepEqual(completeSlice.map((g) => g.gate_id), ["Q8"]);
+ });
+
+ test("task-scoped gates are scoped to the requested task id", () => {
+ insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q5", scope: "task", taskId: "T01" });
+ insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q6", scope: "task", taskId: "T01" });
+ insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q5", scope: "task", taskId: "T02" });
+
+ const t1 = getPendingGatesForTurn("M001", "S01", "execute-task", "T01");
+ assert.equal(t1.length, 2);
+ assert.ok(t1.every((g) => g.gate_id === "Q5" || g.gate_id === "Q6"));
+
+ const t2 = getPendingGatesForTurn("M001", "S01", "execute-task", "T02");
+ assert.equal(t2.length, 1);
+ assert.equal(t2[0].gate_id, "Q5");
+ });
+});
+
+describe("per-turn output validators", () => {
+ test("validateSliceSummaryOutput flags missing Operational Readiness", () => {
+ const md = `# S01: Test Slice\n\n## What Happened\nstuff\n\n## Verification\nstuff\n`;
+ const result = validateSliceSummaryOutput(md);
+ assert.equal(result.valid, false);
+ assert.ok(result.missing.some((m) => m.includes("Q8")));
+ assert.ok(result.missing.some((m) => m.includes("Operational Readiness")));
+ });
+
+ test("validateSliceSummaryOutput passes when Operational Readiness heading is present", () => {
+ const md = `# S01\n\n## Operational Readiness\n- Health: /health\n- Failure: alert\n`;
+ const result = validateSliceSummaryOutput(md);
+ assert.equal(result.valid, true);
+ assert.equal(result.missing.length, 0);
+ });
+
+ test("validateMilestoneValidationOutput requires all four MV headings", () => {
+ // Missing Requirement Coverage.
+ const md = [
+ "# Milestone Validation: M001",
+ "## Success Criteria Checklist",
+ "ok",
+ "## Slice Delivery Audit",
+ "ok",
+ "## Cross-Slice Integration",
+ "ok",
+ ].join("\n\n");
+ const result = validateMilestoneValidationOutput(md);
+ assert.equal(result.valid, false);
+ assert.ok(result.missing.some((m) => m.includes("MV04")));
+ });
+
+ test("validateMilestoneValidationOutput passes for a complete VALIDATION.md", () => {
+ const md = [
+ "# Milestone Validation: M001",
+ "## Success Criteria Checklist",
+ "ok",
+ "## Slice Delivery Audit",
+ "ok",
+ "## Cross-Slice Integration",
+ "ok",
+ "## Requirement Coverage",
+ "ok",
+ ].join("\n\n");
+ const result = validateMilestoneValidationOutput(md);
+ assert.equal(result.valid, true, `unexpected missing: ${result.missing.join(", ")}`);
+ });
+
+ test("validateTaskSummaryOutput flags missing task-gate sections", () => {
+ const md = `# T01\n\n## What Happened\nstuff\n\n## Verification\nstuff\n`;
+ const result = validateTaskSummaryOutput(md);
+ assert.equal(result.valid, false);
+ const idsInMissing = result.missing.join(" ");
+ assert.ok(idsInMissing.includes("Q5"));
+ assert.ok(idsInMissing.includes("Q6"));
+ assert.ok(idsInMissing.includes("Q7"));
+ });
+
+ test("validateGateSections returns empty missing when gate bucket is empty", () => {
+ // There is no real owner turn that owns zero gates, so approximate the
+ // empty-missing case by validating a real turn against an artifact that
+ // contains every required section.
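+ // validateGateSections is assumed to return the same { valid, missing }
+ // shape the validators above expose, so an all-sections artifact should
+ // yield an empty missing list.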
+ const fullMd = getGatesForTurn("validate-milestone") + .map((g) => `## ${g.promptSection}\n\nstuff`) + .join("\n\n"); + const result = validateGateSections(fullMd, "validate-milestone"); + assert.equal(result.valid, true); + }); +}); + +describe("registry / renderer parity", () => { + test("MV promptSections match the validate-milestone renderer H2 headings", () => { + // Mirror the string literals from tools/validate-milestone.ts + // renderValidationMarkdown() so a rename there flips this test red. + const expectedHeadings = [ + "Success Criteria Checklist", + "Slice Delivery Audit", + "Cross-Slice Integration", + "Requirement Coverage", + ]; + const registryHeadings = getGatesForTurn("validate-milestone").map((g) => g.promptSection); + assert.deepEqual(registryHeadings.sort(), [...expectedHeadings].sort()); + }); + + test("Q8 promptSection matches the complete-slice renderer H2 heading", () => { + // Mirror the slice-summary H2 introduced in tools/complete-slice.ts. + assert.equal(GATE_REGISTRY.Q8.promptSection, "Operational Readiness"); + }); + + test("registry owner turns cover every turn gate-registry.ts declares", () => { + const ownerTurns = new Set(Object.values(GATE_REGISTRY).map((g) => g.ownerTurn)); + assert.ok(ownerTurns.has("gate-evaluate")); + assert.ok(ownerTurns.has("execute-task")); + assert.ok(ownerTurns.has("complete-slice")); + assert.ok(ownerTurns.has("validate-milestone")); + }); +}); diff --git a/src/resources/extensions/gsd/tests/provider-errors.test.ts b/src/resources/extensions/gsd/tests/provider-errors.test.ts index 34c4ed824..2ae75a380 100644 --- a/src/resources/extensions/gsd/tests/provider-errors.test.ts +++ b/src/resources/extensions/gsd/tests/provider-errors.test.ts @@ -32,6 +32,15 @@ test("classifyError detects rate limit from message", () => { assert.equal(result.kind, "rate-limit"); }); +test("classifyError treats OpenRouter affordability errors as transient rate-limit class", () => { + const result = classifyError( + "402 This request requires more credits, or fewer max_tokens. You requested up to 32000 tokens, but can only afford 329.", + ); + assert.ok(isTransient(result)); + assert.equal(result.kind, "rate-limit"); + assert.ok("retryAfterMs" in result && result.retryAfterMs > 0); +}); + test("classifyError extracts reset delay from message", () => { const result = classifyError("rate limit exceeded, reset in 45s"); assert.equal(result.kind, "rate-limit"); @@ -101,6 +110,13 @@ test("classifyError detects quota exceeded as permanent", () => { assert.ok(!isTransient(result)); }); +test("classifyError treats plain 'Connection error.' 
as transient connection failure (#3594)", () => {
+ const result = classifyError("Connection error.");
+ assert.ok(isTransient(result));
+ assert.equal(result.kind, "connection");
+ assert.ok("retryAfterMs" in result && result.retryAfterMs === 15_000);
+});
+
test("classifyError treats unknown error as not transient", () => {
const result = classifyError("something went wrong");
assert.ok(!isTransient(result));
diff --git a/src/resources/extensions/gsd/tests/register-hooks-depth-verification.test.ts b/src/resources/extensions/gsd/tests/register-hooks-depth-verification.test.ts
new file mode 100644
index 000000000..8e717234e
--- /dev/null
+++ b/src/resources/extensions/gsd/tests/register-hooks-depth-verification.test.ts
@@ -0,0 +1,97 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import { mkdirSync, rmSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+
+import { registerHooks } from "../bootstrap/register-hooks.ts";
+import {
+ getPendingGate,
+ resetWriteGateState,
+ shouldBlockContextArtifactSave,
+} from "../bootstrap/write-gate.ts";
+
+function makeTempDir(prefix: string): string {
+ const dir = join(
+ tmpdir(),
+ `gsd-depth-gate-${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
+ );
+ mkdirSync(dir, { recursive: true });
+ return dir;
+}
+
+test("register-hooks unlocks milestone depth verification from question id without guided-flow state (#4047)", async (t) => {
+ const dir = makeTempDir("manual");
+ const originalCwd = process.cwd();
+ process.chdir(dir);
+ resetWriteGateState();
+
+ t.after(() => {
+ resetWriteGateState();
+ process.chdir(originalCwd);
+ rmSync(dir, { recursive: true, force: true });
+ });
+
+ const handlers = new Map<string, Array<(event: any, ctx?: any) => Promise<void> | void>>();
+ const pi = {
+ on(event: string, handler: (event: any, ctx?: any) => Promise<void> | void) {
+ const existing = handlers.get(event) ?? [];
+ existing.push(handler);
+ handlers.set(event, existing);
+ },
+ } as any;
+
+ registerHooks(pi);
+
+ const questionId = "depth_verification_M001_confirm";
+ const questions = [
+ {
+ id: questionId,
+ question: "Do you agree?",
+ options: [
+ { label: "Yes, you got it (Recommended)" },
+ { label: "Needs adjustment" },
+ ],
+ },
+ ];
+
+ const toolCallHandlers = handlers.get("tool_call");
+ const toolResultHandlers = handlers.get("tool_result");
+ assert.ok(toolCallHandlers?.length, "tool_call handler should be registered");
+ assert.ok(toolResultHandlers?.length, "tool_result handler should be registered");
+
+ for (const handler of toolCallHandlers ?? []) {
+ await handler({
+ toolName: "ask_user_questions",
+ input: { questions },
+ });
+ }
+
+ assert.equal(getPendingGate(), questionId, "gate should be set even without guided-flow state");
+ assert.equal(
+ shouldBlockContextArtifactSave("CONTEXT", "M001").block,
+ true,
+ "milestone context should still be blocked before confirmation",
+ );
+
+ for (const handler of toolResultHandlers ?? []) {
+ await handler({
+ toolName: "ask_user_questions",
+ input: { questions },
+ details: {
+ response: {
+ answers: {
+ [questionId]: { selected: "Yes, you got it (Recommended)" },
+ },
+ },
+ },
+ });
+ }
+
+ assert.equal(getPendingGate(), null, "confirming the depth question should clear the pending gate");
+ assert.equal(
+ shouldBlockContextArtifactSave("CONTEXT", "M001").block,
+ false,
+ "question-id milestone inference should unlock the matching milestone context write",
+ );
+});
diff --git a/src/resources/extensions/gsd/tests/register-shortcuts.test.ts b/src/resources/extensions/gsd/tests/register-shortcuts.test.ts
index e67902af2..90f822bd0 100644
--- a/src/resources/extensions/gsd/tests/register-shortcuts.test.ts
+++ b/src/resources/extensions/gsd/tests/register-shortcuts.test.ts
@@ -1,6 +1,6 @@
import test from "node:test";
import assert from "node:assert/strict";
-import { mkdirSync, rmSync } from "node:fs";
+import { mkdirSync, realpathSync, rmSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";

@@ -37,10 +37,10 @@ test("dashboard shortcut resolves the project root instead of the current worktr
});

let capturedHandler: ((ctx: any) => Promise<void>) | null = null;
- const shortcuts: Array<{ description: string; handler: (ctx: any) => Promise<void> }> = [];
+ const shortcuts: Array<{ key: string; description: string; handler: (ctx: any) => Promise<void> }> = [];
const pi = {
- registerShortcut: (_key: unknown, shortcut: { description: string; handler: (ctx: any) => Promise<void> }) => {
- shortcuts.push(shortcut);
+ registerShortcut: (key: unknown, shortcut: { description: string; handler: (ctx: any) => Promise<void> }) => {
+ shortcuts.push({ key: String(key), ...shortcut });
if (!capturedHandler) {
capturedHandler = shortcut.handler;
}
@@ -69,5 +69,63 @@ test("dashboard shortcut resolves the project root instead of the current worktr
assert.ok(customCalls > 0, "shortcut opens the dashboard overlay when project root is resolved");
assert.equal(notices.length, 0, "shortcut does not fall back to the missing-.gsd warning");
- assert.equal(shortcuts.length, 3, "all GSD shortcuts are still registered");
+ assert.equal(shortcuts.length, 5, "all GSD shortcuts are still registered");
+ const keys = shortcuts.map((shortcut) => shortcut.key);
+ assert.ok(keys.includes("ctrl+alt+g"), "primary dashboard shortcut is registered");
+ assert.ok(keys.includes("ctrl+shift+g"), "fallback dashboard shortcut is registered");
+ assert.ok(keys.includes("ctrl+alt+n"), "primary notifications shortcut is registered");
+ assert.ok(keys.includes("ctrl+shift+n"), "fallback notifications shortcut is registered");
+ assert.ok(keys.includes("ctrl+alt+p"), "primary parallel shortcut is registered");
+ // No Ctrl+Shift+P fallback — conflicts with cycleModelBackward (shift+ctrl+p)
+ assert.ok(!keys.includes("ctrl+shift+p"), "parallel fallback must not be registered (conflicts with cycleModelBackward)");
+});
+
+test("parallel shortcut passes resolved project root into overlay", async (t) => {
+ const base = makeTempDir("parallel-root");
+ const worktreeRoot = join(base, ".gsd", "worktrees", "M001");
+ mkdirSync(join(base, ".gsd", "parallel"), { recursive: true });
+ mkdirSync(worktreeRoot, { recursive: true });
+
+ const originalCwd = process.cwd();
+ process.chdir(worktreeRoot);
+ t.after(() => {
+ process.chdir(originalCwd);
+ cleanup(base);
+ });
+
+ const shortcuts: Array<{ key: string; description: string; handler: (ctx: any) => Promise<void> }> = [];
+ registerShortcuts({
+ registerShortcut: (key: unknown, shortcut: { description: string; handler: (ctx: any) => Promise<void> }) => {
+ shortcuts.push({ key: String(key), ...shortcut });
+ },
+ } as any);
+
+ const parallelShortcut = shortcuts.find((shortcut) => shortcut.key === "ctrl+alt+p");
+ assert.ok(parallelShortcut, "parallel shortcut is registered");
+
+ let capturedBasePath: string | undefined;
+ await parallelShortcut!.handler({
+ hasUI: true,
+ ui: {
+ custom: async (factory: any) => {
+ const overlay = factory(
+ { requestRender() {} },
+ { fg: (_color: string, text: string) => text, bold: (text: string) => text },
+ null,
+ () => {},
+ );
+ capturedBasePath = (overlay as any).basePath;
+ overlay.dispose?.();
+ return true;
+ },
+ notify: () => {},
+ },
+ });
+
+ assert.ok(capturedBasePath, "parallel shortcut should construct overlay with a basePath");
+ assert.equal(
+ realpathSync(capturedBasePath),
+ realpathSync(base),
+ "parallel overlay should use the resolved project root, not the worktree cwd",
+ );
});
diff --git a/src/resources/extensions/gsd/tests/resource-loader-import-path.test.ts b/src/resources/extensions/gsd/tests/resource-loader-import-path.test.ts
new file mode 100644
index 000000000..0908d12d6
--- /dev/null
+++ b/src/resources/extensions/gsd/tests/resource-loader-import-path.test.ts
@@ -0,0 +1,38 @@
+// GSD2 — Regression test for broken resource-loader import path
+// Ensures auto.ts imports resource-loader via package resolution, not a
+// relative path that breaks when deployed to ~/.gsd/agent/extensions/gsd/.
+
+import { describe, test } from "node:test";
+import assert from "node:assert/strict";
+import { readFileSync } from "node:fs";
+import { join } from "node:path";
+
+const autoSrc = readFileSync(join(import.meta.dirname, "..", "auto.ts"), "utf-8");
+
+describe("resource-loader import path", () => {
+ test("must not use relative import reaching above extensions/", () => {
+ // The old broken pattern: import("../../../" + "resource-loader.js")
+ // This resolves to ~/.gsd/resource-loader.js from the deployed location,
+ // which doesn't exist. Regression introduced in #3899.
+ const brokenPattern = /import\(\s*["']\.\.\/\.\.\/\.\..*resource-loader/;
+ assert.ok(
+ !brokenPattern.test(autoSrc),
+ "auto.ts must not import resource-loader via relative path above extensions/ — " +
+ "breaks when deployed to ~/.gsd/agent/extensions/gsd/ (see #3899)",
+ );
+ });
+
+ test("uses GSD_PKG_ROOT to resolve resource-loader from package root", () => {
+ // The fix uses GSD_PKG_ROOT (set by loader.ts) to construct an absolute
+ // file URL to dist/resource-loader.js — works in both source and deployed,
+ // and on Windows where raw paths fail with ERR_UNSUPPORTED_ESM_URL_SCHEME.
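+ // A hypothetical sketch of the fixed import — illustrative only; auto.ts
+ // is authoritative and the exact path helpers are assumed:
+ //   const url = pathToFileURL(join(process.env.GSD_PKG_ROOT!, "dist", "resource-loader.js")).href;
+ //   const mod = await import(url);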
+ assert.ok( + autoSrc.includes('process.env.GSD_PKG_ROOT'), + "auto.ts should use GSD_PKG_ROOT to resolve resource-loader", + ); + assert.ok( + autoSrc.includes('pathToFileURL'), + "auto.ts should convert path to file URL for cross-platform import()", + ); + }); +}); diff --git a/src/resources/extensions/gsd/tests/secure-env-collect.test.ts b/src/resources/extensions/gsd/tests/secure-env-collect.test.ts index 18acf7dd4..3e1a5e109 100644 --- a/src/resources/extensions/gsd/tests/secure-env-collect.test.ts +++ b/src/resources/extensions/gsd/tests/secure-env-collect.test.ts @@ -317,3 +317,48 @@ test("secure_env_collect #2997: null from ctx.ui.custom() is still treated as sk "Key returning null must NOT be in applied list", ); }); + +test("secure_env_collect: falls back to secure input prompt when custom UI is unavailable", async (t) => { + const { collectSecretsFromManifest } = await loadOrchestrator(); + + const tmp = makeTempDir("sec-input-fallback-test"); + t.after(() => { + rmSync(tmp, { recursive: true, force: true }); + }); + + const manifest = makeManifest([ + { key: "SECRET_FROM_INPUT_FALLBACK", status: "pending", formatHint: "starts with sk-" }, + ]); + await writeManifestFile(tmp, manifest); + + let callIndex = 0; + const inputCalls: Array<{ title: string; placeholder?: string; opts?: { secure?: boolean } }> = []; + const mockCtx = { + cwd: tmp, + hasUI: true, + ui: { + custom: async (_factory: any) => { + callIndex++; + if (callIndex <= 1) return null; // summary screen dismiss + return undefined; // collect screen unavailable on this surface + }, + input: async (title: string, placeholder?: string, opts?: { secure?: boolean }) => { + inputCalls.push({ title, placeholder, opts }); + return " sk-test-fallback-value "; + }, + }, + }; + + const result = await collectSecretsFromManifest(tmp, "M001", mockCtx as any); + + assert.ok( + result.applied.includes("SECRET_FROM_INPUT_FALLBACK"), + "Fallback input should collect and apply the key", + ); + assert.ok( + !result.skipped.includes("SECRET_FROM_INPUT_FALLBACK"), + "Fallback input should not mark the key as skipped", + ); + assert.equal(inputCalls.length, 1, "Fallback input should be requested once"); + assert.equal(inputCalls[0]?.opts?.secure, true, "Fallback input should request secure entry when supported"); +}); diff --git a/src/resources/extensions/gsd/tests/session-model-override.test.ts b/src/resources/extensions/gsd/tests/session-model-override.test.ts new file mode 100644 index 000000000..a6ca1a31b --- /dev/null +++ b/src/resources/extensions/gsd/tests/session-model-override.test.ts @@ -0,0 +1,35 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +import { + clearSessionModelOverride, + getSessionModelOverride, + setSessionModelOverride, +} from "../session-model-override.js"; + +const phasesSource = readFileSync(join(import.meta.dirname, "..", "auto", "phases.ts"), "utf-8"); + +test("setSessionModelOverride stores provider/model for the session", () => { + const sessionId = `session-override-${Date.now()}`; + setSessionModelOverride(sessionId, { provider: "openai-codex", id: "gpt-5.4" }); + + const override = getSessionModelOverride(sessionId); + assert.equal(override?.provider, "openai-codex"); + assert.equal(override?.id, "gpt-5.4"); +}); + +test("clearSessionModelOverride removes the session override", () => { + const sessionId = `session-clear-${Date.now()}`; + setSessionModelOverride(sessionId, { provider: "anthropic", 
id: "claude-sonnet-4-6" }); + clearSessionModelOverride(sessionId); + assert.equal(getSessionModelOverride(sessionId), undefined); +}); + +test("auto dispatch threads manual session model override into selectAndApplyModel", () => { + assert.ok( + phasesSource.includes("s.manualSessionModelOverride"), + "auto/phases.ts should pass s.manualSessionModelOverride into selectAndApplyModel", + ); +}); diff --git a/src/resources/extensions/gsd/tests/stale-slice-rows.test.ts b/src/resources/extensions/gsd/tests/stale-slice-rows.test.ts new file mode 100644 index 000000000..8fb39c444 --- /dev/null +++ b/src/resources/extensions/gsd/tests/stale-slice-rows.test.ts @@ -0,0 +1,41 @@ +/** + * stale-slice-rows.test.ts — #3658 + * + * Verify that state.ts contains slice-level status reconciliation that + * updates stale DB rows (status "pending") when disk artifacts (SUMMARY) + * prove the slice is complete. Without this, the dependency resolver builds + * doneSliceIds from stale DB rows and downstream slices stay blocked. + */ + +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const sourceFile = join(__dirname, "..", "state.ts"); + +describe("stale slice row reconciliation (#3658)", () => { + const source = readFileSync(sourceFile, "utf-8"); + + test("imports updateSliceStatus from gsd-db", () => { + assert.match(source, /import\s*\{[^}]*updateSliceStatus[^}]*\}\s*from/); + }); + + test("checks isStatusDone before reconciling slice rows", () => { + assert.match(source, /isStatusDone\(dbSlice\.status\)/); + }); + + test("resolves SUMMARY file to detect completed slices on disk", () => { + assert.match(source, /resolveSliceFile\(basePath,\s*mid,\s*dbSlice\.id,\s*["']SUMMARY["']\)/); + }); + + test("calls updateSliceStatus to reconcile stale rows", () => { + assert.match(source, /updateSliceStatus\(mid,\s*dbSlice\.id,\s*["']complete["']\)/); + }); + + test("references issue #3599 in reconciliation comment", () => { + assert.match(source, /#3599/); + }); +}); diff --git a/src/resources/extensions/gsd/tests/start-auto-detached.test.ts b/src/resources/extensions/gsd/tests/start-auto-detached.test.ts new file mode 100644 index 000000000..6726b2616 --- /dev/null +++ b/src/resources/extensions/gsd/tests/start-auto-detached.test.ts @@ -0,0 +1,90 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { resolve } from "node:path"; + +const gsdDir = resolve(import.meta.dirname, ".."); + +function readGsdFile(relativePath: string): string { + return readFileSync(resolve(gsdDir, relativePath), "utf-8"); +} + +test("command entrypoints use startAutoDetached instead of awaiting startAuto (#3733)", () => { + const autoHandlerSrc = readGsdFile("commands/handlers/auto.ts"); + const workflowHandlerSrc = readGsdFile("commands/handlers/workflow.ts"); + const guidedFlowSrc = readGsdFile("guided-flow.ts"); + + assert.ok( + !autoHandlerSrc.includes("await startAuto("), + "auto command handler should not await startAuto from the active agent turn", + ); + assert.ok( + !workflowHandlerSrc.includes("await startAuto("), + "workflow command handler should not await startAuto from the active agent turn", + ); + assert.ok( + !guidedFlowSrc.includes("await startAuto("), + "guided flow should not await startAuto from the active agent turn", + ); 
+ + assert.ok( + autoHandlerSrc.includes("startAutoDetached("), + "auto command handler should launch auto-mode through startAutoDetached", + ); + assert.ok( + workflowHandlerSrc.includes("startAutoDetached("), + "workflow handler should launch auto-mode through startAutoDetached", + ); + assert.ok( + guidedFlowSrc.includes("startAutoDetached("), + "guided flow should launch auto-mode through startAutoDetached", + ); +}); + +test("startAutoDetached reports failures asynchronously (#3733)", () => { + const autoSrc = readGsdFile("auto.ts"); + + assert.ok( + autoSrc.includes("export function startAutoDetached"), + "auto.ts should export startAutoDetached", + ); + assert.ok( + autoSrc.includes("void startAuto(ctx, pi, base, verboseMode, options).catch"), + "startAutoDetached should launch startAuto without awaiting it", + ); + assert.ok( + autoSrc.includes("ctx.ui.notify(`Auto-start failed: ${message}`, \"error\")"), + "startAutoDetached should surface async startup failures to the user", + ); +}); + +test("detached auto-start preserves milestone lock across pause/stop cleanup (#3733)", () => { + const autoSrc = readGsdFile("auto.ts"); + const sessionSrc = readGsdFile("auto/session.ts"); + + assert.ok( + autoSrc.includes("milestoneLock?: string | null"), + "startAuto/startAutoDetached options should carry an explicit milestone lock", + ); + assert.ok( + autoSrc.includes("s.sessionMilestoneLock = options.milestoneLock ?? null;"), + "startAuto should capture the requested milestone lock before async work begins", + ); + assert.ok( + autoSrc.includes("milestoneLock: s.sessionMilestoneLock ?? undefined"), + "pause metadata should persist the detached milestone lock for resume", + ); + assert.ok( + autoSrc.includes("s.sessionMilestoneLock = meta.milestoneLock ?? 
null;"), + "resume should restore the persisted milestone lock", + ); + assert.ok( + autoSrc.includes("restoreMilestoneLockEnv();"), + "auto cleanup should restore the previous process milestone-lock env", + ); + + assert.ok( + sessionSrc.includes("sessionMilestoneLock: string | null = null;"), + "AutoSession should track the detached milestone lock explicitly", + ); +}); diff --git a/src/resources/extensions/gsd/tests/tool-compatibility.test.ts b/src/resources/extensions/gsd/tests/tool-compatibility.test.ts new file mode 100644 index 000000000..6b533bf63 --- /dev/null +++ b/src/resources/extensions/gsd/tests/tool-compatibility.test.ts @@ -0,0 +1,199 @@ +// GSD-2 — Tool Compatibility + Model Router Tool Filtering Tests (ADR-005 Phases 2-3) +import { describe, test, beforeEach } from "node:test"; +import assert from "node:assert/strict"; + +import { + registerToolCompatibility, + getToolCompatibility, + getAllToolCompatibility, + registerMcpToolCompatibility, + resetToolCompatibilityRegistry, +} from "@gsd/pi-coding-agent"; + +import { + isToolCompatibleWithProvider, + filterToolsForProvider, + adjustToolSet, +} from "../model-router.js"; + +import { + getProviderCapabilities, +} from "@gsd/pi-ai"; + +// ─── Tool Compatibility Registry ──────────────────────────────────────────── + +describe("tool compatibility registry", () => { + beforeEach(() => { + resetToolCompatibilityRegistry(); + }); + + test("built-in tools are pre-registered", () => { + const builtins = ["bash", "read", "write", "edit", "grep", "find", "ls", "lsp"]; + for (const name of builtins) { + const compat = getToolCompatibility(name); + assert.ok(compat !== undefined, `${name} should be pre-registered`); + } + }); + + test("unknown tool returns undefined", () => { + assert.equal(getToolCompatibility("nonexistent_tool_xyz"), undefined); + }); + + test("registerToolCompatibility stores and retrieves metadata", () => { + registerToolCompatibility("screenshot_tool", { + producesImages: true, + minCapabilityTier: "standard", + }); + const compat = getToolCompatibility("screenshot_tool"); + assert.ok(compat); + assert.equal(compat.producesImages, true); + assert.equal(compat.minCapabilityTier, "standard"); + }); + + test("registerMcpToolCompatibility sets default schema features", () => { + registerMcpToolCompatibility("mcp__test__tool"); + const compat = getToolCompatibility("mcp__test__tool"); + assert.ok(compat); + assert.ok(compat.schemaFeatures?.includes("patternProperties")); + }); + + test("registerMcpToolCompatibility allows overrides", () => { + registerMcpToolCompatibility("mcp__test__override", { producesImages: true }); + const compat = getToolCompatibility("mcp__test__override"); + assert.ok(compat); + assert.equal(compat.producesImages, true); + assert.ok(compat.schemaFeatures?.includes("patternProperties")); + }); + + test("getAllToolCompatibility returns all entries", () => { + const all = getAllToolCompatibility(); + assert.ok(all.size >= 10); // at least built-in tools + assert.ok(all.has("bash")); + assert.ok(all.has("read")); + }); + + test("resetToolCompatibilityRegistry clears custom entries but keeps builtins", () => { + registerToolCompatibility("custom_tool", { producesImages: true }); + assert.ok(getToolCompatibility("custom_tool")); + resetToolCompatibilityRegistry(); + assert.equal(getToolCompatibility("custom_tool"), undefined); + assert.ok(getToolCompatibility("bash")); // built-in preserved + }); +}); + +// ─── isToolCompatibleWithProvider ─────────────────────────────────────────── + 
+describe("isToolCompatibleWithProvider", () => { + beforeEach(() => { + resetToolCompatibilityRegistry(); + }); + + test("tool without compatibility metadata is always compatible", () => { + const caps = getProviderCapabilities("anthropic-messages"); + assert.equal(isToolCompatibleWithProvider("unknown_tool", caps), true); + }); + + test("built-in tools are compatible with all providers", () => { + const providers = ["anthropic-messages", "openai-responses", "google-generative-ai", "mistral-conversations"]; + const tools = ["bash", "read", "write", "edit"]; + for (const api of providers) { + const caps = getProviderCapabilities(api); + for (const tool of tools) { + assert.equal( + isToolCompatibleWithProvider(tool, caps), + true, + `${tool} should be compatible with ${api}`, + ); + } + } + }); + + test("image-producing tool filtered for providers without image support", () => { + registerToolCompatibility("screenshot", { producesImages: true }); + const openaiCaps = getProviderCapabilities("openai-responses"); + assert.equal(isToolCompatibleWithProvider("screenshot", openaiCaps), false); + + const anthropicCaps = getProviderCapabilities("anthropic-messages"); + assert.equal(isToolCompatibleWithProvider("screenshot", anthropicCaps), true); + }); + + test("tool with unsupported schema features filtered for Google", () => { + registerToolCompatibility("complex_schema_tool", { + schemaFeatures: ["patternProperties"], + }); + const googleCaps = getProviderCapabilities("google-generative-ai"); + assert.equal(isToolCompatibleWithProvider("complex_schema_tool", googleCaps), false); + + const anthropicCaps = getProviderCapabilities("anthropic-messages"); + assert.equal(isToolCompatibleWithProvider("complex_schema_tool", anthropicCaps), true); + }); +}); + +// ─── filterToolsForProvider ───────────────────────────────────────────────── + +describe("filterToolsForProvider", () => { + beforeEach(() => { + resetToolCompatibilityRegistry(); + }); + + test("all built-in tools pass for any provider", () => { + const toolNames = ["bash", "read", "write", "edit", "grep", "find", "ls"]; + const { compatible, filtered } = filterToolsForProvider(toolNames, "mistral-conversations"); + assert.deepEqual(compatible, toolNames); + assert.deepEqual(filtered, []); + }); + + test("image tool filtered for OpenAI Responses", () => { + registerToolCompatibility("browser_screenshot", { producesImages: true }); + const toolNames = ["bash", "read", "browser_screenshot"]; + const { compatible, filtered } = filterToolsForProvider(toolNames, "openai-responses"); + assert.deepEqual(compatible, ["bash", "read"]); + assert.deepEqual(filtered, ["browser_screenshot"]); + }); + + test("MCP tool with patternProperties filtered for Google", () => { + registerMcpToolCompatibility("mcp__repowise__search"); + const toolNames = ["bash", "read", "mcp__repowise__search"]; + const { compatible, filtered } = filterToolsForProvider(toolNames, "google-generative-ai"); + assert.deepEqual(compatible, ["bash", "read"]); + assert.deepEqual(filtered, ["mcp__repowise__search"]); + }); + + test("unknown provider passes all tools (permissive default)", () => { + registerToolCompatibility("image_tool", { producesImages: true }); + registerMcpToolCompatibility("mcp_tool"); + const toolNames = ["bash", "image_tool", "mcp_tool"]; + const { compatible, filtered } = filterToolsForProvider(toolNames, "unknown-provider-xyz"); + assert.deepEqual(compatible, toolNames); + assert.deepEqual(filtered, []); + }); +}); + +// ─── adjustToolSet 
────────────────────────────────────────────────────────── + +describe("adjustToolSet", () => { + beforeEach(() => { + resetToolCompatibilityRegistry(); + }); + + test("returns all tools for Anthropic (most permissive)", () => { + registerToolCompatibility("screenshot", { producesImages: true }); + const toolNames = ["bash", "read", "screenshot"]; + const { toolNames: result, removedTools } = adjustToolSet(toolNames, "anthropic-messages"); + assert.deepEqual(result, toolNames); + assert.deepEqual(removedTools, []); + }); + + test("removes incompatible tools and reports them", () => { + registerToolCompatibility("screenshot", { producesImages: true }); + registerMcpToolCompatibility("mcp_complex"); + const toolNames = ["bash", "read", "screenshot", "mcp_complex"]; + const { toolNames: result, removedTools } = adjustToolSet(toolNames, "google-generative-ai"); + // Google supports images but not patternProperties + assert.ok(result.includes("bash")); + assert.ok(result.includes("read")); + assert.ok(result.includes("screenshot")); // Google supports images + assert.ok(!result.includes("mcp_complex")); // patternProperties not supported + assert.deepEqual(removedTools, ["mcp_complex"]); + }); +}); diff --git a/src/resources/extensions/gsd/tests/tool-invocation-error-loop-break.test.ts b/src/resources/extensions/gsd/tests/tool-invocation-error-loop-break.test.ts index 5a2cdfa58..c802e91a5 100644 --- a/src/resources/extensions/gsd/tests/tool-invocation-error-loop-break.test.ts +++ b/src/resources/extensions/gsd/tests/tool-invocation-error-loop-break.test.ts @@ -61,6 +61,13 @@ describe("#2883: isToolInvocationError classification", () => { ); }); + test("detects Node v18+ JSON parse variant with property-value text", () => { + assert.equal( + isToolInvocationError("Expected ',' or '}' after property value in JSON at position 4096"), + true, + ); + }); + test("detects Unexpected end of JSON input", () => { assert.equal( isToolInvocationError("Unexpected end of JSON input"), diff --git a/src/resources/extensions/gsd/tests/validate-directory.test.ts b/src/resources/extensions/gsd/tests/validate-directory.test.ts index 72c45be38..c86e08a80 100644 --- a/src/resources/extensions/gsd/tests/validate-directory.test.ts +++ b/src/resources/extensions/gsd/tests/validate-directory.test.ts @@ -74,6 +74,27 @@ test("validateDirectory: C:\\Windows is blocked", { skip: !isWindows ? "Windows- assert.equal(result.severity, "blocked"); }); +test("validateDirectory: D:\\Windows is blocked", { skip: !isWindows ? "Windows-only test" : undefined }, () => { + const result = validateDirectory("D:\\Windows"); + assert.equal(result.safe, false); + assert.equal(result.severity, "blocked"); + assert.ok(result.reason?.includes("system directory")); +}); + +test("validateDirectory: E:\\Program Files is blocked", { skip: !isWindows ? "Windows-only test" : undefined }, () => { + const result = validateDirectory("E:\\Program Files"); + assert.equal(result.safe, false); + assert.equal(result.severity, "blocked"); + assert.ok(result.reason?.includes("system directory")); +}); + +test("validateDirectory: any Windows drive root is blocked", { skip: !isWindows ? 
"Windows-only test" : undefined }, () => { + const result = validateDirectory("D:\\"); + assert.equal(result.safe, false); + assert.equal(result.severity, "blocked"); + assert.ok(result.reason?.includes("system directory")); +}); + // ─── Home directory (cross-platform) ───────────────────────────────────────────── test("validateDirectory: home directory itself is blocked", () => { @@ -104,7 +125,13 @@ test("validateDirectory: subdirectory of home is NOT blocked", () => { // Regression test for #1317: GSD worktree inside $HOME must not be blocked even // when the resolved project root equals $HOME (e.g. home dir is a git repo). test("validateDirectory: GSD worktree path nested under home is NOT blocked (#1317)", () => { + const originalHome = process.env.HOME; + const originalUserProfile = process.env.USERPROFILE; + const fakeHome = makeTempDir("fake-home"); + process.env.HOME = fakeHome; + process.env.USERPROFILE = fakeHome; const worktreePath = join(homedir(), ".gsd", "worktrees", "M001"); + const worktreeRoot = join(fakeHome, ".gsd", "worktrees", "M001"); mkdirSync(worktreePath, { recursive: true }); try { // The worktree CWD itself is a valid location — it must pass. @@ -112,7 +139,12 @@ test("validateDirectory: GSD worktree path nested under home is NOT blocked (#13 assert.equal(result.safe, true, "GSD worktree path should be safe to run in"); assert.equal(result.severity, "ok"); } finally { - rmSync(join(homedir(), ".gsd", "worktrees", "M001"), { recursive: true, force: true }); + if (originalHome === undefined) delete process.env.HOME; + else process.env.HOME = originalHome; + if (originalUserProfile === undefined) delete process.env.USERPROFILE; + else process.env.USERPROFILE = originalUserProfile; + rmSync(worktreeRoot, { recursive: true, force: true }); + rmSync(fakeHome, { recursive: true, force: true }); } }); diff --git a/src/resources/extensions/gsd/tests/validate-milestone-prompt-verification-classes.test.ts b/src/resources/extensions/gsd/tests/validate-milestone-prompt-verification-classes.test.ts new file mode 100644 index 000000000..df08568f3 --- /dev/null +++ b/src/resources/extensions/gsd/tests/validate-milestone-prompt-verification-classes.test.ts @@ -0,0 +1,18 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +const promptPath = join(process.cwd(), "src/resources/extensions/gsd/prompts/validate-milestone.md"); +const prompt = readFileSync(promptPath, "utf-8"); + +test("validate-milestone reviewer C requires canonical verification class names", () => { + assert.match(prompt, /\*\*Reviewer C[\s\S]*Verification Classes/i); + assert.match(prompt, /exact class names [`']?Contract[`']?, [`']?Integration[`']?, [`']?Operational[`']?, and [`']?UAT[`']?/i); + assert.match(prompt, /If no verification classes were planned, say that explicitly/i); +}); + +test("validate-milestone prompt routes verification class analysis into verificationClasses", () => { + assert.match(prompt, /pass it in `verificationClasses`/i); + assert.match(prompt, /Extract the `Verification Classes` subsection from Reviewer C and pass it verbatim in `verificationClasses`/); +}); diff --git a/src/resources/extensions/gsd/tests/validate-milestone.test.ts b/src/resources/extensions/gsd/tests/validate-milestone.test.ts index 569abd796..7ba062229 100644 --- a/src/resources/extensions/gsd/tests/validate-milestone.test.ts +++ b/src/resources/extensions/gsd/tests/validate-milestone.test.ts @@ -9,10 +9,11 @@ 
import { deriveState, isValidationTerminal } from "../state.ts"; import { resolveExpectedArtifactPath, diagnoseExpectedArtifact } from "../auto-artifact-paths.ts"; import { verifyExpectedArtifact, buildLoopRemediationSteps } from "../auto-recovery.ts"; import { resolveDispatch, type DispatchContext } from "../auto-dispatch.ts"; -import { buildValidateMilestonePrompt } from "../auto-prompts.ts"; +import { buildCompleteMilestonePrompt, buildValidateMilestonePrompt } from "../auto-prompts.ts"; import type { GSDState } from "../types.ts"; import { clearPathCache } from "../paths.ts"; import { clearParseCache } from "../files.ts"; +import { closeDatabase, insertMilestone, insertSlice, openDatabase } from "../gsd-db.ts"; // ─── Helpers ────────────────────────────────────────────────────────────── @@ -25,9 +26,15 @@ function makeTmpBase(): string { function cleanup(base: string): void { clearPathCache(); clearParseCache(); + closeDatabase(); try { rmSync(base, { recursive: true, force: true }); } catch { /* */ } } +function openTestDb(base: string): void { + const dbPath = join(base, ".gsd", "gsd.db"); + assert.equal(openDatabase(dbPath), true, "test DB should open"); +} + function writeRoadmap(base: string, mid: string, content: string): void { const dir = join(base, ".gsd", "milestones", mid); mkdirSync(dir, { recursive: true }); @@ -218,6 +225,85 @@ test("buildValidateMilestonePrompt inlines ASSESSMENT evidence instead of UAT sp } }); +test("buildCompleteMilestonePrompt skips skipped slices from DB-backed summary inlining", async () => { + const base = makeTmpBase(); + try { + writeRoadmap(base, "M001", `# M001: Test Milestone + +## Vision +Test + +## Success Criteria +- It works + +## Slices + +- [x] **S01: First slice** \`risk:low\` \`depends:[]\` + > Done +- [ ] **S02: Skipped slice** \`risk:low\` \`depends:[]\` + > Intentionally skipped + +## Boundary Map + +| From | To | Produces | Consumes | +|------|-----|----------|----------| +| S01 | terminal | output | nothing | +`); + openTestDb(base); + insertMilestone({ id: "M001", title: "Test Milestone", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "First slice", status: "complete", depends: [], sequence: 1 }); + insertSlice({ id: "S02", milestoneId: "M001", title: "Skipped slice", status: "skipped", depends: [], sequence: 2 }); + writeSliceSummary(base, "M001", "S01", "# S01 Summary\nDelivered."); + + const prompt = await buildCompleteMilestonePrompt("M001", "Test Milestone", base); + assert.match(prompt, /S01 Summary/i, "prompt should inline non-skipped slice summaries"); + assert.doesNotMatch(prompt, /### S02 Summary/i, "prompt should not inline skipped slice summaries"); + assert.doesNotMatch(prompt, /not found — file does not exist yet/i, "prompt should not emit skipped-slice missing-file placeholders"); + } finally { + cleanup(base); + } +}); + +test("buildValidateMilestonePrompt skips skipped slices from DB-backed summary inlining", async () => { + const base = makeTmpBase(); + try { + writeRoadmap(base, "M001", `# M001: Test Milestone + +## Vision +Test + +## Success Criteria +- It works + +## Slices + +- [x] **S01: First slice** \`risk:low\` \`depends:[]\` + > Done +- [ ] **S02: Skipped slice** \`risk:low\` \`depends:[]\` + > Intentionally skipped + +## Boundary Map + +| From | To | Produces | Consumes | +|------|-----|----------|----------| +| S01 | terminal | output | nothing | +`); + openTestDb(base); + insertMilestone({ id: "M001", title: "Test Milestone", status: "active" }); + insertSlice({ id: 
"S01", milestoneId: "M001", title: "First slice", status: "complete", depends: [], sequence: 1 }); + insertSlice({ id: "S02", milestoneId: "M001", title: "Skipped slice", status: "skipped", depends: [], sequence: 2 }); + writeSliceSummary(base, "M001", "S01", "# S01 Summary\nDelivered."); + writeSliceAssessment(base, "M001", "S01", "---\nverdict: PASS\n---\n# Assessment\nEvidence captured."); + + const prompt = await buildValidateMilestonePrompt("M001", "Test Milestone", base); + assert.match(prompt, /S01 Summary/i, "prompt should inline non-skipped slice summaries"); + assert.doesNotMatch(prompt, /### S02 Summary/i, "prompt should not inline skipped slice summaries"); + assert.doesNotMatch(prompt, /not found — file does not exist yet/i, "prompt should not emit skipped-slice missing-file placeholders"); + } finally { + cleanup(base); + } +}); + // ─── Dispatch rule ──────────────────────────────────────────────────────── test("dispatch rule matches validating-milestone phase", async () => { diff --git a/src/resources/extensions/gsd/tests/workflow-mcp-auto-prep.test.ts b/src/resources/extensions/gsd/tests/workflow-mcp-auto-prep.test.ts new file mode 100644 index 000000000..fabb15c3a --- /dev/null +++ b/src/resources/extensions/gsd/tests/workflow-mcp-auto-prep.test.ts @@ -0,0 +1,76 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +import { prepareWorkflowMcpForProject, shouldAutoPrepareWorkflowMcp } from "../workflow-mcp-auto-prep.ts"; + +test("shouldAutoPrepareWorkflowMcp enables prep for externalCli local transport", () => { + const result = shouldAutoPrepareWorkflowMcp({ + model: { provider: "claude-code", baseUrl: "local://claude-code" }, + modelRegistry: { + getProviderAuthMode: () => "externalCli", + isProviderRequestReady: () => false, + }, + }); + + assert.equal(result, true); +}); + +test("shouldAutoPrepareWorkflowMcp enables prep when claude-code provider is ready", () => { + const result = shouldAutoPrepareWorkflowMcp({ + model: { provider: "openai", baseUrl: "https://api.openai.com" }, + modelRegistry: { + getProviderAuthMode: () => "apiKey", + isProviderRequestReady: (provider: string) => provider === "claude-code", + }, + }); + + assert.equal(result, true); +}); + +test("shouldAutoPrepareWorkflowMcp enables prep when claude-code provider is registered", () => { + const result = shouldAutoPrepareWorkflowMcp({ + model: { provider: "openai", baseUrl: "https://api.openai.com" }, + modelRegistry: { + getProviderAuthMode: (provider: string) => provider === "claude-code" ? 
"externalCli" : "apiKey", + isProviderRequestReady: () => false, + }, + }); + + assert.equal(result, true); +}); + +test("shouldAutoPrepareWorkflowMcp stays disabled when neither transport nor provider readiness match", () => { + const result = shouldAutoPrepareWorkflowMcp({ + model: { provider: "openai", baseUrl: "https://api.openai.com" }, + modelRegistry: { + getProviderAuthMode: () => "apiKey", + isProviderRequestReady: () => false, + }, + }); + + assert.equal(result, false); +}); + +test("prepareWorkflowMcpForProject warns with /gsd mcp init guidance when prep fails", () => { + const notifications: Array<{ message: string; level: "info" | "warning" | "error" | "success" }> = []; + const result = prepareWorkflowMcpForProject( + { + model: { provider: "claude-code", baseUrl: "local://claude-code" }, + modelRegistry: { + getProviderAuthMode: () => "externalCli", + isProviderRequestReady: () => true, + }, + ui: { + notify: (message: string, level?: "info" | "warning" | "error" | "success") => { + notifications.push({ message, level: level ?? "info" }); + }, + }, + }, + "/", + ); + + assert.equal(result, null); + assert.equal(notifications.length, 1); + assert.equal(notifications[0].level, "warning"); + assert.match(notifications[0].message, /Please run \/gsd mcp init \./); +}); diff --git a/src/resources/extensions/gsd/tests/workflow-mcp.test.ts b/src/resources/extensions/gsd/tests/workflow-mcp.test.ts index fb91a1b94..2b9687623 100644 --- a/src/resources/extensions/gsd/tests/workflow-mcp.test.ts +++ b/src/resources/extensions/gsd/tests/workflow-mcp.test.ts @@ -6,6 +6,7 @@ import { tmpdir } from "node:os"; import { fileURLToPath } from "node:url"; import { Client } from "@modelcontextprotocol/sdk/client/index.js"; import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js"; +import { ElicitRequestSchema } from "@modelcontextprotocol/sdk/types.js"; import { buildWorkflowMcpServers, @@ -13,16 +14,27 @@ import { getWorkflowTransportSupportError, getRequiredWorkflowToolsForAutoUnit, getRequiredWorkflowToolsForGuidedUnit, + supportsStructuredQuestions, usesWorkflowMcpTransport, } from "../workflow-mcp.ts"; const __dirname = dirname(fileURLToPath(import.meta.url)); const gsdDir = join(__dirname, ".."); +type ElicitPayload = { + message: string; + requestedSchema: { properties: Record; required?: string[] }; +}; + function readSrc(file: string): string { return readFileSync(join(gsdDir, file), "utf-8"); } +function extractElicitPayload(request: unknown): ElicitPayload { + const payload = (request as { params?: unknown }).params ?? request; + return payload as ElicitPayload; +} + test("guided execute-task requires canonical task completion tool", () => { assert.deepEqual(getRequiredWorkflowToolsForGuidedUnit("execute-task"), ["gsd_task_complete"]); }); @@ -141,7 +153,11 @@ test("detectWorkflowMcpLaunchConfig resolves the bundled server relative to the assert.match(launch?.env?.GSD_WORKFLOW_EXECUTORS_MODULE ?? "", /workflow-tool-executors\.(js|ts)$/); assert.match(launch?.env?.GSD_WORKFLOW_WRITE_GATE_MODULE ?? "", /write-gate\.(js|ts)$/); assert.equal(typeof launch?.args?.[0], "string"); - assert.match(launch?.args?.[0] ?? "", /packages[\/\\]mcp-server[\/\\]dist[\/\\]cli\.js$/); + assert.match(launch?.args?.[0] ?? "", /packages[\/\\]mcp-server[\/\\](dist[\/\\]cli\.js|src[\/\\]cli\.ts)$/); + if ((launch?.args?.[0] ?? "").endsWith(".ts")) { + assert.match(launch?.env?.NODE_OPTIONS ?? "", /--experimental-strip-types/); + assert.match(launch?.env?.NODE_OPTIONS ?? 
"", /resolve-ts\.mjs/); + } }); test("detectWorkflowMcpLaunchConfig resolves the bundled server relative to the package without env hints", () => { @@ -154,7 +170,11 @@ test("detectWorkflowMcpLaunchConfig resolves the bundled server relative to the assert.match(launch?.env?.GSD_WORKFLOW_EXECUTORS_MODULE ?? "", /workflow-tool-executors\.(js|ts)$/); assert.match(launch?.env?.GSD_WORKFLOW_WRITE_GATE_MODULE ?? "", /write-gate\.(js|ts)$/); assert.equal(typeof launch?.args?.[0], "string"); - assert.match(launch?.args?.[0] ?? "", /packages[\/\\]mcp-server[\/\\]dist[\/\\]cli\.js$/); + assert.match(launch?.args?.[0] ?? "", /packages[\/\\]mcp-server[\/\\](dist[\/\\]cli\.js|src[\/\\]cli\.ts)$/); + if ((launch?.args?.[0] ?? "").endsWith(".ts")) { + assert.match(launch?.env?.NODE_OPTIONS ?? "", /--experimental-strip-types/); + assert.match(launch?.env?.NODE_OPTIONS ?? "", /resolve-ts\.mjs/); + } }); test("workflow MCP launch config reaches mutation tools over stdio", async () => { @@ -165,14 +185,37 @@ test("workflow MCP launch config reaches mutation tools over stdio", async () => assert.ok(launch, "expected a workflow MCP launch config"); assert.match( launch.env?.GSD_WORKFLOW_EXECUTORS_MODULE ?? "", - /dist[\/\\]resources[\/\\]extensions[\/\\]gsd[\/\\]tools[\/\\]workflow-tool-executors\.js$/, + /(dist[\/\\]resources[\/\\]extensions[\/\\]gsd[\/\\]tools[\/\\]workflow-tool-executors\.js|src[\/\\]resources[\/\\]extensions[\/\\]gsd[\/\\]tools[\/\\]workflow-tool-executors\.(js|ts))$/, ); assert.match( launch.env?.GSD_WORKFLOW_WRITE_GATE_MODULE ?? "", - /dist[\/\\]resources[\/\\]extensions[\/\\]gsd[\/\\]bootstrap[\/\\]write-gate\.js$/, + /(dist[\/\\]resources[\/\\]extensions[\/\\]gsd[\/\\]bootstrap[\/\\]write-gate\.js|src[\/\\]resources[\/\\]extensions[\/\\]gsd[\/\\]bootstrap[\/\\]write-gate\.(js|ts))$/, ); + if ((launch.env?.GSD_WORKFLOW_EXECUTORS_MODULE ?? "").endsWith(".ts")) { + assert.match(launch.env?.NODE_OPTIONS ?? "", /--experimental-strip-types/); + assert.match(launch.env?.NODE_OPTIONS ?? "", /resolve-ts\.mjs/); + } - const client = new Client({ name: "workflow-mcp-transport-test", version: "1.0.0" }); + const client = new Client( + { name: "workflow-mcp-transport-test", version: "1.0.0" }, + { capabilities: { elicitation: {} } }, + ); + client.setRequestHandler(ElicitRequestSchema, async (request) => { + const elicitation = extractElicitPayload(request as unknown); + + assert.match(elicitation.message, /Please answer the following question/); + assert.ok(elicitation.requestedSchema.properties.transport_mode); + assert.ok(elicitation.requestedSchema.properties["transport_mode__note"]); + assert.ok(elicitation.requestedSchema.required?.includes("transport_mode")); + + return { + action: "accept", + content: { + transport_mode: "None of the above", + transport_mode__note: "Need Windows-safe MCP elicitation.", + }, + }; + }); const transport = new StdioClientTransport({ command: launch.command, args: launch.args, @@ -189,6 +232,42 @@ test("workflow MCP launch config reaches mutation tools over stdio", async () => (tools.tools ?? []).some((tool) => tool.name === "gsd_plan_slice"), "expected workflow MCP surface to expose gsd_plan_slice", ); + assert.ok( + (tools.tools ?? 
[]).some((tool) => tool.name === "ask_user_questions"),
+ "expected workflow MCP surface to expose ask_user_questions",
+ );
+
+ const askResult = await client.callTool(
+ {
+ name: "ask_user_questions",
+ arguments: {
+ questions: [
+ {
+ id: "transport_mode",
+ header: "Transport",
+ question: "How should the workflow prompt be delivered?",
+ options: [
+ { label: "Local UI", description: "Use the host tool UI." },
+ { label: "Remote UI", description: "Use a remote response channel." },
+ ],
+ },
+ ],
+ },
+ },
+ undefined,
+ { timeout: 30_000 },
+ );
+ assert.equal(askResult.isError, undefined);
+ assert.equal(
+ ((askResult.content as Array<{ text?: string }>)?.[0])?.text ?? "",
+ JSON.stringify({
+ answers: {
+ transport_mode: {
+ answers: ["None of the above", "user_note: Need Windows-safe MCP elicitation."],
+ },
+ },
+ }),
+ );

const milestoneResult = await client.callTool(
{
@@ -269,12 +348,123 @@ test("workflow MCP launch config reaches mutation tools over stdio", async () =>
}
});

+test("workflow MCP ask_user_questions uses stdio elicitation round-trip", async () => {
+ const projectRoot = mkdtempSync(join(tmpdir(), "gsd-workflow-elicit-"));
+ mkdirSync(join(projectRoot, ".gsd"), { recursive: true });
+
+ const launch = detectWorkflowMcpLaunchConfig(projectRoot, {});
+ assert.ok(launch, "expected a workflow MCP launch config");
+
+ const client = new Client(
+ { name: "workflow-mcp-elicit-test", version: "1.0.0" },
+ { capabilities: { elicitation: {} } },
+ );
+ let requestSeen: {
+ message: string;
+ requestedSchema: { properties: Record<string, unknown>; required?: string[] };
+ } | null = null;
+
+ client.setRequestHandler(ElicitRequestSchema, async (request) => {
+ const params = extractElicitPayload(request as unknown);
+
+ requestSeen = params;
+
+ return {
+ action: "accept",
+ content: {
+ deployment: "None of the above",
+ deployment__note: "Need hybrid deployment.",
+ },
+ };
+ });
+
+ const transport = new StdioClientTransport({
+ command: launch.command,
+ args: launch.args,
+ env: { ...process.env, ...launch.env } as Record<string, string>,
+ cwd: launch.cwd,
+ stderr: "pipe",
+ });
+
+ try {
+ await client.connect(transport, { timeout: 30_000 });
+
+ const result = await client.callTool(
+ {
+ name: "ask_user_questions",
+ arguments: {
+ questions: [
+ {
+ id: "deployment",
+ header: "Deploy",
+ question: "Where will this run?",
+ options: [
+ { label: "Cloud", description: "Managed hosting." },
+ { label: "On-prem", description: "Runs in customer infrastructure."
+
+    assert.ok(requestSeen, "expected stdio transport to forward an elicitation request");
+    const seen = requestSeen as ElicitPayload;
+    assert.match(seen.message, /Please answer the following question/);
+    assert.ok(seen.requestedSchema.properties.deployment);
+    assert.ok(seen.requestedSchema.properties.deployment__note);
+    assert.ok(seen.requestedSchema.required?.includes("deployment"));
+
+    const content = (result as { content: Array<{ type: string; text?: string }> }).content;
+    const text = content.find((item: { type: string; text?: string }) => item.type === "text");
+    assert.ok(text && "text" in text);
+    assert.equal(
+      text.text,
+      JSON.stringify({
+        answers: {
+          deployment: {
+            answers: ["None of the above", "user_note: Need hybrid deployment."],
+          },
+        },
+      }),
+    );
+  } finally {
+    await client.close();
+  }
+});
+
 test("usesWorkflowMcpTransport matches local externalCli providers", () => {
   assert.equal(usesWorkflowMcpTransport("externalCli", "local://claude-code"), true);
   assert.equal(usesWorkflowMcpTransport("externalCli", "https://api.example.com"), false);
   assert.equal(usesWorkflowMcpTransport("oauth", "local://custom"), false);
 });
 
+test("supportsStructuredQuestions disables structured ask flow on workflow MCP transports", () => {
+  assert.equal(
+    supportsStructuredQuestions(["ask_user_questions"], {
+      authMode: "externalCli",
+      baseUrl: "local://claude-code",
+    }),
+    false,
+  );
+  assert.equal(
+    supportsStructuredQuestions(["ask_user_questions"], {
+      authMode: "oauth",
+      baseUrl: "https://api.anthropic.com",
+    }),
+    true,
+  );
+  assert.equal(
+    supportsStructuredQuestions([], {
+      authMode: "oauth",
+      baseUrl: "https://api.anthropic.com",
+    }),
+    false,
+  );
+});
+
 test("transport compatibility passes when required tools fit current MCP surface", () => {
   const error = getWorkflowTransportSupportError(
     "claude-code",
@@ -465,18 +655,18 @@ test("transport compatibility now allows replan-slice over workflow MCP surface"
 
 test("transport compatibility still blocks units whose MCP tools are not exposed", () => {
   const error = getWorkflowTransportSupportError(
     "claude-code",
-    ["gsd_skip_slice"],
+    ["secure_env_collect"],
     {
       projectRoot: "/tmp/project",
      env: { GSD_WORKFLOW_MCP_COMMAND: "node" },
       surface: "auto-mode",
-      unitType: "skip-slice",
+      unitType: "guided-discussion",
       authMode: "externalCli",
       baseUrl: "local://claude-code",
     },
   );
 
-  assert.match(error ?? "", /requires gsd_skip_slice/);
+  assert.match(error ?? "", /requires secure_env_collect/);
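+  // gsd_skip_slice joined MCP_WORKFLOW_TOOL_SURFACE in this change, so this
+  // scenario now names secure_env_collect, a tool that remains off the
+  // workflow MCP surface.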
"", /currently exposes only/); }); @@ -498,3 +688,8 @@ test("auto phases source enforces workflow compatibility preflight", () => { assert.match(src, /getWorkflowTransportSupportError/); assert.match(src, /workflow-capability/); }); + +test("workflow transport error guidance includes /gsd mcp init hint", () => { + const src = readSrc("workflow-mcp.ts"); + assert.match(src, /Please run \/gsd mcp init \./); +}); diff --git a/src/resources/extensions/gsd/tests/workflow-tool-executors.test.ts b/src/resources/extensions/gsd/tests/workflow-tool-executors.test.ts index 06c01c419..327f51759 100644 --- a/src/resources/extensions/gsd/tests/workflow-tool-executors.test.ts +++ b/src/resources/extensions/gsd/tests/workflow-tool-executors.test.ts @@ -256,6 +256,28 @@ test("executePlanSlice writes task planning state and rendered plan artifacts", } }); +test("executePlanSlice marks validation failures with isError", async () => { + const base = makeTmpBase(); + try { + openTestDb(base); + + const result = await inProjectDir(base, () => executePlanSlice({ + milestoneId: "M001", + sliceId: "S01", + goal: "Trigger validation failure for empty tasks.", + tasks: [], + }, base)); + + assert.equal(result.isError, true); + assert.equal(result.details.operation, "plan_slice"); + assert.match(String(result.details.error), /validation failed: tasks must be a non-empty array/); + assert.match(result.content[0].text, /Error planning slice:/); + } finally { + closeDatabase(); + cleanup(base); + } +}); + test("executeSliceComplete coerces string enrichment entries and writes summary/UAT artifacts", async () => { const base = makeTmpBase(); try { diff --git a/src/resources/extensions/gsd/tests/write-gate.test.ts b/src/resources/extensions/gsd/tests/write-gate.test.ts index 48c0c5524..04b2c4603 100644 --- a/src/resources/extensions/gsd/tests/write-gate.test.ts +++ b/src/resources/extensions/gsd/tests/write-gate.test.ts @@ -230,16 +230,13 @@ import { // ─── Scenario 19: isGateQuestionId recognizes all gate patterns ── test('write-gate: isGateQuestionId recognizes all gate patterns', () => { - assert.strictEqual(isGateQuestionId('layer1_scope_gate'), true); - assert.strictEqual(isGateQuestionId('layer2_architecture_gate'), true); - assert.strictEqual(isGateQuestionId('layer3_error_gate'), true); - assert.strictEqual(isGateQuestionId('layer4_quality_gate'), true); assert.strictEqual(isGateQuestionId('depth_verification'), true); assert.strictEqual(isGateQuestionId('depth_verification_M002'), true); - assert.strictEqual(isGateQuestionId('my_layer1_scope_gate_question'), true); + assert.strictEqual(isGateQuestionId('depth_verification_confirm'), true); // Non-gate question IDs assert.strictEqual(isGateQuestionId('project_intent'), false); assert.strictEqual(isGateQuestionId('feature_priority'), false); + assert.strictEqual(isGateQuestionId('layer1_scope_gate'), false); assert.strictEqual(isGateQuestionId(''), false); }); @@ -249,14 +246,14 @@ test('write-gate: pending gate lifecycle (set, get, clear)', () => { clearDiscussionFlowState(); assert.strictEqual(getPendingGate(), null, 'starts null'); - setPendingGate('layer1_scope_gate'); - assert.strictEqual(getPendingGate(), 'layer1_scope_gate', 'set correctly'); + setPendingGate('depth_verification'); + assert.strictEqual(getPendingGate(), 'depth_verification', 'set correctly'); clearPendingGate(); assert.strictEqual(getPendingGate(), null, 'cleared correctly'); // clearDiscussionFlowState also clears pending gate - setPendingGate('layer2_architecture_gate'); + 
   assert.strictEqual(isGateQuestionId(''), false);
 });
 
@@ -249,14 +246,14 @@ test('write-gate: pending gate lifecycle (set, get, clear)', () => {
   clearDiscussionFlowState();
   assert.strictEqual(getPendingGate(), null, 'starts null');
 
-  setPendingGate('layer1_scope_gate');
-  assert.strictEqual(getPendingGate(), 'layer1_scope_gate', 'set correctly');
+  setPendingGate('depth_verification');
+  assert.strictEqual(getPendingGate(), 'depth_verification', 'set correctly');
 
   clearPendingGate();
   assert.strictEqual(getPendingGate(), null, 'cleared correctly');
 
   // clearDiscussionFlowState also clears pending gate
-  setPendingGate('layer2_architecture_gate');
+  setPendingGate('depth_verification_M002');
   clearDiscussionFlowState();
   assert.strictEqual(getPendingGate(), null, 'clearDiscussionFlowState clears pending gate');
 });
@@ -265,12 +262,12 @@ test('write-gate: pending gate lifecycle (set, get, clear)', () => {
 
 test('write-gate: shouldBlockPendingGate blocks write/edit during pending gate', () => {
   clearDiscussionFlowState();
 
-  setPendingGate('layer1_scope_gate');
+  setPendingGate('depth_verification');
 
   // write should be blocked during discussion
   const writeResult = shouldBlockPendingGate('write', 'M001', false);
   assert.strictEqual(writeResult.block, true, 'write should be blocked');
-  assert.ok(writeResult.reason!.includes('layer1_scope_gate'), 'reason mentions the gate');
+  assert.ok(writeResult.reason!.includes('depth_verification'), 'reason mentions the gate');
 
   // edit should be blocked
   const editResult = shouldBlockPendingGate('edit', 'M001', false);
@@ -287,7 +284,7 @@ test('write-gate: shouldBlockPendingGate blocks write/edit during pending gate',
 test('write-gate: shouldBlockPendingGate allows read-only and ask_user_questions during pending gate', () => {
   clearDiscussionFlowState();
 
-  setPendingGate('layer1_scope_gate');
+  setPendingGate('depth_verification');
 
   // ask_user_questions is always safe (model needs to re-ask)
   assert.strictEqual(shouldBlockPendingGate('ask_user_questions', 'M001').block, false);
@@ -304,7 +301,7 @@ test('write-gate: shouldBlockPendingGate allows read-only and ask_user_questions
 test('write-gate: shouldBlockPendingGate blocks outside discussion when a gate is pending', () => {
   clearDiscussionFlowState();
 
-  setPendingGate('layer1_scope_gate');
+  setPendingGate('depth_verification');
 
   // No milestoneId and no queue phase — still block because the gate is pending
   const result = shouldBlockPendingGate('write', null, false);
@@ -330,7 +327,7 @@ test('write-gate: shouldBlockPendingGate blocks in queue mode when gate is pendi
 test('write-gate: shouldBlockPendingGateBash allows read-only commands during pending gate', () => {
   clearDiscussionFlowState();
 
-  setPendingGate('layer2_architecture_gate');
+  setPendingGate('depth_verification');
 
   assert.strictEqual(shouldBlockPendingGateBash('cat file.txt', 'M001').block, false);
   assert.strictEqual(shouldBlockPendingGateBash('git log --oneline', 'M001').block, false);
@@ -344,11 +341,11 @@ test('write-gate: shouldBlockPendingGateBash allows read-only commands during pe
 test('write-gate: shouldBlockPendingGateBash blocks mutating commands during pending gate', () => {
   clearDiscussionFlowState();
 
-  setPendingGate('layer2_architecture_gate');
+  setPendingGate('depth_verification');
 
   const result = shouldBlockPendingGateBash('npm run build', 'M001');
   assert.strictEqual(result.block, true, 'mutating bash should be blocked');
-  assert.ok(result.reason!.includes('layer2_architecture_gate'));
+  assert.ok(result.reason!.includes('depth_verification'));
 
   clearDiscussionFlowState();
 });
@@ -365,7 +362,7 @@ test('write-gate: no pending gate means no blocking', () => {
 
 // ─── Scenario 28: resetWriteGateState clears pending gate ──
 test('write-gate: resetWriteGateState clears pending gate', () => {
-  setPendingGate('layer3_error_gate');
+  setPendingGate('depth_verification');
   resetWriteGateState();
   assert.strictEqual(getPendingGate(), null);
 });
diff --git a/src/resources/extensions/gsd/tools/complete-slice.ts b/src/resources/extensions/gsd/tools/complete-slice.ts
index 5863a586f..6e9c06e61 100644
--- a/src/resources/extensions/gsd/tools/complete-slice.ts
+++ b/src/resources/extensions/gsd/tools/complete-slice.ts
@@ -21,7 +21,10 @@ import {
   getMilestone,
   updateSliceStatus,
   setSliceSummaryMd,
+  saveGateResult,
+  getPendingGatesForTurn,
 } from "../gsd-db.js";
+import { getGatesForTurn } from "../gate-registry.js";
 import { resolveSliceFile, resolveSlicePath, clearPathCache } from "../paths.js";
 import { checkOwnership, sliceUnitKey } from "../unit-ownership.js";
 import { saveFile, clearParseCache } from "../files.js";
@@ -39,6 +42,23 @@ export interface CompleteSliceResult {
   uatPath: string;
 }
 
+/**
+ * Map a complete-slice-owned gate id to the CompleteSliceParams field
+ * whose presence drives `pass` vs. `omitted`. Keep this in lockstep with
+ * the gates declared in gate-registry.ts under ownerTurn "complete-slice".
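+ *
+ * Illustrative mapping (mirrors the switch below): sliceGateFieldForId("Q8",
+ * params) returns params.operationalReadiness, so a populated Operational
+ * Readiness section closes Q8 as `pass` and an empty one records `omitted`.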
+ */
+function sliceGateFieldForId(
+  id: string,
+  params: CompleteSliceParams,
+): string | undefined {
+  switch (id) {
+    case "Q8":
+      return params.operationalReadiness;
+    default:
+      return undefined;
+  }
+}
+
 /**
  * Render slice summary markdown matching the template format.
  * YAML frontmatter uses snake_case keys for parseSummary() compatibility.
@@ -169,6 +189,10 @@
 ${reqSurfaced}
 
 ${reqInvalidated}
 
+## Operational Readiness
+
+${params.operationalReadiness?.trim() || "None."}
+
 ## Deviations
 
 ${params.deviations || "None."}
@@ -330,6 +354,45 @@ export async function handleCompleteSlice(
   // Store rendered markdown in DB for D004 recovery
   setSliceSummaryMd(params.milestoneId, params.sliceId, summaryMd, uatMd);
 
+  // ── Close gates owned by complete-slice (Q8) ───────────────────────────
+  // Each owned gate maps to a specific summary section via the registry.
+  // If the caller populated the corresponding field, record `pass`; if the
+  // field is empty, record `omitted`. Without this loop, Q8 would stay
+  // pending forever and block future state derivation (see gate-registry).
+  try {
+    const pendingGates = getPendingGatesForTurn(
+      params.milestoneId,
+      params.sliceId,
+      "complete-slice",
+    );
+    if (pendingGates.length > 0) {
+      const ownedDefs = new Map(getGatesForTurn("complete-slice").map((g) => [g.id, g] as const));
+      for (const row of pendingGates) {
+        const def = ownedDefs.get(row.gate_id);
+        if (!def) continue;
+        // Map the gate id to the param field that feeds it. Keep the map
+        // local so adding a new complete-slice gate is a single-place change.
+        const field = sliceGateFieldForId(def.id, params);
+        const hasContent = typeof field === "string" && field.trim().length > 0;
+        saveGateResult({
+          milestoneId: params.milestoneId,
+          sliceId: params.sliceId,
+          gateId: def.id,
+          verdict: hasContent ? "pass" : "omitted",
+          rationale: hasContent
+            ? `${def.promptSection} section populated in slice summary`
+            : `${def.promptSection} section left empty — recorded as omitted`,
+          findings: hasContent ? (field as string).trim() : "",
+        });
+      }
+    }
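+    // Gate bookkeeping is deliberately best-effort: a failure below is
+    // logged rather than failing the already-persisted slice completion.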
(field as string).trim() : "", + }); + } + } + } catch (gateErr) { + logWarning( + "tool", + `complete-slice gate close warning for ${params.milestoneId}/${params.sliceId}: ${(gateErr as Error).message}`, + ); + } + // Invalidate all caches invalidateStateCache(); clearPathCache(); diff --git a/src/resources/extensions/gsd/tools/complete-task.ts b/src/resources/extensions/gsd/tools/complete-task.ts index 00cfa78d8..f19f5b4b9 100644 --- a/src/resources/extensions/gsd/tools/complete-task.ts +++ b/src/resources/extensions/gsd/tools/complete-task.ts @@ -24,7 +24,10 @@ import { updateTaskStatus, setTaskSummaryMd, deleteVerificationEvidence, + saveGateResult, + getPendingGatesForTurn, } from "../gsd-db.js"; +import { getGatesForTurn } from "../gate-registry.js"; import { resolveSliceFile, resolveTasksDir, clearPathCache } from "../paths.js"; import { checkOwnership, taskUnitKey } from "../unit-ownership.js"; import { saveFile, clearParseCache } from "../files.js"; @@ -44,6 +47,27 @@ export interface CompleteTaskResult { import type { TaskRow } from "../gsd-db.js"; +/** + * Map an execute-task-owned gate id to the CompleteTaskParams field whose + * presence drives `pass` vs. `omitted`. Keep in lockstep with the gates + * declared in gate-registry.ts under ownerTurn "execute-task". + */ +function taskGateFieldForId( + id: string, + params: CompleteTaskParams, +): string | undefined { + switch (id) { + case "Q5": + return params.failureModes; + case "Q6": + return params.loadProfile; + case "Q7": + return params.negativeTests; + default: + return undefined; + } +} + /** * Normalize a list parameter that may arrive as a string (newline-delimited * bullet list from the LLM) into a string array (#3361). @@ -236,6 +260,45 @@ export async function handleCompleteTask( // Store rendered markdown in DB for D004 recovery setTaskSummaryMd(params.milestoneId, params.sliceId, params.taskId, summaryMd); + // ── Close gates owned by execute-task (Q5/Q6/Q7) for this task ──────── + // Each gate id maps to a specific params field via taskGateFieldForId. + // When the model populates the field, record `pass`; when it's empty, + // record `omitted`. Task-scoped rows are filtered by taskId so a single + // task's completion doesn't touch sibling tasks' gate rows. + try { + const pendingGates = getPendingGatesForTurn( + params.milestoneId, + params.sliceId, + "execute-task", + params.taskId, + ); + if (pendingGates.length > 0) { + const ownedDefs = new Map(getGatesForTurn("execute-task").map((g) => [g.id, g] as const)); + for (const row of pendingGates) { + const def = ownedDefs.get(row.gate_id); + if (!def) continue; + const field = taskGateFieldForId(def.id, params); + const hasContent = typeof field === "string" && field.trim().length > 0; + saveGateResult({ + milestoneId: params.milestoneId, + sliceId: params.sliceId, + taskId: params.taskId, + gateId: def.id, + verdict: hasContent ? "pass" : "omitted", + rationale: hasContent + ? `${def.promptSection} section populated in task summary` + : `${def.promptSection} section left empty — recorded as omitted`, + findings: hasContent ? 
(field as string).trim() : "", + }); + } + } + } catch (gateErr) { + logWarning( + "tool", + `complete-task gate close warning for ${params.milestoneId}/${params.sliceId}/${params.taskId}: ${(gateErr as Error).message}`, + ); + } + // Invalidate all caches invalidateStateCache(); clearPathCache(); diff --git a/src/resources/extensions/gsd/tools/workflow-tool-executors.ts b/src/resources/extensions/gsd/tools/workflow-tool-executors.ts index edc1bfd31..ef7b67088 100644 --- a/src/resources/extensions/gsd/tools/workflow-tool-executors.ts +++ b/src/resources/extensions/gsd/tools/workflow-tool-executors.ts @@ -8,6 +8,7 @@ import { _getAdapter, saveGateResult, } from "../gsd-db.js"; +import { GATE_REGISTRY } from "../gate-registry.js"; import { saveArtifactToDb } from "../db-writer.js"; import type { CompleteMilestoneParams } from "./complete-milestone.js"; import { handleCompleteMilestone } from "./complete-milestone.js"; @@ -38,6 +39,7 @@ export function isSupportedSummaryArtifactType( export interface ToolExecutionResult { content: Array<{ type: "text"; text: string }>; details: Record; + isError?: boolean; } export interface SummarySaveParams { @@ -57,13 +59,15 @@ export async function executeSummarySave( return { content: [{ type: "text", text: "Error: GSD database is not available. Cannot save artifact." }], details: { operation: "save_summary", error: "db_unavailable" }, - }; + isError: true, + }; } if (!isSupportedSummaryArtifactType(params.artifact_type)) { return { content: [{ type: "text", text: `Error: Invalid artifact_type "${params.artifact_type}". Must be one of: ${SUPPORTED_SUMMARY_ARTIFACT_TYPES.join(", ")}` }], details: { operation: "save_summary", error: "invalid_artifact_type" }, - }; + isError: true, + }; } const contextGuard = shouldBlockContextArtifactSaveInSnapshot( loadWriteGateSnapshot(basePath), @@ -75,7 +79,8 @@ export async function executeSummarySave( return { content: [{ type: "text", text: `Error saving artifact: ${contextGuard.reason ?? "context write blocked"}` }], details: { operation: "save_summary", error: "context_write_blocked" }, - }; + isError: true, + }; } try { let relativePath: string; @@ -108,7 +113,8 @@ export async function executeSummarySave( return { content: [{ type: "text", text: `Error saving artifact: ${msg}` }], details: { operation: "save_summary", error: msg }, - }; + isError: true, + }; } } @@ -163,7 +169,8 @@ export async function executeTaskComplete( return { content: [{ type: "text", text: "Error: GSD database is not available. Cannot complete task." }], details: { operation: "complete_task", error: "db_unavailable" }, - }; + isError: true, + }; } try { const coerced = { ...params }; @@ -176,6 +183,7 @@ export async function executeTaskComplete( return { content: [{ type: "text", text: `Error completing task: ${result.error}` }], details: { operation: "complete_task", error: result.error }, + isError: true, }; } return { @@ -194,7 +202,8 @@ export async function executeTaskComplete( return { content: [{ type: "text", text: `Error completing task: ${msg}` }], details: { operation: "complete_task", error: msg }, - }; + isError: true, + }; } } @@ -207,7 +216,8 @@ export async function executeSliceComplete( return { content: [{ type: "text", text: "Error: GSD database is not available. Cannot complete slice." 
       details: { operation: "complete_slice", error: "db_unavailable" },
-    };
+      isError: true,
+    };
   }
   try {
     const splitPair = (s: string): [string, string] => {
@@ -257,6 +267,7 @@ export async function executeSliceComplete(
       return {
         content: [{ type: "text", text: `Error completing slice: ${result.error}` }],
         details: { operation: "complete_slice", error: result.error },
+        isError: true,
       };
     }
     return {
@@ -275,7 +286,8 @@ export async function executeSliceComplete(
     return {
       content: [{ type: "text", text: `Error completing slice: ${msg}` }],
      details: { operation: "complete_slice", error: msg },
-    };
+      isError: true,
+    };
   }
 }
@@ -288,7 +300,8 @@ export async function executeCompleteMilestone(
     return {
       content: [{ type: "text", text: "Error: GSD database is not available. Cannot complete milestone." }],
       details: { operation: "complete_milestone", error: "db_unavailable" },
-    };
+      isError: true,
+    };
   }
   try {
     const sanitized = sanitizeCompleteMilestoneParams(params);
@@ -297,6 +310,7 @@ export async function executeCompleteMilestone(
       return {
         content: [{ type: "text", text: `Error completing milestone: ${result.error}` }],
         details: { operation: "complete_milestone", error: result.error },
+        isError: true,
       };
     }
     return {
@@ -313,7 +327,8 @@ export async function executeCompleteMilestone(
     return {
       content: [{ type: "text", text: `Error completing milestone: ${msg}` }],
       details: { operation: "complete_milestone", error: msg },
-    };
+      isError: true,
+    };
   }
 }
@@ -326,7 +341,8 @@ export async function executeValidateMilestone(
     return {
       content: [{ type: "text", text: "Error: GSD database is not available. Cannot validate milestone." }],
       details: { operation: "validate_milestone", error: "db_unavailable" },
-    };
+      isError: true,
+    };
   }
   try {
     const result = await handleValidateMilestone(params, basePath);
@@ -334,6 +350,7 @@ export async function executeValidateMilestone(
       return {
         content: [{ type: "text", text: `Error validating milestone: ${result.error}` }],
         details: { operation: "validate_milestone", error: result.error },
+        isError: true,
       };
     }
     return {
@@ -351,7 +368,8 @@ export async function executeValidateMilestone(
     return {
       content: [{ type: "text", text: `Error validating milestone: ${msg}` }],
       details: { operation: "validate_milestone", error: msg },
-    };
+      isError: true,
+    };
   }
 }
@@ -364,7 +382,8 @@ export async function executeReassessRoadmap(
     return {
       content: [{ type: "text", text: "Error: GSD database is not available. Cannot reassess roadmap." }],
       details: { operation: "reassess_roadmap", error: "db_unavailable" },
-    };
+      isError: true,
+    };
   }
   try {
     const result = await handleReassessRoadmap(params, basePath);
@@ -372,6 +391,7 @@ export async function executeReassessRoadmap(
       return {
         content: [{ type: "text", text: `Error reassessing roadmap: ${result.error}` }],
         details: { operation: "reassess_roadmap", error: result.error },
+        isError: true,
       };
     }
     return {
@@ -390,7 +410,8 @@ export async function executeReassessRoadmap(
     return {
       content: [{ type: "text", text: `Error reassessing roadmap: ${msg}` }],
       details: { operation: "reassess_roadmap", error: msg },
-    };
+      isError: true,
+    };
   }
 }
@@ -403,15 +424,19 @@ export async function executeSaveGateResult(
     return {
       content: [{ type: "text", text: "Error: GSD database is not available." }],
       details: { operation: "save_gate_result", error: "db_unavailable" },
-    };
+      isError: true,
+    };
   }
 
-  const validGates = ["Q3", "Q4", "Q5", "Q6", "Q7", "Q8"];
+  // Source of truth: gate-registry.ts. Every declared GateId is accepted,
+  // so adding a new gate in one place automatically flows through here.
+  const validGates = Object.keys(GATE_REGISTRY);
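+  // e.g. with gates Q3 through Q8 declared in the registry, this yields the
+  // same ["Q3", "Q4", "Q5", "Q6", "Q7", "Q8"] list that the old hard-coded
+  // array duplicated.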
   if (!validGates.includes(params.gateId)) {
     return {
       content: [{ type: "text", text: `Error: Invalid gateId "${params.gateId}". Must be one of: ${validGates.join(", ")}` }],
       details: { operation: "save_gate_result", error: "invalid_gate_id" },
-    };
+      isError: true,
+    };
   }
 
   const validVerdicts = ["pass", "flag", "omitted"];
@@ -419,7 +444,8 @@ export async function executeSaveGateResult(
     return {
       content: [{ type: "text", text: `Error: Invalid verdict "${params.verdict}". Must be one of: ${validVerdicts.join(", ")}` }],
       details: { operation: "save_gate_result", error: "invalid_verdict" },
-    };
+      isError: true,
+    };
   }
 
   try {
@@ -443,7 +469,8 @@ export async function executeSaveGateResult(
     return {
       content: [{ type: "text", text: `Error saving gate result: ${msg}` }],
       details: { operation: "save_gate_result", error: msg },
-    };
+      isError: true,
+    };
   }
 }
@@ -456,7 +483,8 @@ export async function executePlanMilestone(
     return {
       content: [{ type: "text", text: "Error: GSD database is not available. Cannot plan milestone." }],
       details: { operation: "plan_milestone", error: "db_unavailable" },
-    };
+      isError: true,
+    };
   }
   try {
     const result = await handlePlanMilestone(params, basePath);
@@ -464,6 +492,7 @@ export async function executePlanMilestone(
      return {
         content: [{ type: "text", text: `Error planning milestone: ${result.error}` }],
         details: { operation: "plan_milestone", error: result.error },
+        isError: true,
       };
     }
     return {
@@ -480,7 +509,8 @@ export async function executePlanMilestone(
     return {
       content: [{ type: "text", text: `Error planning milestone: ${msg}` }],
       details: { operation: "plan_milestone", error: msg },
-    };
+      isError: true,
+    };
   }
 }
@@ -493,7 +523,8 @@ export async function executePlanSlice(
     return {
       content: [{ type: "text", text: "Error: GSD database is not available. Cannot plan slice." }],
       details: { operation: "plan_slice", error: "db_unavailable" },
-    };
+      isError: true,
+    };
   }
   try {
     const result = await handlePlanSlice(params, basePath);
@@ -501,6 +532,7 @@ export async function executePlanSlice(
       return {
         content: [{ type: "text", text: `Error planning slice: ${result.error}` }],
         details: { operation: "plan_slice", error: result.error },
+        isError: true,
       };
     }
     return {
@@ -519,7 +551,8 @@ export async function executePlanSlice(
     return {
       content: [{ type: "text", text: `Error planning slice: ${msg}` }],
       details: { operation: "plan_slice", error: msg },
-    };
+      isError: true,
+    };
   }
 }
@@ -532,7 +565,8 @@ export async function executeReplanSlice(
     return {
       content: [{ type: "text", text: "Error: GSD database is not available. Cannot replan slice." }],
       details: { operation: "replan_slice", error: "db_unavailable" },
-    };
+      isError: true,
+    };
   }
   try {
     const result = await handleReplanSlice(params, basePath);
@@ -540,6 +574,7 @@ export async function executeReplanSlice(
       return {
         content: [{ type: "text", text: `Error replanning slice: ${result.error}` }],
         details: { operation: "replan_slice", error: result.error },
+        isError: true,
       };
     }
     return {
@@ -558,7 +593,8 @@ export async function executeReplanSlice(
     return {
       content: [{ type: "text", text: `Error replanning slice: ${msg}` }],
       details: { operation: "replan_slice", error: msg },
-    };
+      isError: true,
+    };
   }
 }
@@ -576,6 +612,7 @@ export async function executeMilestoneStatus(
     return {
       content: [{ type: "text", text: "Error: GSD database is not available." }],
       details: { operation: "milestone_status", error: "db_unavailable" },
+      isError: true,
     };
   }
@@ -624,6 +661,7 @@ export async function executeMilestoneStatus(
     return {
       content: [{ type: "text", text: `Error querying milestone status: ${msg}` }],
       details: { operation: "milestone_status", error: msg },
-    };
+      isError: true,
+    };
   }
 }
diff --git a/src/resources/extensions/gsd/types.ts b/src/resources/extensions/gsd/types.ts
index e03815520..292aa462a 100644
--- a/src/resources/extensions/gsd/types.ts
+++ b/src/resources/extensions/gsd/types.ts
@@ -536,6 +536,24 @@ export interface CompleteTaskParams {
     verdict: string;
     durationMs: number;
   }>;
+  /**
+   * Q5 failure-modes section content (what breaks when dependencies fail).
+   * Populated → `pass`; omitted/empty → `omitted`.
+   * @optional
+   */
+  failureModes?: string;
+  /**
+   * Q6 load-profile section content (10x breakpoint + protection).
+   * Populated → `pass`; omitted/empty → `omitted`.
+   * @optional
+   */
+  loadProfile?: string;
+  /**
+   * Q7 negative-tests section content (malformed inputs, error paths,
+   * boundaries). Populated → `pass`; omitted/empty → `omitted`.
+   * @optional
+   */
+  negativeTests?: string;
   /** Optional caller-provided identity for audit trail */
   actorName?: string;
   /** Optional caller-provided reason this action was triggered */
@@ -584,6 +602,14 @@ export interface CompleteSliceParams {
   affects?: string[];
   /** @optional — defaults to [] when omitted */
   drillDownPaths?: string[];
+  /**
+   * Q8 operational readiness section content (health signal, failure signal,
+   * recovery, monitoring gaps). When populated, the complete-slice handler
+   * records Q8 as `pass`; when omitted or empty, Q8 is recorded as `omitted`.
+   * See gate-registry.ts.
+   * @optional
+   */
+  operationalReadiness?: string;
   /** Optional caller-provided identity for audit trail */
   actorName?: string;
   /** Optional caller-provided reason this action was triggered */
diff --git a/src/resources/extensions/gsd/validate-directory.ts b/src/resources/extensions/gsd/validate-directory.ts
index 4341826c2..6923abd49 100644
--- a/src/resources/extensions/gsd/validate-directory.ts
+++ b/src/resources/extensions/gsd/validate-directory.ts
@@ -61,6 +61,33 @@ const WINDOWS_BLOCKED_PATHS = new Set([
   "C:\\Program Files (x86)",
 ]);
 
+const WINDOWS_BLOCKED_SUFFIXES = new Set([
+  "\\",
+  "\\windows",
+  "\\windows\\system32",
+  "\\program files",
+  "\\program files (x86)",
+]);
+
+function normalizePathForComparison(dirPath: string): string {
+  let normalized = dirPath.replace(/[/\\]+$/, "");
+  if (normalized === "") {
+    normalized = "/";
+  } else if (/^[A-Za-z]:$/.test(normalized)) {
+    normalized += "\\";
+  }
+  return platform() === "win32" ? normalized.toLowerCase() : normalized;
+}
+
+function isBlockedWindowsPath(normalized: string): boolean {
+  if (!/^[a-z]:\\/.test(normalized)) {
+    return false;
+  }
+
+  const suffix = normalized.slice(2);
+  return WINDOWS_BLOCKED_SUFFIXES.has(suffix);
+}
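+
+// Illustrative expectations for the helpers above (a sketch of the contract,
+// not shipped assertions):
+//   normalizePathForComparison("C:\\Windows\\")   → "c:\\windows" on win32
+//   normalizePathForComparison("C:")              → "c:\\"
+//   isBlockedWindowsPath("c:\\windows\\system32") → true
+//   isBlockedWindowsPath("d:\\projects\\app")     → false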
+
 // ─── Core Validation ────────────────────────────────────────────────────────────
 
 /**
@@ -84,16 +111,11 @@ export function validateDirectory(dirPath: string): DirectoryValidationResult {
 
   // Normalize trailing slashes for consistent comparison.
   // Special cases: "/" → "/" (not ""), "C:\" → "C:\" (not "C:")
-  let normalized = resolved.replace(/[/\\]+$/, "");
-  if (normalized === "") {
-    normalized = "/";
-  } else if (/^[A-Za-z]:$/.test(normalized)) {
-    normalized = normalized + "\\";
-  }
+  const normalized = normalizePathForComparison(resolved);
 
   // ── Check 1: Blocked system paths ──────────────────────────────────────
   const blockedPaths = platform() === "win32" ? WINDOWS_BLOCKED_PATHS : UNIX_BLOCKED_PATHS;
-  if (blockedPaths.has(normalized)) {
+  if (platform() === "win32" ? isBlockedWindowsPath(normalized) : blockedPaths.has(normalized)) {
     return {
       safe: false,
       severity: "blocked",
@@ -104,9 +126,9 @@ export function validateDirectory(dirPath: string): DirectoryValidationResult {
   // ── Check 2: Home directory itself (not subdirs) ───────────────────────
   let resolvedHome: string;
   try {
-    resolvedHome = realpathSync(resolve(homedir())).replace(/[/\\]+$/, "");
+    resolvedHome = normalizePathForComparison(realpathSync(resolve(homedir())));
   } catch {
-    resolvedHome = resolve(homedir()).replace(/[/\\]+$/, "");
+    resolvedHome = normalizePathForComparison(resolve(homedir()));
   }
 
   if (normalized === resolvedHome) {
@@ -120,9 +142,9 @@ export function validateDirectory(dirPath: string): DirectoryValidationResult {
   // ── Check 3: Temp directory root ───────────────────────────────────────
   let resolvedTmp: string;
   try {
-    resolvedTmp = realpathSync(resolve(tmpdir())).replace(/[/\\]+$/, "");
+    resolvedTmp = normalizePathForComparison(realpathSync(resolve(tmpdir())));
   } catch {
-    resolvedTmp = resolve(tmpdir()).replace(/[/\\]+$/, "");
+    resolvedTmp = normalizePathForComparison(resolve(tmpdir()));
   }
 
   if (normalized === resolvedTmp) {
diff --git a/src/resources/extensions/gsd/workflow-events.ts b/src/resources/extensions/gsd/workflow-events.ts
index efc239da5..40bdab31f 100644
--- a/src/resources/extensions/gsd/workflow-events.ts
+++ b/src/resources/extensions/gsd/workflow-events.ts
@@ -2,6 +2,7 @@ import { createHash, randomUUID } from "node:crypto";
 import { appendFileSync, readFileSync, existsSync, mkdirSync } from "node:fs";
 import { join } from "node:path";
 import { atomicWriteSync } from "./atomic-write.js";
+import { withFileLockSync } from "./file-lock.js";
 import { logWarning } from "./workflow-logger.js";
 
 // ─── Session ID ───────────────────────────────────────────────────────────
@@ -127,31 +128,39 @@ export function compactMilestoneEvents(
   const logPath = join(basePath, ".gsd", "event-log.jsonl");
   const archivePath = join(basePath, ".gsd", `event-log-${milestoneId}.jsonl.archived`);
 
-  const allEvents = readEvents(logPath);
-  const toArchive = allEvents.filter(
-    (e) => (e.params as { milestoneId?: string }).milestoneId === milestoneId,
-  );
-  const remaining = allEvents.filter(
-    (e) => (e.params as { milestoneId?: string }).milestoneId !== milestoneId,
-  );
+  return withFileLockSync(logPath, () => {
+    const allEvents = readEvents(logPath);
+
+    // Single-pass partition to halve the work (per reviewer agent)
+    const toArchive: WorkflowEvent[] = [];
+    const remaining: WorkflowEvent[] = [];
+
+    for (const e of allEvents) {
+      if ((e.params as { milestoneId?: string }).milestoneId === milestoneId) {
+        toArchive.push(e);
+      } else {
+        remaining.push(e);
+      }
+    }
 
-  if (toArchive.length === 0) {
-    return { archived: 0 };
-  }
+    if (toArchive.length === 0) {
+      return { archived: 0 };
+    }
 
-  // Write archived events to .jsonl.archived file (crash-safe)
-  atomicWriteSync(
-    archivePath,
-    toArchive.map((e) => JSON.stringify(e)).join("\n") + "\n",
-  );
+    // Write archived events to .jsonl.archived file (crash-safe)
+    atomicWriteSync(
+      archivePath,
+      toArchive.map((e) => JSON.stringify(e)).join("\n") + "\n",
+    );
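+
+    // Both the archive write above and the truncate below run under the
+    // same file lock, so concurrent compactions cannot interleave.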
 
-  // Truncate active log to remaining events only
-  atomicWriteSync(
-    logPath,
-    remaining.length > 0
-      ? remaining.map((e) => JSON.stringify(e)).join("\n") + "\n"
-      : "",
-  );
+    // Truncate active log to remaining events only
+    atomicWriteSync(
+      logPath,
+      remaining.length > 0
+        ? remaining.map((e) => JSON.stringify(e)).join("\n") + "\n"
+        : "",
+    );
 
-  return { archived: toArchive.length };
+    return { archived: toArchive.length };
+  });
 }
diff --git a/src/resources/extensions/gsd/workflow-mcp-auto-prep.ts b/src/resources/extensions/gsd/workflow-mcp-auto-prep.ts
new file mode 100644
index 000000000..1d69ebc00
--- /dev/null
+++ b/src/resources/extensions/gsd/workflow-mcp-auto-prep.ts
@@ -0,0 +1,76 @@
+import type { ExtensionContext } from "@gsd/pi-coding-agent";
+
+import {
+  type EnsureProjectWorkflowMcpConfigResult,
+  ensureProjectWorkflowMcpConfig,
+} from "./mcp-project-config.js";
+import { usesWorkflowMcpTransport } from "./workflow-mcp.js";
+
+interface WorkflowMcpAutoPrepContext {
+  model?: { provider?: string; baseUrl?: string };
+  modelRegistry?: {
+    getProviderAuthMode?: (provider: string) => string;
+    isProviderRequestReady?: (provider: string) => boolean;
+  };
+  ui?: Pick<ExtensionContext["ui"], "notify">;
+}
+
+function getAuthModeSafe(
+  ctx: WorkflowMcpAutoPrepContext,
+  provider: string | undefined,
+): string | undefined {
+  if (!provider) return undefined;
+  const getAuthMode = ctx.modelRegistry?.getProviderAuthMode;
+  if (typeof getAuthMode !== "function") return undefined;
+  try {
+    return getAuthMode(provider);
+  } catch {
+    return undefined;
+  }
+}
+
+function hasClaudeCodeProvider(ctx: WorkflowMcpAutoPrepContext): boolean {
+  return getAuthModeSafe(ctx, "claude-code") === "externalCli";
+}
+
+function isClaudeCodeProviderReady(ctx: WorkflowMcpAutoPrepContext): boolean {
+  const readyCheck = ctx.modelRegistry?.isProviderRequestReady;
+  if (typeof readyCheck !== "function") return false;
+  try {
+    return readyCheck("claude-code");
+  } catch {
+    return false;
+  }
+}
+
+export function shouldAutoPrepareWorkflowMcp(ctx: WorkflowMcpAutoPrepContext): boolean {
+  const provider = ctx.model?.provider;
+  const baseUrl = ctx.model?.baseUrl;
+  const authMode = getAuthModeSafe(ctx, provider);
+
+  if (usesWorkflowMcpTransport(authMode as any, baseUrl)) return true;
+  if (provider === "claude-code") return true;
+  if (hasClaudeCodeProvider(ctx)) return true;
+  return isClaudeCodeProviderReady(ctx);
+}
+
+export function prepareWorkflowMcpForProject(
+  ctx: WorkflowMcpAutoPrepContext,
+  projectRoot: string,
+): EnsureProjectWorkflowMcpConfigResult | null {
+  if (!shouldAutoPrepareWorkflowMcp(ctx)) return null;
+
+  try {
+    const result = ensureProjectWorkflowMcpConfig(projectRoot);
+    if (result.status !== "unchanged") {
+      ctx.ui?.notify?.(`Claude Code MCP prepared at ${result.configPath}`, "info");
+    }
+    return result;
+  } catch (err) {
+    ctx.ui?.notify?.(
+      `Claude Code MCP prep failed: ${err instanceof Error ? err.message : String(err)}. Detected Claude Code model but no workflow MCP. Please run /gsd mcp init . from your project root.`,
+      "warning",
+    );
+    return null;
+  }
+}
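+
+// Decision sketch (mirrors the guards above): prep is a no-op unless the
+// active model already rides the workflow MCP transport, the selected
+// provider is claude-code, or a claude-code externalCli provider is
+// registered and request-ready; config failures surface as a warning
+// notification rather than throwing.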
diff --git a/src/resources/extensions/gsd/workflow-mcp.ts b/src/resources/extensions/gsd/workflow-mcp.ts
index 797f127f5..9e4bb90c7 100644
--- a/src/resources/extensions/gsd/workflow-mcp.ts
+++ b/src/resources/extensions/gsd/workflow-mcp.ts
@@ -1,7 +1,7 @@
 import { execSync } from "node:child_process";
 import { existsSync } from "node:fs";
 import { dirname, resolve } from "node:path";
-import { fileURLToPath } from "node:url";
+import { fileURLToPath, pathToFileURL } from "node:url";
 
 export interface WorkflowMcpLaunchConfig {
   name: string;
@@ -21,22 +21,35 @@ export interface WorkflowCapabilityOptions {
 }
 
 const MCP_WORKFLOW_TOOL_SURFACE = new Set([
+  "ask_user_questions",
+  "gsd_decision_save",
   "gsd_complete_milestone",
   "gsd_complete_task",
   "gsd_complete_slice",
+  "gsd_generate_milestone_id",
+  "gsd_journal_query",
   "gsd_milestone_complete",
+  "gsd_milestone_generate_id",
   "gsd_milestone_status",
   "gsd_milestone_validate",
+  "gsd_plan_task",
   "gsd_plan_milestone",
   "gsd_plan_slice",
   "gsd_replan_slice",
   "gsd_reassess_roadmap",
+  "gsd_requirement_save",
+  "gsd_requirement_update",
   "gsd_roadmap_reassess",
+  "gsd_save_decision",
   "gsd_save_gate_result",
+  "gsd_save_requirement",
+  "gsd_skip_slice",
   "gsd_slice_replan",
   "gsd_slice_complete",
   "gsd_summary_save",
+  "gsd_task_plan",
   "gsd_task_complete",
+  "gsd_update_requirement",
   "gsd_validate_milestone",
 ]);
@@ -95,6 +108,8 @@ function getBundledWorkflowMcpCliPath(env: NodeJS.ProcessEnv): string | null {
   }
 
   const candidates = [
+    resolve(fileURLToPath(new URL("../../../../packages/mcp-server/src/cli.ts", import.meta.url))),
+    resolve(fileURLToPath(new URL("../../../../../packages/mcp-server/src/cli.ts", import.meta.url))),
     resolve(fileURLToPath(new URL("../../../../packages/mcp-server/dist/cli.js", import.meta.url))),
     resolve(fileURLToPath(new URL("../../../../../packages/mcp-server/dist/cli.js", import.meta.url))),
   ];
@@ -108,9 +123,9 @@ function getBundledWorkflowMcpCliPath(env: NodeJS.ProcessEnv): string | null {
 
 function getBundledWorkflowExecutorModulePath(): string | null {
   const candidates = [
-    resolve(fileURLToPath(new URL("../../../../dist/resources/extensions/gsd/tools/workflow-tool-executors.js", import.meta.url))),
     resolve(fileURLToPath(new URL("./tools/workflow-tool-executors.js", import.meta.url))),
     resolve(fileURLToPath(new URL("./tools/workflow-tool-executors.ts", import.meta.url))),
+    resolve(fileURLToPath(new URL("../../../../dist/resources/extensions/gsd/tools/workflow-tool-executors.js", import.meta.url))),
   ];
 
   for (const candidate of candidates) {
@@ -122,9 +137,9 @@ function getBundledWorkflowExecutorModulePath(): string | null {
 
 function getBundledWorkflowWriteGateModulePath(): string | null {
   const candidates = [
-    resolve(fileURLToPath(new URL("../../../../dist/resources/extensions/gsd/bootstrap/write-gate.js", import.meta.url))),
     resolve(fileURLToPath(new URL("./bootstrap/write-gate.js", import.meta.url))),
     resolve(fileURLToPath(new URL("./bootstrap/write-gate.ts", import.meta.url))),
+    resolve(fileURLToPath(new URL("../../../../dist/resources/extensions/gsd/bootstrap/write-gate.js", import.meta.url))),
   ];
 
   for (const candidate of candidates) {
@@ -134,19 +149,58 @@ function getBundledWorkflowWriteGateModulePath(): string | null {
   return null;
 }
 
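+// Dev-checkout support (sketch of the intent, asserted by the tests): when a
+// .ts CLI or module path wins candidate resolution, NODE_OPTIONS gains
+// --experimental-strip-types plus an --import of the resolve-ts.mjs hook so
+// Node can execute the TypeScript sources directly.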
URL("../../../../src/resources/extensions/gsd/tests/resolve-ts.mjs", import.meta.url))), + ]; + + for (const candidate of candidates) { + if (existsSync(candidate)) return candidate; + } + + return null; +} + +function mergeNodeOptions(existing: string | undefined, additions: string[]): string | undefined { + const tokens = (existing ?? "").split(/\s+/).map((value) => value.trim()).filter(Boolean); + for (const addition of additions) { + if (!tokens.includes(addition)) { + tokens.push(addition); + } + } + return tokens.length > 0 ? tokens.join(" ") : undefined; +} + function buildWorkflowLaunchEnv( projectRoot: string, gsdCliPath: string | undefined, explicitEnv?: Record, + workflowCliPath?: string, ): Record { const executorModulePath = getBundledWorkflowExecutorModulePath(); const writeGateModulePath = getBundledWorkflowWriteGateModulePath(); + const resolveTsHookPath = getResolveTsHookPath(); + const wantsSourceTs = + Boolean(resolveTsHookPath) && + ( + (workflowCliPath?.endsWith(".ts") ?? false) || + (executorModulePath?.endsWith(".ts") ?? false) || + (writeGateModulePath?.endsWith(".ts") ?? false) + ); + const nodeOptions = wantsSourceTs + ? mergeNodeOptions(explicitEnv?.NODE_OPTIONS, [ + "--experimental-strip-types", + `--import=${pathToFileURL(resolveTsHookPath!).href}`, + ]) + : explicitEnv?.NODE_OPTIONS; return { ...(explicitEnv ?? {}), ...(gsdCliPath ? { GSD_CLI_PATH: gsdCliPath } : {}), ...(executorModulePath ? { GSD_WORKFLOW_EXECUTORS_MODULE: executorModulePath } : {}), ...(writeGateModulePath ? { GSD_WORKFLOW_WRITE_GATE_MODULE: writeGateModulePath } : {}), + ...(nodeOptions ? { NODE_OPTIONS: nodeOptions } : {}), GSD_PERSIST_WRITE_GATE_STATE: "1", GSD_WORKFLOW_PROJECT_ROOT: projectRoot, }; @@ -188,7 +242,7 @@ export function detectWorkflowMcpLaunchConfig( command: process.execPath, args: [distCli], cwd: resolvedWorkflowProjectRoot, - env: buildWorkflowLaunchEnv(resolvedWorkflowProjectRoot, gsdCliPath), + env: buildWorkflowLaunchEnv(resolvedWorkflowProjectRoot, gsdCliPath, undefined, distCli), }; } @@ -199,7 +253,7 @@ export function detectWorkflowMcpLaunchConfig( command: process.execPath, args: [bundledCli], cwd: resolvedWorkflowProjectRoot, - env: buildWorkflowLaunchEnv(resolvedWorkflowProjectRoot, gsdCliPath), + env: buildWorkflowLaunchEnv(resolvedWorkflowProjectRoot, gsdCliPath, undefined, bundledCli), }; } @@ -294,6 +348,21 @@ export function usesWorkflowMcpTransport( return authMode === "externalCli" && typeof baseUrl === "string" && baseUrl.startsWith("local://"); } +export function supportsStructuredQuestions( + activeTools: string[], + options: Pick = {}, +): boolean { + if (!activeTools.includes("ask_user_questions")) return false; + + // Workflow MCP currently exposes ask_user_questions via MCP form elicitation. + // Local external CLI transports such as Claude Code can invoke the tool, but + // do not reliably complete that elicitation round-trip yet, so guided discuss + // prompts must fall back to plain-text questioning. + if (usesWorkflowMcpTransport(options.authMode, options.baseUrl)) return false; + + return true; +} + export function getWorkflowTransportSupportError( provider: string | undefined, requiredTools: string[], @@ -310,7 +379,7 @@ export function getWorkflowTransportSupportError( const providerLabel = `"${provider}"`; if (!launch) { - return `Provider ${providerLabel} cannot run ${surface}${unitLabel}: the GSD workflow MCP server is not configured or discoverable. 
+
 export function getWorkflowTransportSupportError(
   provider: string | undefined,
   requiredTools: string[],
@@ -310,7 +379,7 @@ export function getWorkflowTransportSupportError(
   const providerLabel = `"${provider}"`;
 
   if (!launch) {
-    return `Provider ${providerLabel} cannot run ${surface}${unitLabel}: the GSD workflow MCP server is not configured or discoverable. Configure GSD_WORKFLOW_MCP_COMMAND, build packages/mcp-server/dist/cli.js, or install gsd-mcp-server on PATH.`;
+    return `Provider ${providerLabel} cannot run ${surface}${unitLabel}: the GSD workflow MCP server is not configured or discoverable. Detected Claude Code model but no workflow MCP. Please run /gsd mcp init . from your project root. You can also configure GSD_WORKFLOW_MCP_COMMAND, build packages/mcp-server/dist/cli.js, or install gsd-mcp-server on PATH.`;
   }
 
   const missing = [...new Set(requiredTools)].filter((tool) => !MCP_WORKFLOW_TOOL_SURFACE.has(tool));
diff --git a/src/resources/extensions/gsd/workflow-projections.ts b/src/resources/extensions/gsd/workflow-projections.ts
index dfa8b170e..6aea665f0 100644
--- a/src/resources/extensions/gsd/workflow-projections.ts
+++ b/src/resources/extensions/gsd/workflow-projections.ts
@@ -180,6 +180,14 @@ export function renderSummaryContent(
   milestoneId: string,
   evidence?: Array<{ command: string; exitCode?: number; exit_code?: number; verdict: string; durationMs?: number; duration_ms?: number }>,
 ): string {
+  // If the task already has a fully rendered summary (written by handleCompleteTask's
+  // renderSummaryMarkdown), use it as-is. That content already includes frontmatter,
+  // heading, and all sections. Re-wrapping it inside a second frontmatter/heading
+  // envelope produces double frontmatter and duplicate sections.
+  if (taskRow.full_summary_md && taskRow.full_summary_md.trimStart().startsWith("---")) {
+    return taskRow.full_summary_md;
+  }
+
   // ── Frontmatter (YAML list format, matches parseSummary() expectations) ──
   const keyFilesYaml = taskRow.key_files && taskRow.key_files.length > 0
     ? taskRow.key_files.map(f => `  - ${f}`).join("\n")
diff --git a/src/resources/extensions/gsd/worktree-manager.ts b/src/resources/extensions/gsd/worktree-manager.ts
index 37490a30b..5c4d274ec 100644
--- a/src/resources/extensions/gsd/worktree-manager.ts
+++ b/src/resources/extensions/gsd/worktree-manager.ts
@@ -548,13 +548,39 @@ export function removeWorktree(
   }
 }
 
-/** Paths to skip in all worktree diffs (internal/runtime artifacts). */
-const SKIP_PATHS = [".gsd/worktrees/", ".gsd/runtime/", ".gsd/activity/"];
-const SKIP_EXACT = [".gsd/STATE.md", ".gsd/auto.lock", ".gsd/metrics.json"];
+/**
+ * Paths to skip in all worktree diffs (internal/runtime artifacts).
+ *
+ * NOTE: These arrays must stay synchronized with GSD_RUNTIME_PATTERNS in gitignore.ts.
+ * That file is the canonical source of truth for runtime ignore patterns.
+ * This module uses a split representation (paths/exact/prefixes) for efficient matching.
+ */
+const SKIP_PATHS = [
+  ".gsd/worktrees/",
+  ".gsd/runtime/",
+  ".gsd/activity/",
+  ".gsd/forensics/",
+  ".gsd/parallel/",
+  ".gsd/journal/",
+];
+const SKIP_EXACT = [
+  ".gsd/STATE.md",
+  ".gsd/auto.lock",
+  ".gsd/metrics.json",
+  ".gsd/state-manifest.json",
+  ".gsd/doctor-history.jsonl",
+  ".gsd/event-log.jsonl",
+];
+/** File prefixes to skip (for wildcard patterns like completed-units*.json, gsd.db*). */
+const SKIP_PREFIXES = [
+  ".gsd/completed-units",
+  ".gsd/gsd.db",
+];
 
 function shouldSkipPath(filePath: string): boolean {
   if (SKIP_PATHS.some(p => filePath.startsWith(p))) return true;
   if (SKIP_EXACT.includes(filePath)) return true;
+  if (SKIP_PREFIXES.some(p => filePath.startsWith(p))) return true;
   return false;
 }
diff --git a/src/resources/extensions/gsd/write-intercept.ts b/src/resources/extensions/gsd/write-intercept.ts
index 833cc2023..3846d7a46 100644
--- a/src/resources/extensions/gsd/write-intercept.ts
+++ b/src/resources/extensions/gsd/write-intercept.ts
@@ -24,6 +24,9 @@ const BLOCKED_PATTERNS: RegExp[] = [
   /(^|[/\\])\.gsd[/\\]STATE\.md$/i,
   // Also match resolved symlink paths under ~/.gsd/projects/ (Pitfall #6)
   /(^|[/\\])\.gsd[/\\]projects[/\\][^/\\]+[/\\]STATE\.md$/i,
+  // gsd.db and WAL/SHM files — single-writer WAL connection managed by engine (#3625)
+  /(^|[/\\])\.gsd[/\\]gsd\.db(-wal|-shm)?$/i,
+  /(^|[/\\])\.gsd[/\\]projects[/\\][^/\\]+[/\\]gsd\.db(-wal|-shm)?$/i,
 ];
 
 /**
@@ -41,6 +44,12 @@ const BASH_STATE_PATTERNS: RegExp[] = [
   /\bsed\b.*-i.*STATE\.md/i,
   // dd output to STATE.md
   /\bdd\b.*of=\S*STATE\.md/i,
+  // Direct DB access via sqlite3/sql.js/better-sqlite3 targeting gsd.db (#3625)
+  /\b(sqlite3|sql\.js|better-sqlite3|node:sqlite)\b.*gsd\.db/i,
+  /\bgsd\.db\b.*\b(sqlite3|sql\.js|better-sqlite3)\b/i,
+  // Shell writes targeting gsd.db files
+  /[>|]+\s*\S*gsd\.db/i,
+  /\b(cp|mv|dd)\b.*gsd\.db/i,
 ];
 
 /**
@@ -81,7 +90,7 @@ function matchesBlockedPattern(path: string): boolean {
  * Error message returned when an agent attempts to directly write an authoritative .gsd/ state file.
  * Directs the agent to use engine tool calls instead.
 */
-export const BLOCKED_WRITE_ERROR = `Direct writes to .gsd/STATE.md are blocked. Use engine tool calls instead:
+export const BLOCKED_WRITE_ERROR = `Direct writes to .gsd/STATE.md and .gsd/gsd.db are blocked. Use engine tool calls instead:
 - To complete a task: call gsd_complete_task(milestone_id, slice_id, task_id, summary)
 - To complete a slice: call gsd_complete_slice(milestone_id, slice_id, summary, uat_result)
 - To save a decision: call gsd_save_decision(scope, decision, choice, rationale)
diff --git a/src/resources/extensions/ollama/index.ts b/src/resources/extensions/ollama/index.ts
index 7f87c6e77..6934f4c26 100644
--- a/src/resources/extensions/ollama/index.ts
+++ b/src/resources/extensions/ollama/index.ts
@@ -57,17 +57,24 @@ async function probeAndRegister(pi: ExtensionAPI): Promise<boolean> {
   }
 
   const models = await discoverModels();
-  if (models.length === 0) return true; // Running but no models pulled
+  if (models.length === 0) {
+    // No local models means there's nothing usable to register in GSD.
+    // Keep the footer/status clean instead of advertising Ollama availability.
+    if (providerRegistered) {
+      pi.unregisterProvider("ollama");
+      providerRegistered = false;
+    }
+    return false;
+  }
 
   const baseUrl = client.getOllamaHost();
 
-  // Use authMode "apiKey" with a dummy key (#3440).
-  // authMode "none" requires a custom streamSimple handler, but Ollama uses
-  // the standard OpenAI-compatible streaming endpoint. Ollama ignores the
-  // Authorization header so the dummy key is harmless.
+  // Use authMode "apiKey" (#3440). Local Ollama ignores the Authorization header,
+  // so the "ollama" fallback is harmless. For cloud endpoints (OLLAMA_HOST pointing
+  // to ollama.com or a remote instance), OLLAMA_API_KEY is picked up here.
   pi.registerProvider("ollama", {
     authMode: "apiKey",
-    apiKey: "ollama",
+    apiKey: process.env.OLLAMA_API_KEY ?? "ollama",
"ollama", baseUrl, api: "ollama-chat", streamSimple: streamOllamaChat, @@ -115,9 +122,11 @@ export default function ollama(pi: ExtensionAPI) { } else { probeAndRegister(pi) .then((found) => { - if (found) ctx.ui.setStatus("ollama", "Ollama"); + ctx.ui.setStatus("ollama", found ? "Ollama" : undefined); }) - .catch(() => {}); + .catch(() => { + ctx.ui.setStatus("ollama", undefined); + }); } }); diff --git a/src/resources/extensions/ollama/ollama-client.ts b/src/resources/extensions/ollama/ollama-client.ts index 4738c09da..2408215fd 100644 --- a/src/resources/extensions/ollama/ollama-client.ts +++ b/src/resources/extensions/ollama/ollama-client.ts @@ -34,11 +34,34 @@ export function getOllamaHost(): string { return `http://${host}`; } +/** + * Get auth headers for Ollama API requests. + * For cloud endpoints (OLLAMA_HOST pointing to ollama.com or remote instances), + * OLLAMA_API_KEY is used as a Bearer token. Local Ollama ignores the header. + */ +function getAuthHeaders(): Record { + const apiKey = process.env.OLLAMA_API_KEY; + if (!apiKey) return {}; + return { Authorization: `Bearer ${apiKey}` }; +} + +/** + * Merge auth headers into request options. + */ +function withAuth(options: RequestInit = {}): RequestInit { + const authHeaders = getAuthHeaders(); + if (Object.keys(authHeaders).length === 0) return options; + return { + ...options, + headers: { ...authHeaders, ...(options.headers as Record || {}) }, + }; +} + async function fetchWithTimeout(url: string, options: RequestInit = {}, timeoutMs = REQUEST_TIMEOUT_MS): Promise { const controller = new AbortController(); const timeout = setTimeout(() => controller.abort(), timeoutMs); try { - return await fetch(url, { ...options, signal: controller.signal }); + return await fetch(url, withAuth({ ...options, signal: controller.signal })); } finally { clearTimeout(timeout); } @@ -46,10 +69,16 @@ async function fetchWithTimeout(url: string, options: RequestInit = {}, timeoutM /** * Check if Ollama is running and reachable. + * For cloud endpoints (OLLAMA_HOST pointing to ollama.com), uses /api/tags + * as the probe since the root endpoint may not be available. */ export async function isRunning(): Promise { try { - const response = await fetchWithTimeout(`${getOllamaHost()}/`, {}, PROBE_TIMEOUT_MS); + const host = getOllamaHost(); + const isCloud = host.includes("ollama.com") || host.includes("cloud"); + const probeUrl = isCloud ? `${host}/api/tags` : `${host}/`; + const timeout = isCloud ? REQUEST_TIMEOUT_MS : PROBE_TIMEOUT_MS; + const response = await fetchWithTimeout(probeUrl, isCloud ? 
{ method: "GET" } : {}, timeout); return response.ok; } catch { return false; @@ -117,12 +146,12 @@ export async function pullModel( onProgress?: (progress: OllamaPullProgress) => void, signal?: AbortSignal, ): Promise { - const response = await fetch(`${getOllamaHost()}/api/pull`, { + const response = await fetch(`${getOllamaHost()}/api/pull`, withAuth({ method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ name, stream: true }), signal, - }); + })); if (!response.ok) { const text = await response.text(); @@ -146,12 +175,12 @@ export async function* chat( request: OllamaChatRequest, signal?: AbortSignal, ): AsyncGenerator { - const response = await fetch(`${getOllamaHost()}/api/chat`, { + const response = await fetch(`${getOllamaHost()}/api/chat`, withAuth({ method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(request), signal, - }); + })); if (!response.ok) { const text = await response.text(); diff --git a/src/resources/extensions/ollama/ollama-discovery.ts b/src/resources/extensions/ollama/ollama-discovery.ts index 29fb1bc77..bc2105b74 100644 --- a/src/resources/extensions/ollama/ollama-discovery.ts +++ b/src/resources/extensions/ollama/ollama-discovery.ts @@ -8,7 +8,7 @@ * Returns models in the format expected by pi.registerProvider(). */ -import { listModels } from "./ollama-client.js"; +import { listModels, showModel } from "./ollama-client.js"; import { estimateContextFromParams, formatModelSize, @@ -17,6 +17,24 @@ import { } from "./model-capabilities.js"; import type { OllamaChatOptions, OllamaModelInfo } from "./types.js"; +/** + * Extract context window from /api/show model_info. + * Keys follow the pattern "{architecture}.context_length" (e.g. "llama.context_length"). + */ +function extractContextFromModelInfo(modelInfo: Record): number | undefined { + for (const [key, value] of Object.entries(modelInfo)) { + if (key.endsWith(".context_length") && typeof value === "number" && value > 0) { + return value; + } + } + return undefined; +} + +type ClientDeps = { + listModels: typeof listModels; + showModel: typeof showModel; +}; + export interface DiscoveredOllamaModel { id: string; name: string; @@ -35,13 +53,26 @@ export interface DiscoveredOllamaModel { const ZERO_COST = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }; -function enrichModel(info: OllamaModelInfo): DiscoveredOllamaModel { +async function enrichModel(info: OllamaModelInfo, deps: ClientDeps): Promise { const caps = getModelCapabilities(info.name); const parameterSize = info.details?.parameter_size ?? ""; - // Determine context window: known table > estimate from param size > default + // /api/tags doesn't include context length; /api/show does via "{arch}.context_length" in model_info. + let showContextWindow: number | undefined; + if (caps.contextWindow === undefined) { + try { + const showData = await deps.showModel(info.name); + showContextWindow = extractContextFromModelInfo(showData.model_info); + } catch (err) { + // non-fatal: fall through to estimate + if (process.env.GSD_DEBUG) console.warn(`[ollama] /api/show failed for ${info.name}:`, err instanceof Error ? err.message : String(err)); + } + } + + // Determine context window: known table > /api/show > estimate from param size > default const contextWindow = caps.contextWindow ?? + showContextWindow ?? (parameterSize ? 
   const contextWindow =
     caps.contextWindow ??
+    showContextWindow ??
     (parameterSize ? estimateContextFromParams(parameterSize) : 8192);
 
   // Determine max tokens: known table > fraction of context > default
@@ -73,11 +104,11 @@ function enrichModel(info: OllamaModelInfo): DiscoveredOllamaModel {
 /**
  * Discover all locally available Ollama models with enriched capabilities.
 */
-export async function discoverModels(): Promise<DiscoveredOllamaModel[]> {
-  const tags = await listModels();
+export async function discoverModels(deps: ClientDeps = { listModels, showModel }): Promise<DiscoveredOllamaModel[]> {
+  const tags = await deps.listModels();
   if (!tags.models || tags.models.length === 0) return [];
 
-  return tags.models.map(enrichModel);
+  return Promise.all(tags.models.map((m) => enrichModel(m, deps)));
 }
 
 /**
diff --git a/src/resources/extensions/ollama/ollama-status-indicator.test.ts b/src/resources/extensions/ollama/ollama-status-indicator.test.ts
new file mode 100644
index 000000000..68769d718
--- /dev/null
+++ b/src/resources/extensions/ollama/ollama-status-indicator.test.ts
@@ -0,0 +1,28 @@
+/**
+ * Regression test: don't show an Ollama footer status unless Ollama is
+ * actually usable (running with at least one discovered model).
+ */
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import { readFileSync } from "node:fs";
+import { join, dirname } from "node:path";
+import { fileURLToPath } from "node:url";
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const src = readFileSync(join(__dirname, "index.ts"), "utf-8");
+
+test("probeAndRegister returns false when no Ollama models are discovered", () => {
+  assert.match(
+    src,
+    /if \(models\.length === 0\)[\s\S]*return false;/,
+    "running-without-models should not be treated as available",
+  );
+});
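+
+// These are source-shape assertions rather than behavioural tests: they pin
+// the exact guard and status expressions in index.ts so a refactor that
+// reintroduces the stale footer indicator fails fast.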
"Ollama" : undefined\)/, + "status should be cleared when probeAndRegister reports unavailable", + ); +}); diff --git a/src/resources/extensions/ollama/tests/ollama-discovery.test.ts b/src/resources/extensions/ollama/tests/ollama-discovery.test.ts index a228bf663..02d582d19 100644 --- a/src/resources/extensions/ollama/tests/ollama-discovery.test.ts +++ b/src/resources/extensions/ollama/tests/ollama-discovery.test.ts @@ -1 +1,55 @@ // GSD2 — Tests for Ollama model discovery and enrichment +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { discoverModels } from "../ollama-discovery.js"; +import type { OllamaTagsResponse, OllamaShowResponse } from "../types.js"; + +const EMPTY_DETAILS = { parent_model: "", format: "", family: "", families: null, parameter_size: "", quantization_level: "" }; + +function modelStub(name: string, parameterSize = "") { + return { name, model: name, modified_at: "", size: 0, digest: "", details: { ...EMPTY_DETAILS, parameter_size: parameterSize } }; +} + +function tagsStub(name: string, parameterSize = ""): OllamaTagsResponse { + return { models: [modelStub(name, parameterSize)] }; +} + +function showStub(modelInfo: Record): OllamaShowResponse { + return { modelfile: "", parameters: "", template: "", details: EMPTY_DETAILS, model_info: modelInfo }; +} + +describe("discoverModels — context window resolution", () => { + it("uses known table context window without calling /api/show", async () => { + let showCalled = false; + const models = await discoverModels({ + listModels: async () => tagsStub("llama3.2:latest", "3B"), + showModel: async () => { showCalled = true; throw new Error("should not be called"); }, + }); + assert.equal(models[0].contextWindow, 131072); + assert.equal(showCalled, false); + }); + + it("uses context_length from /api/show model_info for unknown model", async () => { + const models = await discoverModels({ + listModels: async () => tagsStub("gemini-3-flash-preview:latest"), + showModel: async () => showStub({ "gemini.context_length": 1048576 }), + }); + assert.equal(models[0].contextWindow, 1048576); + }); + + it("falls back to 8192 when /api/show model_info has no context_length key", async () => { + const models = await discoverModels({ + listModels: async () => tagsStub("unknown-model:latest"), + showModel: async () => showStub({}), + }); + assert.equal(models[0].contextWindow, 8192); + }); + + it("falls back to 8192 when /api/show throws", async () => { + const models = await discoverModels({ + listModels: async () => tagsStub("unknown-model:latest"), + showModel: async () => { throw new Error("network error"); }, + }); + assert.equal(models[0].contextWindow, 8192); + }); +}); \ No newline at end of file diff --git a/src/resources/extensions/shared/gsd-phase-state.ts b/src/resources/extensions/shared/gsd-phase-state.ts new file mode 100644 index 000000000..360410e2a --- /dev/null +++ b/src/resources/extensions/shared/gsd-phase-state.ts @@ -0,0 +1,42 @@ +/** + * GSD Phase State — cross-extension coordination + * Copyright (c) 2026 Jeremy McSpadden + * + * Lightweight module-level state that GSD auto-mode writes to and the + * subagent tool reads from. Both extensions run in the same process so + * a module variable is sufficient — no file I/O needed. + */ + +let _active = false; +let _currentPhase: string | null = null; + +/** Mark GSD auto-mode as active. */ +export function activateGSD(): void { + _active = true; +} + +/** Mark GSD auto-mode as inactive and clear the current phase. 
*/ +export function deactivateGSD(): void { + _active = false; + _currentPhase = null; +} + +/** Set the currently dispatched GSD phase (e.g. "plan-milestone"). */ +export function setCurrentPhase(phase: string): void { + _currentPhase = phase; +} + +/** Clear the current phase (unit completed or aborted). */ +export function clearCurrentPhase(): void { + _currentPhase = null; +} + +/** Returns true if GSD auto-mode is currently active. */ +export function isGSDActive(): boolean { + return _active; +} + +/** Returns the current GSD phase, or null if none is active. */ +export function getCurrentPhase(): string | null { + return _active ? _currentPhase : null; +} diff --git a/src/resources/extensions/shared/tests/gsd-phase-state.test.ts b/src/resources/extensions/shared/tests/gsd-phase-state.test.ts new file mode 100644 index 000000000..2047c3cf6 --- /dev/null +++ b/src/resources/extensions/shared/tests/gsd-phase-state.test.ts @@ -0,0 +1,48 @@ +import { describe, it, beforeEach } from "node:test"; +import assert from "node:assert/strict"; +import { + activateGSD, + deactivateGSD, + setCurrentPhase, + clearCurrentPhase, + isGSDActive, + getCurrentPhase, +} from "../gsd-phase-state.js"; + +describe("gsd-phase-state", () => { + beforeEach(() => { + deactivateGSD(); + }); + + it("tracks active/inactive state", () => { + assert.equal(isGSDActive(), false); + activateGSD(); + assert.equal(isGSDActive(), true); + deactivateGSD(); + assert.equal(isGSDActive(), false); + }); + + it("tracks the current phase when active", () => { + activateGSD(); + assert.equal(getCurrentPhase(), null); + setCurrentPhase("plan-milestone"); + assert.equal(getCurrentPhase(), "plan-milestone"); + clearCurrentPhase(); + assert.equal(getCurrentPhase(), null); + }); + + it("returns null phase when inactive even if phase was set", () => { + activateGSD(); + setCurrentPhase("plan-milestone"); + deactivateGSD(); + assert.equal(getCurrentPhase(), null); + }); + + it("deactivation clears the current phase", () => { + activateGSD(); + setCurrentPhase("execute-task"); + deactivateGSD(); + activateGSD(); + assert.equal(getCurrentPhase(), null); + }); +}); diff --git a/src/resources/extensions/slash-commands/audit.ts b/src/resources/extensions/slash-commands/audit.ts index b5f3bf85c..fe7d3f046 100644 --- a/src/resources/extensions/slash-commands/audit.ts +++ b/src/resources/extensions/slash-commands/audit.ts @@ -1,4 +1,5 @@ import type { ExtensionAPI, ExtensionCommandContext } from "@gsd/pi-coding-agent"; +import { mkdirSync } from "node:fs"; export default function auditCommand(pi: ExtensionAPI) { pi.registerCommand("audit", { @@ -39,7 +40,7 @@ export default function auditCommand(pi: ExtensionAPI) { // ── Step 3: Ensure the output directory exists ─────────────────────── - await pi.exec("mkdir", ["-p", ".gsd/audits"]); + mkdirSync(".gsd/audits", { recursive: true }); // ── Step 4: Send the audit prompt to the agent ─────────────────────── diff --git a/src/resources/extensions/subagent/agents.ts b/src/resources/extensions/subagent/agents.ts index 6f14c3bcf..7f69f3f18 100644 --- a/src/resources/extensions/subagent/agents.ts +++ b/src/resources/extensions/subagent/agents.ts @@ -15,6 +15,7 @@ export interface AgentConfig { description: string; tools?: string[]; model?: string; + conflictsWith?: string[]; systemPrompt: string; source: "user" | "project"; filePath: string; @@ -30,6 +31,13 @@ interface AgentFrontmatter extends Record { description?: string; tools?: string | string[]; model?: string; + conflicts_with?: string; +} + 
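+/**
+ * Illustrative frontmatter input/output (a sketch matching the parsing rules
+ * locked in by tests/agents-conflicts.test.ts later in this diff):
+ *   conflicts_with: "plan-milestone, plan-slice"  ->  ["plan-milestone", "plan-slice"]
+ */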
+export function parseConflictsWith(value: string | undefined): string[] | undefined { + if (typeof value !== "string") return undefined; + const conflicts = value.split(",").map((s) => s.trim()).filter(Boolean); + return conflicts.length > 0 ? conflicts : undefined; } function parseAgentTools(value: string | string[] | undefined): string[] | undefined { @@ -85,12 +93,14 @@ function loadAgentsFromDir(dir: string, source: "user" | "project"): AgentConfig } const tools = parseAgentTools(frontmatter.tools); + const conflictsWith = parseConflictsWith(frontmatter.conflicts_with); agents.push({ name: frontmatter.name, description: frontmatter.description, tools: tools && tools.length > 0 ? tools : undefined, model: frontmatter.model, + conflictsWith, systemPrompt: body, source, filePath, diff --git a/src/resources/extensions/subagent/index.ts b/src/resources/extensions/subagent/index.ts index 62b60757f..8bca18bf7 100644 --- a/src/resources/extensions/subagent/index.ts +++ b/src/resources/extensions/subagent/index.ts @@ -24,6 +24,7 @@ import { type ExtensionAPI, getMarkdownTheme } from "@gsd/pi-coding-agent"; import { Container, Markdown, Spacer, Text } from "@gsd/pi-tui"; import { Type } from "@sinclair/typebox"; import { formatTokenCount } from "../shared/mod.js"; +import { getCurrentPhase } from "../shared/gsd-phase-state.js"; import { type AgentConfig, type AgentScope, discoverAgents } from "./agents.js"; import { type IsolationEnvironment, @@ -352,6 +353,23 @@ async function runSingleAgent( }; } + // GSD phase guard: block agents that conflict with the active GSD phase + if (agent.conflictsWith && agent.conflictsWith.length > 0) { + const activePhase = getCurrentPhase(); + if (activePhase && agent.conflictsWith.includes(activePhase)) { + return { + agent: agentName, + agentSource: agent.source, + task, + exitCode: 1, + messages: [], + stderr: `Agent "${agentName}" is blocked: it conflicts with the active GSD phase "${activePhase}". Use the built-in GSD workflow instead.`, + usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, cost: 0, contextTokens: 0, turns: 0 }, + step, + }; + } + } + let tmpPromptDir: string | null = null; let tmpPromptPath: string | null = null; diff --git a/src/resources/extensions/subagent/isolation.ts b/src/resources/extensions/subagent/isolation.ts index a326f55d3..e862e65ff 100644 --- a/src/resources/extensions/subagent/isolation.ts +++ b/src/resources/extensions/subagent/isolation.ts @@ -53,8 +53,10 @@ interface Baseline { // Directory helpers // ============================================================================ -function encodeCwd(cwd: string): string { - return cwd.replace(/\//g, "--"); +export function encodeCwd(cwd: string): string { + // Encode the entire cwd so Windows drive letters, separators, and UNC + // prefixes cannot leak into the isolation path. 
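+  // Illustration (hypothetical input): encodeCwd("C:\\work\\repo") yields
+  // "Qzpcd29ya1xyZXBv"; the output only needs to be deterministic and
+  // filesystem-safe for use as a directory name.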
+ return Buffer.from(cwd, "utf8").toString("base64url"); } const gsdHome = process.env.GSD_HOME || path.join(os.homedir(), ".gsd"); @@ -500,4 +502,3 @@ export function readIsolationMode(): IsolationMode { return "none"; } } - diff --git a/src/resources/extensions/subagent/tests/agents-conflicts.test.ts b/src/resources/extensions/subagent/tests/agents-conflicts.test.ts new file mode 100644 index 000000000..c6b00b382 --- /dev/null +++ b/src/resources/extensions/subagent/tests/agents-conflicts.test.ts @@ -0,0 +1,33 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { parseConflictsWith } from "../agents.js"; + +describe("parseConflictsWith", () => { + it("parses comma-separated conflict list", () => { + const result = parseConflictsWith("plan-milestone, plan-slice, research-milestone"); + assert.deepEqual(result, ["plan-milestone", "plan-slice", "research-milestone"]); + }); + + it("returns undefined for undefined input", () => { + assert.equal(parseConflictsWith(undefined), undefined); + }); + + it("returns undefined for empty string", () => { + assert.equal(parseConflictsWith(""), undefined); + }); + + it("handles single value without commas", () => { + const result = parseConflictsWith("plan-milestone"); + assert.deepEqual(result, ["plan-milestone"]); + }); + + it("trims whitespace from values", () => { + const result = parseConflictsWith(" plan-milestone , plan-slice "); + assert.deepEqual(result, ["plan-milestone", "plan-slice"]); + }); + + it("filters out empty entries from trailing commas", () => { + const result = parseConflictsWith("plan-milestone,,plan-slice,"); + assert.deepEqual(result, ["plan-milestone", "plan-slice"]); + }); +}); diff --git a/src/resources/skills/create-skill/SKILL.md b/src/resources/skills/create-skill/SKILL.md index ec8b58e78..60139c6ba 100644 --- a/src/resources/skills/create-skill/SKILL.md +++ b/src/resources/skills/create-skill/SKILL.md @@ -78,6 +78,8 @@ Based on the user's message, route directly to the appropriate workflow: **If user intent is unclear, ask minimal clarifying questions:** - "Create a MIDI skill" → "Task-execution skill (does MIDI tasks) or domain expertise (complete MIDI knowledge base)?" - "Work on my skill" → "Which skill? What do you want to do with it?" +- Ask one clarifying question round at a time, then wait for the user's actual response before asking another. +- Never fabricate or simulate user responses while clarifying (for example, fake `[User]` markers or imagined answers). Then proceed directly to the workflow. diff --git a/src/startup-model-validation.ts b/src/startup-model-validation.ts index e7a00d849..a5e9cd24f 100644 --- a/src/startup-model-validation.ts +++ b/src/startup-model-validation.ts @@ -17,7 +17,6 @@ interface MinimalModel { } interface MinimalModelRegistry { - getAll(): MinimalModel[] getAvailable(): MinimalModel[] } @@ -48,10 +47,14 @@ export function validateConfiguredModel( ): void { const configuredProvider = settingsManager.getDefaultProvider() const configuredModel = settingsManager.getDefaultModel() - const allModels = modelRegistry.getAll() const availableModels = modelRegistry.getAvailable() + // Check against availableModels (configured + auth'd) rather than getAll() + // so a stale default pointing at an unconfigured provider triggers the + // fallback. Previously a model present in the registry but missing API + // key / OAuth would satisfy configuredExists and survive startup, ending + // up as ctx.model even though it couldn't actually be used. 
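+  // Illustrative failure mode (mirrors the regression test added in
+  // startup-model-validation.test.ts below): settings pin xai/grok-4-fast-non-reasoning
+  // with no XAI_API_KEY set, so the model is in getAll() but absent from
+  // getAvailable(), and the fallback path now takes over.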
const configuredExists = configuredProvider && configuredModel && - allModels.some((m) => m.provider === configuredProvider && m.id === configuredModel) + availableModels.some((m) => m.provider === configuredProvider && m.id === configuredModel) if (!configuredModel || !configuredExists) { // Model not configured at all, or removed from registry — pick a fallback. diff --git a/src/tests/auto-resume-resource-loader.test.ts b/src/tests/auto-resume-resource-loader.test.ts new file mode 100644 index 000000000..9926e87c2 --- /dev/null +++ b/src/tests/auto-resume-resource-loader.test.ts @@ -0,0 +1,56 @@ +// GSD2 — Regression test: auto-mode resume resolves resource-loader.js from deployed path (#3949) +// Copyright (c) 2026 Jeremy McSpadden +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, resolve, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const autoTsPath = join(__dirname, "..", "resources", "extensions", "gsd", "auto.ts"); +const loaderTsPath = join(__dirname, "..", "loader.ts"); + +test("loader.ts sets GSD_PKG_ROOT env var", () => { + const loaderSrc = readFileSync(loaderTsPath, "utf-8"); + assert.ok( + loaderSrc.includes("process.env.GSD_PKG_ROOT"), + "loader.ts must set GSD_PKG_ROOT so deployed extensions can locate package-root modules", + ); +}); + +test("auto.ts resume uses GSD_PKG_ROOT for resource-loader import, not bare relative path", () => { + const autoSrc = readFileSync(autoTsPath, "utf-8"); + + // Must reference GSD_PKG_ROOT to build an absolute path + assert.ok( + autoSrc.includes("process.env.GSD_PKG_ROOT"), + "auto.ts must use GSD_PKG_ROOT to resolve resource-loader.js from deployed extension path", + ); + + // The import must use the computed variable (resourceLoaderPath), not a hardcoded relative path. 
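+  // A sketch of the shape these assertions expect in auto.ts (assumed, not
+  // quoted from the source):
+  //   const resourceLoaderPath = pathToFileURL(join(pkgRoot, "dist", "resource-loader.js")).href;
+  //   await import(resourceLoaderPath);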
+ assert.ok( + autoSrc.includes("await import(resourceLoaderPath)"), + "auto.ts resource-loader import must use the computed resourceLoaderPath variable, not a hardcoded relative path", + ); + + // The resourceLoaderPath must be constructed from GSD_PKG_ROOT via pathToFileURL + // (raw filesystem paths break on Windows with ERR_UNSUPPORTED_ESM_URL_SCHEME) + assert.ok( + autoSrc.includes("pathToFileURL(join(pkgRoot,"), + "auto.ts must convert the constructed path to a file URL for cross-platform import()", + ); +}); + +test("GSD_PKG_ROOT resolves resource-loader.js correctly from package root", () => { + // Simulate what auto.ts does: given GSD_PKG_ROOT, construct the path + const pkgRoot = resolve(__dirname, "..", ".."); + const resourceLoaderPath = join(pkgRoot, "dist", "resource-loader.js"); + + // After build, dist/resource-loader.js should exist + // (this test runs post-build in CI; in dev it validates the path construction) + const expectedDir = dirname(resourceLoaderPath); + assert.ok( + expectedDir.endsWith(join("dist")), + `resource-loader path should be under dist/, got: ${expectedDir}`, + ); +}); diff --git a/src/tests/cli-onboarding-custom-provider.test.ts b/src/tests/cli-onboarding-custom-provider.test.ts new file mode 100644 index 000000000..3f7644d53 --- /dev/null +++ b/src/tests/cli-onboarding-custom-provider.test.ts @@ -0,0 +1,37 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { SettingsManager } from "../../packages/pi-coding-agent/src/core/settings-manager.ts"; + +test("SettingsManager reads defaultProvider/defaultModel from the explicit agentDir used by CLI (#3860)", () => { + const root = mkdtempSync(join(tmpdir(), "gsd-cli-settings-")); + const cwd = join(root, "project"); + const agentDir = join(root, ".gsd", "agent"); + + try { + mkdirSync(cwd, { recursive: true }); + mkdirSync(agentDir, { recursive: true }); + writeFileSync( + join(agentDir, "settings.json"), + JSON.stringify({ + defaultProvider: "example-provider", + defaultModel: "gpt-5.4", + }), + "utf-8", + ); + + const settingsManager = SettingsManager.create(cwd, agentDir); + assert.equal(settingsManager.getDefaultProvider(), "example-provider"); + assert.equal(settingsManager.getDefaultModel(), "gpt-5.4"); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + +test("cli.ts wires SettingsManager.create with both cwd and agentDir (#3860)", () => { + const cliSource = readFileSync(join(import.meta.dirname, "..", "cli.ts"), "utf-8"); + assert.match(cliSource, /SettingsManager\.create\(process\.cwd\(\),\s*agentDir\)/); +}); diff --git a/src/tests/file-watcher.test.ts b/src/tests/file-watcher.test.ts deleted file mode 100644 index cdfcee6af..000000000 --- a/src/tests/file-watcher.test.ts +++ /dev/null @@ -1,148 +0,0 @@ -import { test, afterEach } from "node:test"; -import assert from "node:assert"; -import { mkdtempSync, mkdirSync, writeFileSync } from "node:fs"; -import { join } from "node:path"; -import { tmpdir } from "node:os"; -import { setTimeout as delay } from "node:timers/promises"; - -import { - startFileWatcher, - stopFileWatcher, -} from "../resources/extensions/gsd/file-watcher.ts"; - -function createTempAgentDir(): string { - const tmp = mkdtempSync(join(tmpdir(), "gsd-fw-test-")); - mkdirSync(join(tmp, "extensions"), { recursive: true }); - // Seed watched files so chokidar treats 
writes as "change" not "add" - writeFileSync(join(tmp, "settings.json"), "{}"); - writeFileSync(join(tmp, "auth.json"), "{}"); - writeFileSync(join(tmp, "models.json"), "{}"); - return tmp; -} - -function createMockEventBus() { - const events: { channel: string; data: unknown }[] = []; - return { - events, - emit(channel: string, data: unknown) { - events.push({ channel, data }); - }, - on(_channel: string, _handler: (data: unknown) => void) { - return () => {}; - }, - }; -} - -afterEach(async () => { - await stopFileWatcher(); -}); - -test("startFileWatcher and stopFileWatcher run without errors", async () => { - const dir = createTempAgentDir(); - const bus = createMockEventBus(); - - await startFileWatcher(dir, bus); - await stopFileWatcher(); -}); - -test("stopFileWatcher is safe to call when no watcher is active", async () => { - await stopFileWatcher(); -}); - -test("settings.json change emits settings-changed event", async () => { - const dir = createTempAgentDir(); - const bus = createMockEventBus(); - - await startFileWatcher(dir, bus); - await delay(200); - - writeFileSync(join(dir, "settings.json"), JSON.stringify({ updated: true })); - // Wait for debounce (300ms) + filesystem propagation - await delay(800); - - const matched = bus.events.filter((e) => e.channel === "settings-changed"); - assert.ok(matched.length > 0, "should emit settings-changed event"); -}); - -test("auth.json change emits auth-changed event", async () => { - const dir = createTempAgentDir(); - const bus = createMockEventBus(); - - await startFileWatcher(dir, bus); - // Allow watcher to fully initialize before writing - await delay(200); - - writeFileSync(join(dir, "auth.json"), JSON.stringify({ token: "new" })); - await delay(800); - - const matched = bus.events.filter((e) => e.channel === "auth-changed"); - assert.ok(matched.length > 0, "should emit auth-changed event"); -}); - -test("models.json change emits models-changed event", async () => { - const dir = createTempAgentDir(); - const bus = createMockEventBus(); - - await startFileWatcher(dir, bus); - await delay(200); - - writeFileSync(join(dir, "models.json"), JSON.stringify({ model: "new" })); - await delay(800); - - const matched = bus.events.filter((e) => e.channel === "models-changed"); - assert.ok(matched.length > 0, "should emit models-changed event"); -}); - -test("extensions directory change emits extensions-changed event", { skip: process.platform === "win32" ? 
"chokidar subdirectory events are unreliable on Windows CI" : undefined }, async () => { - const dir = createTempAgentDir(); - const bus = createMockEventBus(); - - await startFileWatcher(dir, bus); - await delay(500); - - writeFileSync( - join(dir, "extensions", "my-ext.json"), - JSON.stringify({ name: "test" }), - ); - await delay(2000); - - const matched = bus.events.filter( - (e) => e.channel === "extensions-changed", - ); - assert.ok(matched.length > 0, "should emit extensions-changed event"); -}); - -test("unrelated file changes are ignored", async () => { - const dir = createTempAgentDir(); - const bus = createMockEventBus(); - - await startFileWatcher(dir, bus); - // Wait for watcher to settle, then clear any residual events from setup - await delay(400); - bus.events.length = 0; - - writeFileSync(join(dir, "random.txt"), "hello"); - await delay(600); - - assert.strictEqual(bus.events.length, 0, "should not emit any events"); -}); - -test("debouncing coalesces rapid changes into one event", async () => { - const dir = createTempAgentDir(); - const bus = createMockEventBus(); - - await startFileWatcher(dir, bus); - - // Rapid-fire writes - for (let i = 0; i < 5; i++) { - writeFileSync(join(dir, "settings.json"), JSON.stringify({ i })); - } - await delay(800); - - const matched = bus.events.filter((e) => e.channel === "settings-changed"); - assert.strictEqual( - matched.length, - 1, - "rapid changes should be debounced into a single event", - ); -}); diff --git a/src/tests/headless-events.test.ts b/src/tests/headless-events.test.ts index 60c0695e7..4aeae8f39 100644 --- a/src/tests/headless-events.test.ts +++ b/src/tests/headless-events.test.ts @@ -150,7 +150,15 @@ test('empty filter blocks all events', () => { assert.ok(!shouldEmit('message_update')) }) -import { mapStatusToExitCode, EXIT_SUCCESS, EXIT_ERROR, EXIT_BLOCKED, EXIT_CANCELLED } from '../headless-events.js' +import { + mapStatusToExitCode, + EXIT_SUCCESS, + EXIT_ERROR, + EXIT_BLOCKED, + EXIT_CANCELLED, + isInteractiveHeadlessTool, + shouldArmHeadlessIdleTimeout, +} from '../headless-events.js' // ─── mapStatusToExitCode ───────────────────────────────────────────────── @@ -185,3 +193,31 @@ test('mapStatusToExitCode: "cancelled" returns EXIT_CANCELLED', () => { test('mapStatusToExitCode: unknown status returns EXIT_ERROR', () => { assert.equal(mapStatusToExitCode('unknown'), EXIT_ERROR) }) + +test('isInteractiveHeadlessTool: ask_user_questions is interactive', () => { + assert.equal(isInteractiveHeadlessTool('ask_user_questions'), true) +}) + +test('isInteractiveHeadlessTool: secure_env_collect is interactive', () => { + assert.equal(isInteractiveHeadlessTool('secure_env_collect'), true) +}) + +test('isInteractiveHeadlessTool: non-interactive tools stay false', () => { + assert.equal(isInteractiveHeadlessTool('bash'), false) + assert.equal(isInteractiveHeadlessTool(undefined), false) +}) + +test('shouldArmHeadlessIdleTimeout: arms after tool calls when no interactive tool is in flight', () => { + assert.equal(shouldArmHeadlessIdleTimeout(1, 0), true) + assert.equal(shouldArmHeadlessIdleTimeout(3, 0), true) +}) + +test('shouldArmHeadlessIdleTimeout: stays disarmed while interactive tools are in flight (#3714)', () => { + assert.equal(shouldArmHeadlessIdleTimeout(1, 1), false) + assert.equal(shouldArmHeadlessIdleTimeout(5, 2), false) +}) + +test('shouldArmHeadlessIdleTimeout: stays disarmed before any tool call has started', () => { + assert.equal(shouldArmHeadlessIdleTimeout(0, 0), false) + 
assert.equal(shouldArmHeadlessIdleTimeout(0, 1), false)
+})
diff --git a/src/tests/integration/web-auth-token.test.ts b/src/tests/integration/web-auth-token.test.ts
index 9f3571c57..2309cbd26 100644
--- a/src/tests/integration/web-auth-token.test.ts
+++ b/src/tests/integration/web-auth-token.test.ts
@@ -69,19 +69,23 @@ test('app-shell.tsx sendBeacon does not send bare unauthenticated URL', () => {
   }
 })
 
-// ─── proxy.ts contract tests ────────────────────────────────────────────────
+// ─── middleware.ts contract tests ───────────────────────────────────────────
 
-const proxySource = readFileSync(join(projectRoot, 'web', 'proxy.ts'), 'utf-8')
+const middlewareSource = readFileSync(join(projectRoot, 'web', 'middleware.ts'), 'utf-8')
 
-test('proxy.ts accepts _token query parameter as fallback authentication', () => {
-  assert.match(proxySource, /_token/, 'proxy should support _token query parameter for SSE/sendBeacon')
+test('middleware.ts exports a function named middleware', () => {
+  assert.match(middlewareSource, /export function middleware/, 'must export "middleware" for Next.js to activate it')
 })
 
-test('proxy.ts validates bearer token from Authorization header', () => {
-  assert.match(proxySource, /Bearer/, 'proxy should check Authorization: Bearer header')
+test('middleware.ts accepts _token query parameter as fallback authentication', () => {
+  assert.match(middlewareSource, /_token/, 'middleware should support _token query parameter for SSE/sendBeacon')
 })
 
-test('proxy.ts skips auth when GSD_WEB_AUTH_TOKEN is not set', () => {
-  assert.match(proxySource, /GSD_WEB_AUTH_TOKEN/, 'proxy should read GSD_WEB_AUTH_TOKEN from env')
-  assert.match(proxySource, /NextResponse\.next\(\)/, 'proxy should pass through when no token is configured')
+test('middleware.ts validates bearer token from Authorization header', () => {
+  assert.match(middlewareSource, /Bearer/, 'middleware should check Authorization: Bearer header')
+})
+
+test('middleware.ts skips auth when GSD_WEB_AUTH_TOKEN is not set', () => {
+  assert.match(middlewareSource, /GSD_WEB_AUTH_TOKEN/, 'middleware should read GSD_WEB_AUTH_TOKEN from env')
+  assert.match(middlewareSource, /NextResponse\.next\(\)/, 'middleware should pass through when no token is configured')
 })
diff --git a/src/tests/integration/web-live-interaction-contract.test.ts b/src/tests/integration/web-live-interaction-contract.test.ts
index 5e288b69f..ce473ff40 100644
--- a/src/tests/integration/web-live-interaction-contract.test.ts
+++ b/src/tests/integration/web-live-interaction-contract.test.ts
@@ -358,6 +358,7 @@ function routeEvent(state: MinimalLiveState, event: any): MinimalLiveState {
     }
     case "tool_execution_start": {
       s.activeToolExecution = { id: event.toolCallId, name: event.toolName };
+      s.streamingAssistantText = "";
       break;
     }
     case "tool_execution_end": {
@@ -802,6 +803,7 @@ test("(g-2) tool_execution_start/end update activeToolExecution", async () => {
   assert.ok(state.activeToolExecution);
   assert.equal(state.activeToolExecution.id, "tc-1");
   assert.equal(state.activeToolExecution.name, "bash");
+  assert.equal(state.streamingAssistantText, "");
 
   state = routeEvent(state, {
     type: "tool_execution_end",
@@ -813,6 +815,46 @@ test("(g-2) tool_execution_start/end update activeToolExecution", async () => {
   assert.equal(state.activeToolExecution, null);
 });
 
+test("(g-3) tool_execution_start clears provisional streaming text so only post-tool final text survives", async () => {
+  let state = createMinimalLiveState();
+
+  state = routeEvent(state, {
+    type:
"message_update", + assistantMessageEvent: { + type: "text_delta", + delta: "It seems the questions were presented to the user. Let me wait for them to answer.", + }, + }); + assert.equal(state.streamingAssistantText, "It seems the questions were presented to the user. Let me wait for them to answer."); + + state = routeEvent(state, { + type: "tool_execution_start", + toolCallId: "tc-ask-1", + toolName: "ask_user_questions", + }); + assert.equal(state.streamingAssistantText, ""); + + state = routeEvent(state, { + type: "tool_execution_end", + toolCallId: "tc-ask-1", + toolName: "ask_user_questions", + result: {}, + isError: false, + }); + state = routeEvent(state, { + type: "message_update", + assistantMessageEvent: { + type: "text_delta", + delta: "What are you working on? Once you answer I'll tailor my approach accordingly.", + }, + }); + state = routeEvent(state, { type: "turn_end" }); + + assert.deepEqual(state.liveTranscript, [ + "What are you working on? Once you answer I'll tailor my approach accordingly.", + ]); +}); + test("(h) steer and abort commands post the correct RPC command type", async (t) => { const fixture = makeWorkspaceFixture(); const sessionPath = createSessionFile(fixture.projectCwd, fixture.sessionsDir, "sess-steer", "Steer Session"); diff --git a/src/tests/integration/web-mode-cli.test.ts b/src/tests/integration/web-mode-cli.test.ts index 68b6c9c1b..9eee1f803 100644 --- a/src/tests/integration/web-mode-cli.test.ts +++ b/src/tests/integration/web-mode-cli.test.ts @@ -165,6 +165,7 @@ test('launchWebMode prefers the packaged standalone host and opens the resolved detached: true, stdio: 'ignore', windowsHide: true, + shell: false, env: { TEST_ENV: '1', HOSTNAME: '127.0.0.1', diff --git a/src/tests/integration/web-mode-windows-hide.test.ts b/src/tests/integration/web-mode-windows-hide.test.ts index aeb6baeea..c1b2902f5 100644 --- a/src/tests/integration/web-mode-windows-hide.test.ts +++ b/src/tests/integration/web-mode-windows-hide.test.ts @@ -117,4 +117,9 @@ test("launchWebMode source-dev host also passes windowsHide: true", async (t) => true, "source-dev spawn must also include windowsHide: true (#2628)", ); + assert.equal( + capturedOptions!.shell, + true, + "source-dev spawn must include shell: true when launching npm.cmd on Windows", + ); }); diff --git a/src/tests/integration/web-onboarding-contract.test.ts b/src/tests/integration/web-onboarding-contract.test.ts index 3ed833368..016c7ae1e 100644 --- a/src/tests/integration/web-onboarding-contract.test.ts +++ b/src/tests/integration/web-onboarding-contract.test.ts @@ -348,7 +348,7 @@ test("boot and onboarding routes expose locked required state plus explicitly sk ]); const anthropicProvider = bootPayload.onboarding.required.providers.find((provider: any) => provider.id === "anthropic"); assert.equal(anthropicProvider.supports.apiKey, true); - assert.equal(anthropicProvider.supports.oauthAvailable, true); + assert.equal(anthropicProvider.supports.oauthAvailable, false); const onboardingResponse = await onboardingRoute.GET(projectRequest(fixture.projectCwd, "/api/onboarding")); assert.equal(onboardingResponse.status, 200); @@ -408,7 +408,7 @@ test("failed API-key validation stays locked, redacts the error, and is reflecte getEnvApiKey: noEnvApiKey, validateApiKey: async () => ({ ok: false, - message: "OpenAI rejected sk-test-secret-123456 because Bearer sk-test-secret-123456 is invalid", + message: "OpenAI rejected the provided key because Bearer invalid-demo-key is invalid", }), }); @@ -425,7 +425,7 @@ test("failed 
API-key validation stays locked, redacts the error, and is reflecte body: JSON.stringify({ action: "save_api_key", providerId: "openai", - apiKey: "sk-test-secret-123456", + apiKey: "invalid-demo-key", }), }), ); @@ -440,7 +440,7 @@ test("failed API-key validation stays locked, redacts the error, and is reflecte assert.equal(validationPayload.onboarding.lockReason, "required_setup"); assert.equal(validationPayload.onboarding.bridgeAuthRefresh.phase, "idle"); assert.match(validationPayload.onboarding.lastValidation.message, /OpenAI rejected/i); - assert.doesNotMatch(validationPayload.onboarding.lastValidation.message, /sk-test-secret-123456/); + assert.doesNotMatch(validationPayload.onboarding.lastValidation.message, /invalid-demo-key/); assert.equal(authStorage.hasAuth("openai"), false); const bootResponse = await bootRoute.GET(projectRequest(fixture.projectCwd, "/api/boot")); @@ -448,7 +448,7 @@ test("failed API-key validation stays locked, redacts the error, and is reflecte const bootPayload = (await bootResponse.json()) as any; assert.equal(bootPayload.onboarding.locked, true); assert.equal(bootPayload.onboarding.lastValidation.status, "failed"); - assert.doesNotMatch(bootPayload.onboarding.lastValidation.message, /sk-test-secret-123456/); + assert.doesNotMatch(bootPayload.onboarding.lastValidation.message, /invalid-demo-key/); }); test("direct prompt commands cannot bypass onboarding while required setup is still locked", async (t) => { diff --git a/src/tests/mcp-createRequire.test.ts b/src/tests/mcp-createRequire.test.ts index 5f1292866..63c12af0c 100644 --- a/src/tests/mcp-createRequire.test.ts +++ b/src/tests/mcp-createRequire.test.ts @@ -1,12 +1,17 @@ /** - * Regression test for #3603 — MCP server subpath imports via createRequire + * Regression test for #3603 / #3914 — MCP server subpath imports. * - * The ESM wildcard export map in @modelcontextprotocol/sdk does not resolve - * subpath imports correctly. The fix uses createRequire from node:module to - * resolve wildcard subpaths via the CJS resolver which auto-appends .js. + * @modelcontextprotocol/sdk's package.json exports map uses a wildcard + * `./*` → `./dist/cjs/*` with no `.js` suffix, so bare subpath specifiers + * like `@modelcontextprotocol/sdk/server/stdio` resolve to a file that + * doesn't exist. Historically the workaround used `createRequire` so the + * CJS resolver auto-appended `.js`; that no longer works with current + * Node + SDK versions (#3914). * - * Structural verification test — reads source to confirm createRequire import - * and _require.resolve usage exist. + * The reliable convention (used in packages/mcp-server/{server,cli}.ts) + * is to write the `.js` suffix explicitly on every subpath import. This + * test locks that convention in so regressions can't silently reintroduce + * the bare subpath form or the broken createRequire-based resolution. 
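+ *
+ * Concretely (given the export map described above):
+ *   await import('@modelcontextprotocol/sdk/server/stdio')     // ERR_MODULE_NOT_FOUND
+ *   await import('@modelcontextprotocol/sdk/server/stdio.js')  // resolves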
*/ import { describe, test } from 'node:test'; @@ -20,29 +25,31 @@ const __dirname = dirname(__filename); const source = readFileSync(join(__dirname, '..', 'mcp-server.ts'), 'utf-8'); -describe('MCP server createRequire subpath resolution (#3603)', () => { - test('createRequire is imported from node:module', () => { - assert.match(source, /import\s*\{\s*createRequire\s*\}\s*from\s*['"]node:module['"]/, - 'createRequire should be imported from node:module'); +describe('MCP server SDK subpath imports (#3603 / #3914)', () => { + test('server/index.js subpath is imported with explicit .js suffix', () => { + assert.match(source, /await import\(`\$\{MCP_PKG\}\/server\/index\.js`\)/, + 'server import must use `${MCP_PKG}/server/index.js` to satisfy the wildcard export map'); }); - test('_require is created from import.meta.url', () => { - assert.match(source, /createRequire\(import\.meta\.url\)/, - '_require should be created using createRequire(import.meta.url)'); + test('server/stdio.js subpath is imported with explicit .js suffix', () => { + assert.match(source, /await import\(`\$\{MCP_PKG\}\/server\/stdio\.js`\)/, + 'stdio import must use `${MCP_PKG}/server/stdio.js`'); }); - test('_require.resolve is used for subpath imports', () => { - assert.match(source, /_require\.resolve\(/, - '_require.resolve should be used for subpath resolution'); + test('types.js subpath is imported with explicit .js suffix', () => { + assert.match(source, /await import\(`\$\{MCP_PKG\}\/types\.js`\)/, + 'types import must use `${MCP_PKG}/types.js`'); }); - test('server/stdio subpath uses _require.resolve', () => { - assert.match(source, /_require\.resolve\(`\$\{MCP_PKG\}\/server\/stdio`\)/, - 'server/stdio import should use _require.resolve'); - }); - - test('types subpath uses _require.resolve', () => { - assert.match(source, /_require\.resolve\(`\$\{MCP_PKG\}\/types`\)/, - 'types import should use _require.resolve'); + test('legacy createRequire-based resolution is gone', () => { + // Only flag actual code, not the comment that explains the history. + // The import statement, variable declaration, and `_require.resolve(` call + // sites are the real regression surfaces. 
+ assert.doesNotMatch(source, /^\s*import\s*\{\s*createRequire\s*\}\s*from/m, + 'createRequire should not be imported from node:module'); + assert.doesNotMatch(source, /^\s*const\s+_require\s*=\s*createRequire/m, + '_require helper should not be created'); + assert.doesNotMatch(source, /_require\.resolve\(/, + '_require.resolve should not be used for subpath resolution'); }); }); diff --git a/src/tests/mcp-server.test.ts b/src/tests/mcp-server.test.ts index 9581809dd..8a6a672f9 100644 --- a/src/tests/mcp-server.test.ts +++ b/src/tests/mcp-server.test.ts @@ -30,25 +30,11 @@ test('startMcpServer accepts the correct argument shape', async () => { assert.strictEqual(startMcpServer.length, 1, 'startMcpServer should accept one argument') }) -test('startMcpServer can be called with mock tools', async () => { - const { startMcpServer } = await import(distUrl('mcp-server.js')) +test('compiled MCP runtime dependencies resolve with explicit .js subpaths', async () => { + const stdioMod = await import('@modelcontextprotocol/sdk/server/stdio.js') + const typesMod = await import('@modelcontextprotocol/sdk/types.js') - // Create a mock tool matching the McpToolDef interface - const mockTool = { - name: 'test_tool', - description: 'A test tool', - parameters: { type: 'object', properties: {} }, - execute: async () => ({ - content: [{ type: 'text', text: 'hello' }], - }), - } - - // Verify the function can be called with the correct signature - // without throwing during argument validation. It will attempt to - // connect to stdin/stdout as an MCP transport, which won't work in - // a test environment, but the Server instance is created successfully. - assert.doesNotThrow(() => { - void startMcpServer({ tools: [mockTool], version: '0.0.0-test' }) - .catch(() => { /* expected: no MCP client on stdin */ }) - }) + assert.strictEqual(typeof stdioMod.StdioServerTransport, 'function') + assert.ok(typesMod.ListToolsRequestSchema, 'ListToolsRequestSchema should be exported') + assert.ok(typesMod.CallToolRequestSchema, 'CallToolRequestSchema should be exported') }) diff --git a/src/tests/node-modules-symlink.test.ts b/src/tests/node-modules-symlink.test.ts index ef0bdf724..56a9d4a03 100644 --- a/src/tests/node-modules-symlink.test.ts +++ b/src/tests/node-modules-symlink.test.ts @@ -1,9 +1,15 @@ -import test from "node:test"; +/** + * Tests for ensureNodeModulesSymlink — covers symlink reconciliation for + * source installs (#3529) and pnpm-style merged node_modules (#3564). 
+ */ +import { test } from "node:test"; import assert from "node:assert/strict"; -import { existsSync, lstatSync, mkdirSync, mkdtempSync, readlinkSync, rmSync, symlinkSync, unlinkSync } from "node:fs"; +import { existsSync, lstatSync, mkdirSync, mkdtempSync, readFileSync, readlinkSync, readdirSync, rmSync, symlinkSync, unlinkSync, writeFileSync } from "node:fs"; import { join } from "node:path"; import { tmpdir } from "node:os"; +// --- Integration tests via initResources (source/monorepo path) --- + test("initResources creates node_modules symlink in agent dir", async (t) => { const { initResources } = await import("../resource-loader.ts"); const tmp = mkdtempSync(join(tmpdir(), "gsd-symlink-")); @@ -30,7 +36,6 @@ test("initResources replaces a real directory blocking node_modules with a symli const fakeAgentDir = join(tmp, "agent"); t.after(() => rmSync(tmp, { recursive: true, force: true })); - // First call to set up agent dir structure initResources(fakeAgentDir); const nodeModulesPath = join(fakeAgentDir, "node_modules"); @@ -56,7 +61,6 @@ test("initResources replaces a stale symlink with a correct one", async (t) => { const fakeAgentDir = join(tmp, "agent"); t.after(() => rmSync(tmp, { recursive: true, force: true })); - // First call to set up agent dir structure initResources(fakeAgentDir); const nodeModulesPath = join(fakeAgentDir, "node_modules"); @@ -88,7 +92,6 @@ test("initResources replaces symlink whose target was deleted", async (t) => { const correctTarget = readlinkSync(nodeModulesPath); // Create a symlink that points to a path that doesn't exist - // (simulates the case where npm upgrade moved the package location) unlinkSync(nodeModulesPath); const deadTarget = join(tmp, "old-install", "node_modules"); symlinkSync(deadTarget, nodeModulesPath); @@ -102,3 +105,182 @@ test("initResources replaces symlink whose target was deleted", async (t) => { const fixedTarget = readlinkSync(nodeModulesPath); assert.equal(fixedTarget, correctTarget, "broken symlink should be replaced with correct target"); }); + +// --- Unit tests for pnpm-style merged node_modules (#3564) --- +// These simulate the filesystem layout without going through initResources, +// since packageRoot is fixed at module load time. 
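+//
+// Merge rule exercised below (as implemented inline in each test): link every
+// hoisted entry except the package itself, then overlay internal entries so
+// that internal wins on any name collision.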
+ +test("pnpm layout: merged node_modules contains entries from both hoisted and internal", (t) => { + // Simulate pnpm global layout: + // hoisted/node_modules/ + // yaml/ ← external dep + // @sinclair/ ← external scoped dep + // gsd-pi/ ← package root + // node_modules/ + // @gsd/ ← workspace scope (NOT hoisted) + // @gsd-build/ ← workspace scope (NOT hoisted) + const tmp = mkdtempSync(join(tmpdir(), "gsd-pnpm-merge-")); + t.after(() => rmSync(tmp, { recursive: true, force: true })); + + const hoisted = join(tmp, "node_modules"); + const pkgRoot = join(hoisted, "gsd-pi"); + const internal = join(pkgRoot, "node_modules"); + const agentNodeModules = join(tmp, "agent", "node_modules"); + + // Create hoisted entries (external deps) + mkdirSync(join(hoisted, "yaml"), { recursive: true }); + mkdirSync(join(hoisted, "@sinclair", "typebox"), { recursive: true }); + mkdirSync(join(hoisted, "@anthropic-ai", "sdk"), { recursive: true }); + mkdirSync(pkgRoot, { recursive: true }); + + // Create internal entries (workspace packages) + mkdirSync(join(internal, "@gsd", "pi-ai"), { recursive: true }); + mkdirSync(join(internal, "@gsd", "pi-coding-agent"), { recursive: true }); + mkdirSync(join(internal, "@gsd-build", "core"), { recursive: true }); + + // Create merged directory manually (simulating what reconcileMergedNodeModules does) + mkdirSync(agentNodeModules, { recursive: true }); + + // Link hoisted entries (skip gsd-pi itself and dotfiles) + for (const entry of readdirSync(hoisted, { withFileTypes: true })) { + if (entry.name === "gsd-pi" || entry.name.startsWith(".")) continue; + symlinkSync(join(hoisted, entry.name), join(agentNodeModules, entry.name)); + } + + // Overlay all non-dotfile entries from internal (these take precedence) + for (const entry of readdirSync(internal, { withFileTypes: true })) { + if (entry.name.startsWith(".")) continue; + const link = join(agentNodeModules, entry.name); + try { lstatSync(link); unlinkSync(link); } catch { /* didn't exist */ } + symlinkSync(join(internal, entry.name), link); + } + + // Verify: external deps resolve through hoisted symlinks + assert.ok(existsSync(join(agentNodeModules, "yaml")), "yaml should resolve"); + assert.ok(existsSync(join(agentNodeModules, "@sinclair")), "@sinclair should resolve"); + assert.ok(existsSync(join(agentNodeModules, "@anthropic-ai")), "@anthropic-ai should resolve"); + + // Verify: workspace packages resolve through internal symlinks + assert.ok(existsSync(join(agentNodeModules, "@gsd")), "@gsd should resolve"); + assert.ok(existsSync(join(agentNodeModules, "@gsd", "pi-ai")), "@gsd/pi-ai should resolve"); + assert.ok(existsSync(join(agentNodeModules, "@gsd-build")), "@gsd-build should resolve"); + + // Verify: gsd-pi itself is NOT symlinked (it's the package root, not a dep) + assert.ok(!existsSync(join(agentNodeModules, "gsd-pi")), "gsd-pi should not be in merged dir"); + + // Verify: @gsd points to internal, not hoisted (internal takes precedence) + const gsdTarget = readlinkSync(join(agentNodeModules, "@gsd")); + assert.equal(gsdTarget, join(internal, "@gsd"), "@gsd should point to internal node_modules"); +}); + +test("pnpm layout: non-@gsd internal deps (e.g. @anthropic-ai) are included in merged dir", (t) => { + // Regression: PR #3564 narrowed the internal overlay to @gsd* only, + // dropping optionalDependencies like @anthropic-ai/claude-agent-sdk + // that npm installs internally rather than hoisting. 
+ const tmp = mkdtempSync(join(tmpdir(), "gsd-pnpm-internal-optional-")); + t.after(() => rmSync(tmp, { recursive: true, force: true })); + + const hoisted = join(tmp, "node_modules"); + const pkgRoot = join(hoisted, "gsd-pi"); + const internal = join(pkgRoot, "node_modules"); + const agentNodeModules = join(tmp, "agent", "node_modules"); + + // Hoisted: only external deps (no @anthropic-ai — it's internal-only) + mkdirSync(join(hoisted, "yaml"), { recursive: true }); + mkdirSync(pkgRoot, { recursive: true }); + + // Internal: workspace packages + optional dep that wasn't hoisted + mkdirSync(join(internal, "@gsd", "pi-ai"), { recursive: true }); + mkdirSync(join(internal, "@anthropic-ai", "claude-agent-sdk"), { recursive: true }); + + mkdirSync(agentNodeModules, { recursive: true }); + + // Link hoisted entries + for (const entry of readdirSync(hoisted, { withFileTypes: true })) { + if (entry.name === "gsd-pi" || entry.name.startsWith(".")) continue; + symlinkSync(join(hoisted, entry.name), join(agentNodeModules, entry.name)); + } + + // Overlay all non-dotfile internal entries (the fixed logic) + for (const entry of readdirSync(internal, { withFileTypes: true })) { + if (entry.name.startsWith(".")) continue; + const link = join(agentNodeModules, entry.name); + try { lstatSync(link); unlinkSync(link); } catch { /* didn't exist */ } + symlinkSync(join(internal, entry.name), link); + } + + // @anthropic-ai must be present — this is what broke in #3564 + assert.ok(existsSync(join(agentNodeModules, "@anthropic-ai")), "@anthropic-ai should resolve from internal"); + assert.ok(existsSync(join(agentNodeModules, "@anthropic-ai", "claude-agent-sdk")), "@anthropic-ai/claude-agent-sdk should resolve"); + + // @gsd still resolves + assert.ok(existsSync(join(agentNodeModules, "@gsd")), "@gsd should resolve"); + + // Hoisted deps still resolve + assert.ok(existsSync(join(agentNodeModules, "yaml")), "yaml should resolve"); +}); + +test("hasMissingWorkspaceScopes detects pnpm layout", (t) => { + const tmp = mkdtempSync(join(tmpdir(), "gsd-pnpm-detect-")); + t.after(() => rmSync(tmp, { recursive: true, force: true })); + + const hoisted = join(tmp, "hoisted"); + const internal = join(tmp, "internal"); + + // npm-style: @gsd exists in both hoisted and internal + mkdirSync(join(hoisted, "@gsd"), { recursive: true }); + mkdirSync(join(internal, "@gsd"), { recursive: true }); + + // Inline the detection logic for testing + const hasMissing = (h: string, i: string): boolean => { + if (!existsSync(i)) return false; + for (const entry of readdirSync(i, { withFileTypes: true })) { + if (entry.isDirectory() && entry.name.startsWith("@gsd") && + !existsSync(join(h, entry.name))) { + return true; + } + } + return false; + }; + + assert.equal(hasMissing(hoisted, internal), false, "npm-style: no missing scopes"); + + // pnpm-style: @gsd-build only in internal + mkdirSync(join(internal, "@gsd-build"), { recursive: true }); + assert.equal(hasMissing(hoisted, internal), true, "pnpm-style: @gsd-build missing from hoisted"); +}); + +test("merged node_modules marker uses fingerprint including directory entries", (t) => { + const tmp = mkdtempSync(join(tmpdir(), "gsd-pnpm-marker-")); + t.after(() => rmSync(tmp, { recursive: true, force: true })); + + // Simulate two directories with known entries + const hoisted = join(tmp, "hoisted"); + const internal = join(tmp, "internal"); + mkdirSync(join(hoisted, "yaml"), { recursive: true }); + mkdirSync(join(hoisted, "@sinclair"), { recursive: true }); + mkdirSync(join(internal, 
"@gsd"), { recursive: true }); + + // Build fingerprint the same way the production code does + const h = readdirSync(hoisted).sort().join(","); + const i = readdirSync(internal).sort().join(","); + const fakePackageRoot = "/usr/lib/node_modules/gsd-pi"; + const fingerprint = `${fakePackageRoot}\n${h}\n${i}`; + + const agentNodeModules = join(tmp, "agent", "node_modules"); + mkdirSync(agentNodeModules, { recursive: true }); + const marker = join(agentNodeModules, ".gsd-merged"); + writeFileSync(marker, fingerprint); + + // Verify fingerprint contains all three components + const stored = readFileSync(marker, "utf-8").trim(); + assert.ok(stored.includes(fakePackageRoot), "fingerprint includes packageRoot"); + assert.ok(stored.includes("@sinclair"), "fingerprint includes hoisted entries"); + assert.ok(stored.includes("@gsd"), "fingerprint includes internal entries"); + + // Verify fingerprint changes when a new package is added + mkdirSync(join(hoisted, "new-package"), { recursive: true }); + const h2 = readdirSync(hoisted).sort().join(","); + const fingerprint2 = `${fakePackageRoot}\n${h2}\n${i}`; + assert.notEqual(fingerprint, fingerprint2, "fingerprint should change when deps change"); +}); diff --git a/src/tests/onboarding-claude-cli-provider.test.ts b/src/tests/onboarding-claude-cli-provider.test.ts new file mode 100644 index 000000000..fddb8727f --- /dev/null +++ b/src/tests/onboarding-claude-cli-provider.test.ts @@ -0,0 +1,31 @@ +import test from "node:test" +import assert from "node:assert/strict" +import { readFileSync } from "node:fs" +import { join } from "node:path" + +/** + * Source-level regression test: the claude-cli onboarding path must persist + * defaultProvider = 'claude-code' in settings.json so the user is not left + * on the 'anthropic' direct-API provider after selecting Claude Code CLI. + * + * Without this, the auto-migration in cli.ts does not fire when the user + * also has a stored Anthropic API key, leaving them on the wrong provider. 
+ */
+test("onboarding claude-cli path persists defaultProvider to settings.json", () => {
+  const source = readFileSync(
+    join(import.meta.dirname, "..", "onboarding.ts"),
+    "utf-8",
+  )
+
+  // The claude-cli branch must write defaultProvider = 'claude-code' to settings.json
+  const cliBlock = source.slice(
+    source.indexOf("method === 'claude-cli'"),
+    source.indexOf("// ── Step 2"),
+  )
+  assert.ok(cliBlock.length > 0, "claude-cli block not found in onboarding.ts")
+  assert.match(
+    cliBlock,
+    /raw\.defaultProvider\s*=\s*['"]claude-code['"]/,
+    "claude-cli onboarding path must set defaultProvider = 'claude-code' in settings.json",
+  )
+})
diff --git a/src/tests/package-mcp-server-elicitation.test.ts b/src/tests/package-mcp-server-elicitation.test.ts
new file mode 100644
index 000000000..a746d8094
--- /dev/null
+++ b/src/tests/package-mcp-server-elicitation.test.ts
@@ -0,0 +1,227 @@
+import test from 'node:test'
+import assert from 'node:assert/strict'
+import { Client } from '@modelcontextprotocol/sdk/client/index.js'
+import { InMemoryTransport } from '@modelcontextprotocol/sdk/inMemory.js'
+import { ElicitRequestSchema } from '@modelcontextprotocol/sdk/types.js'
+
+import {
+  buildAskUserQuestionsElicitRequest,
+  createMcpServer,
+  formatAskUserQuestionsElicitResult,
+} from '../../packages/mcp-server/src/server.js'
+
+function createSessionManagerStub() {
+  return {
+    startSession: async () => {
+      throw new Error('not implemented in test')
+    },
+    getSession: () => undefined,
+    getResult: () => undefined,
+    cancelSession: async () => {},
+    resolveBlocker: async () => {},
+  }
+}
+
+async function createConnectedClient(options?: {
+  onElicit?: (params: unknown) => Promise<unknown>,
+}) {
+  const [clientTransport, serverTransport] = InMemoryTransport.createLinkedPair()
+
+  const { server } = await createMcpServer(createSessionManagerStub() as never)
+  const client = new Client({
+    name: 'test-client',
+    version: '0.0.0',
+  }, {
+    capabilities: {
+      elicitation: {},
+    },
+  })
+
+  if (options?.onElicit) {
+    client.setRequestHandler(ElicitRequestSchema, options.onElicit)
+  }
+
+  await Promise.all([
+    server.connect(serverTransport),
+    client.connect(clientTransport),
+  ])
+
+  return {
+    client,
+    close: async () => {
+      await client.close()
+      await server.close()
+    },
+  }
+}
+
+test('package MCP server exposes ask_user_questions over listTools', async () => {
+  const { client, close } = await createConnectedClient()
+
+  try {
+    const tools = await client.listTools()
+    assert.ok(tools.tools.some(tool => tool.name === 'ask_user_questions'))
+  } finally {
+    await close()
+  }
+})
+
+test('ask_user_questions returns the packaged answers JSON shape for form elicitation', async () => {
+  const { client, close } = await createConnectedClient({
+    onElicit: async (request) => {
+      const elicitation = (request as {
+        params?: {
+          message: string,
+          requestedSchema: { properties: Record<string, unknown>, required?: string[] },
+        },
+      }).params ?? request as {
+        message: string,
+        requestedSchema: { properties: Record<string, unknown>, required?: string[] },
+      }
+      assert.match(elicitation.message, /Please answer the following question/)
+      assert.ok(elicitation.requestedSchema.properties.deployment)
+      assert.ok(elicitation.requestedSchema.properties['deployment__note'])
+      assert.ok(elicitation.requestedSchema.required?.includes('deployment'))
+
+      return {
+        action: 'accept',
+        content: {
+          deployment: 'None of the above',
+          deployment__note: 'Need hybrid deployment.',
+        },
+      }
+    },
+  })
+
+  try {
+    const result = await client.callTool({
+      name: 'ask_user_questions',
+      arguments: {
+        questions: [
+          {
+            id: 'deployment',
+            header: 'Deploy',
+            question: 'Where will this run?',
+            options: [
+              { label: 'Cloud', description: 'Managed hosting.' },
+              { label: 'On-prem', description: 'Runs in customer infrastructure.' },
+            ],
+          },
+        ],
+      },
+    })
+
+    const text = result.content.find(item => item.type === 'text')
+    assert.ok(text && 'text' in text)
+    assert.equal(
+      text.text,
+      JSON.stringify({
+        answers: {
+          deployment: {
+            answers: ['None of the above', 'user_note: Need hybrid deployment.'],
+          },
+        },
+      }),
+    )
+  } finally {
+    await close()
+  }
+})
+
+test('ask_user_questions returns an error result for invalid question payloads', async () => {
+  const { client, close } = await createConnectedClient()
+
+  try {
+    const result = await client.callTool({
+      name: 'ask_user_questions',
+      arguments: {
+        questions: [
+          {
+            id: 'broken',
+            header: 'Broken',
+            question: 'This payload is invalid',
+            options: [],
+          },
+        ],
+      },
+    })
+
+    const text = result.content.find(item => item.type === 'text')
+    assert.ok(text && 'text' in text)
+    assert.equal(result.isError, true)
+    assert.match(text.text, /requires non-empty options/i)
+  } finally {
+    await close()
+  }
+})
+
+test('ask_user_questions returns the cancellation message when elicitation is declined', async () => {
+  const { client, close } = await createConnectedClient({
+    onElicit: async () => ({
+      action: 'decline',
+    }),
+  })
+
+  try {
+    const result = await client.callTool({
+      name: 'ask_user_questions',
+      arguments: {
+        questions: [
+          {
+            id: 'continue',
+            header: 'Continue',
+            question: 'Continue?',
+            options: [
+              { label: 'Yes', description: 'Proceed.' },
+              { label: 'No', description: 'Stop here.' },
+            ],
+          },
+        ],
+      },
+    })
+
+    const text = result.content.find(item => item.type === 'text')
+    assert.ok(text && 'text' in text)
+    assert.equal(text.text, 'ask_user_questions was cancelled before receiving a response')
+  } finally {
+    await close()
+  }
+})
+
+test('helper formatting stays aligned with the tool contract', () => {
+  const questions = [
+    {
+      id: 'focus_areas',
+      header: 'Focus',
+      question: 'Which areas matter most?',
+      allowMultiple: true,
+      options: [
+        { label: 'Frontend', description: 'Prioritize the UI.' },
+        { label: 'Backend', description: 'Prioritize server logic.'
}, + ], + }, + ] + + const request = buildAskUserQuestionsElicitRequest(questions) + assert.equal(request.mode, 'form') + assert.ok(request.requestedSchema.properties.focus_areas) + assert.ok(!request.requestedSchema.properties['focus_areas__note']) + + const formatted = formatAskUserQuestionsElicitResult(questions, { + action: 'accept', + content: { + focus_areas: ['Frontend', 'Backend'], + }, + }) + + assert.equal( + formatted, + JSON.stringify({ + answers: { + focus_areas: { + answers: ['Frontend', 'Backend'], + }, + }, + }), + ) +}) diff --git a/src/tests/pi-migration-exports.test.ts b/src/tests/pi-migration-exports.test.ts new file mode 100644 index 000000000..e54a8a8c7 --- /dev/null +++ b/src/tests/pi-migration-exports.test.ts @@ -0,0 +1,23 @@ +// GSD-2 — Regression test for pi-migration.ts public exports consumed by cli.ts +// +// Guards against the TS2304 regression introduced by 080c6ac1e where +// src/cli.ts called `getPiDefaultModelAndProvider()` without importing it. +// If the symbol is ever renamed or unexported, this test fails before the +// root `tsc` build breaks every CI job on main. + +import { test } from "node:test"; +import assert from "node:assert/strict"; + +import * as piMigration from "../pi-migration.js"; + +test("pi-migration exports getPiDefaultModelAndProvider for cli.ts fallback-model resolution", () => { + assert.equal( + typeof piMigration.getPiDefaultModelAndProvider, + "function", + "cli.ts validateConfiguredModel relies on this export to pick a fallback model", + ); +}); + +test("pi-migration exports migratePiCredentials for cli.ts startup migration", () => { + assert.equal(typeof piMigration.migratePiCredentials, "function"); +}); diff --git a/src/tests/provider-migrations.test.ts b/src/tests/provider-migrations.test.ts new file mode 100644 index 000000000..d23e22b99 --- /dev/null +++ b/src/tests/provider-migrations.test.ts @@ -0,0 +1,77 @@ +import test from "node:test" +import assert from "node:assert/strict" +import { hasDirectAnthropicApiKey, shouldMigrateAnthropicToClaudeCode } from "../provider-migrations.ts" + +function makeAuthStorage(credentials: unknown[]) { + return { + getCredentialsForProvider(provider: string) { + return provider === "anthropic" ? 
+function makeAuthStorage(credentials: unknown[]) {
+  return {
+    getCredentialsForProvider(provider: string) {
+      return provider === "anthropic" ? credentials : []
+    },
+  }
+}
+
+test("hasDirectAnthropicApiKey detects non-empty auth storage keys", () => {
+  assert.equal(
+    hasDirectAnthropicApiKey(
+      makeAuthStorage([{ type: "api_key", key: "sk-ant-test" }]) as any,
+      {} as NodeJS.ProcessEnv,
+    ),
+    true,
+  )
+})
+
+test("hasDirectAnthropicApiKey ignores empty placeholder keys", () => {
+  assert.equal(
+    hasDirectAnthropicApiKey(
+      makeAuthStorage([{ type: "api_key", key: "" }]) as any,
+      {} as NodeJS.ProcessEnv,
+    ),
+    false,
+  )
+})
+
+test("hasDirectAnthropicApiKey detects ANTHROPIC_API_KEY env fallback", () => {
+  assert.equal(
+    hasDirectAnthropicApiKey(
+      makeAuthStorage([]) as any,
+      { ANTHROPIC_API_KEY: "sk-ant-env" } as NodeJS.ProcessEnv,
+    ),
+    true,
+  )
+})
+
+test("shouldMigrateAnthropicToClaudeCode blocks migration for direct-key users", () => {
+  assert.equal(
+    shouldMigrateAnthropicToClaudeCode({
+      authStorage: makeAuthStorage([{ type: "api_key", key: "sk-ant-test" }]) as any,
+      isClaudeCodeReady: true,
+      defaultProvider: "anthropic",
+      env: {} as NodeJS.ProcessEnv,
+    }),
+    false,
+  )
+})
+
+test("shouldMigrateAnthropicToClaudeCode allows OAuth-only anthropic users", () => {
+  assert.equal(
+    shouldMigrateAnthropicToClaudeCode({
+      authStorage: makeAuthStorage([{ type: "oauth" }]) as any,
+      isClaudeCodeReady: true,
+      defaultProvider: "anthropic",
+      env: {} as NodeJS.ProcessEnv,
+    }),
+    true,
+  )
+})
+
+test("shouldMigrateAnthropicToClaudeCode stays off for other providers", () => {
+  assert.equal(
+    shouldMigrateAnthropicToClaudeCode({
+      authStorage: makeAuthStorage([{ type: "oauth" }]) as any,
+      isClaudeCodeReady: true,
+      defaultProvider: "openai",
+      env: {} as NodeJS.ProcessEnv,
+    }),
+    false,
+  )
+})
diff --git a/src/tests/startup-model-validation.test.ts b/src/tests/startup-model-validation.test.ts
index 01b43c98c..4e449fe13 100644
--- a/src/tests/startup-model-validation.test.ts
+++ b/src/tests/startup-model-validation.test.ts
@@ -123,4 +123,31 @@ describe("validateConfiguredModel — regression #3534", () => {
     assert.ok(settings._provider);
     assert.ok(settings._model);
   });
+
+  it("falls back when configured model exists in registry but provider has no auth", () => {
+    // Simulate: the user configured xai/grok-4-fast-non-reasoning but
+    // XAI_API_KEY is unset, so xai is in getAll() but not getAvailable().
+    // Previously this slipped through configuredExists and left an unusable
+    // default in place.
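+    // (Assumed from usage) createMockRegistry(all, available): the first list
+    // backs registry.getAll(), the second backs registry.getAvailable().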
+    const allModels = [
+      { provider: "xai", id: "grok-4-fast-non-reasoning" },
+      { provider: "anthropic", id: "claude-opus-4-6" },
+    ];
+    const availableModels = [
+      { provider: "anthropic", id: "claude-opus-4-6" },
+    ];
+    const registry = createMockRegistry(allModels, availableModels);
+    const settings = createMockSettings({
+      provider: "xai",
+      model: "grok-4-fast-non-reasoning",
+      thinking: "high",
+    });
+
+    validateConfiguredModel(registry, settings);
+
+    // Should have replaced with an authenticated fallback
+    assert.equal(settings._provider, "anthropic");
+    assert.equal(settings._model, "claude-opus-4-6");
+    // Thinking level resets because the original model was replaced
+    assert.equal(settings._thinking, "off");
+  });
 });
diff --git a/src/tests/update-check.test.ts b/src/tests/update-check.test.ts
index caa712533..40d2c5f28 100644
--- a/src/tests/update-check.test.ts
+++ b/src/tests/update-check.test.ts
@@ -5,7 +5,7 @@ import { join } from 'node:path'
 import { tmpdir } from 'node:os'
 import { createServer } from 'node:http'
 
-import { compareSemver, readUpdateCache, writeUpdateCache, checkForUpdates } from '../update-check.js'
+import { compareSemver, readUpdateCache, writeUpdateCache, checkForUpdates, fetchLatestVersionFromRegistry } from '../update-check.js'
 
 // ---------------------------------------------------------------------------
 // compareSemver
@@ -315,3 +315,23 @@ test('checkForUpdates handles missing version field in response', async (t) => {
 
   assert.ok(!called, 'onUpdate should not be called when response has no version')
 })
+
+test('fetchLatestVersionFromRegistry returns the registry version string', async (t) => {
+  const registry = await startMockRegistry({ version: '2.67.0' })
+  t.after(async () => {
+    await registry.close()
+  })
+
+  const latest = await fetchLatestVersionFromRegistry(registry.url, 5000)
+  assert.equal(latest, '2.67.0')
+})
+
+test('fetchLatestVersionFromRegistry returns null for blank version strings', async (t) => {
+  const registry = await startMockRegistry({ version: '' })
+  t.after(async () => {
+    await registry.close()
+  })
+
+  const latest = await fetchLatestVersionFromRegistry(registry.url, 5000)
+  assert.equal(latest, null)
+})
diff --git a/src/tests/update-cmd-diagnostics.test.ts b/src/tests/update-cmd-diagnostics.test.ts
index 71fff7b36..8f3c5c088 100644
--- a/src/tests/update-cmd-diagnostics.test.ts
+++ b/src/tests/update-cmd-diagnostics.test.ts
@@ -18,10 +18,17 @@ test("update-cmd prints latest version before comparison (#3445)", () => {
   assert.ok(latestPrintIdx < comparisonIdx, "Must print latest BEFORE comparison");
 });
 
-test("update-cmd bypasses npm cache (#3445)", () => {
+test("update commands use the registry fetch helper instead of npm view (#3806)", () => {
   const src = readFileSync(join(__dirname, "..", "update-cmd.ts"), "utf-8");
+  const handlerSrc = readFileSync(join(__dirname, "..", "resources", "extensions", "gsd", "commands-handlers.ts"), "utf-8");
   assert.ok(
-    src.includes("npm_config_cache"),
-    "Must clear npm cache env to bypass stale registry data",
+    src.includes("fetchLatestVersionFromRegistry"),
+    "update-cmd should use the shared registry fetch helper",
   );
+  assert.ok(!src.includes("npm view "), "update-cmd should no longer shell out to npm view");
+  assert.ok(
+    handlerSrc.includes("fetchLatestVersionForCommand"),
+    "/gsd update should fetch the latest version through a registry helper too",
+  );
+  assert.ok(!handlerSrc.includes("npm view "), "/gsd update should no longer shell out to npm view");
 });
diff --git a/src/tests/windows-portability.test.ts b/src/tests/windows-portability.test.ts
new file mode 100644
index 000000000..30dbde0e5
--- /dev/null
+++ b/src/tests/windows-portability.test.ts
@@ -0,0 +1,78 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
+import * as os from "node:os";
+import * as path from "node:path";
+import { resolveLocalBinaryPath } from "../../packages/pi-coding-agent/src/core/lsp/config.ts";
+import { encodeCwd } from "../resources/extensions/subagent/isolation.ts";
+
+function makeTempDir(prefix: string): string {
+  const dir = path.join(
+    os.tmpdir(),
+    `gsd-windows-portability-${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
+  );
+  mkdirSync(dir, { recursive: true });
+  return dir;
+}
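+
+// NOTE (assumed flag semantics): the trailing `true` passed to
+// resolveLocalBinaryPath in the tests below is read here as "resolve using
+// Windows conventions" (.cmd shims, venv Scripts), so the cases stay
+// meaningful even if the suite runs on a POSIX host.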
+
+test("resolveLocalBinaryPath finds Windows npm shims", () => {
+  const dir = makeTempDir("lsp-shim");
+  try {
+    writeFileSync(path.join(dir, "package.json"), "{}");
+    mkdirSync(path.join(dir, "node_modules", ".bin"), { recursive: true });
+    const shimPath = path.join(dir, "node_modules", ".bin", "tsc.cmd");
+    writeFileSync(shimPath, "@echo off\r\n");
+
+    const resolved = resolveLocalBinaryPath("tsc", dir, true);
+    assert.equal(resolved, shimPath);
+  } finally {
+    rmSync(dir, { recursive: true, force: true });
+  }
+});
+
+test("resolveLocalBinaryPath finds Windows venv Scripts executables", () => {
+  const dir = makeTempDir("lsp-scripts");
+  try {
+    writeFileSync(path.join(dir, "pyproject.toml"), "");
+    mkdirSync(path.join(dir, "venv", "Scripts"), { recursive: true });
+    const exePath = path.join(dir, "venv", "Scripts", "python.exe");
+    writeFileSync(exePath, "");
+
+    const resolved = resolveLocalBinaryPath("python", dir, true);
+    assert.equal(resolved, exePath);
+  } finally {
+    rmSync(dir, { recursive: true, force: true });
+  }
+});
+
+test("encodeCwd produces a filesystem-safe token for Windows paths", () => {
+  const encoded = encodeCwd("C:\\Users\\Alice\\repo");
+  assert.match(encoded, /^[A-Za-z0-9_-]+$/);
+  assert.ok(!encoded.includes(":"));
+  assert.ok(!encoded.includes("\\"));
+  assert.ok(!encoded.includes("/"));
+});
+
+test("Windows launch points use shell-safe shims", () => {
+  const gsdClient = readFileSync(
+    path.join(process.cwd(), "vscode-extension", "src", "gsd-client.ts"),
+    "utf8",
+  );
+  const updateService = readFileSync(
+    path.join(process.cwd(), "src", "web", "update-service.ts"),
+    "utf8",
+  );
+  const preExecution = readFileSync(
+    path.join(process.cwd(), "src", "resources", "extensions", "gsd", "pre-execution-checks.ts"),
+    "utf8",
+  );
+  const validatePack = readFileSync(
+    path.join(process.cwd(), "scripts", "validate-pack.js"),
+    "utf8",
+  );
+
+  assert.match(gsdClient, /shell:\s*process\.platform === "win32"/);
+  assert.match(updateService, /npm\.cmd/);
+  assert.match(preExecution, /npm\.cmd/);
+  assert.match(validatePack, /shell:\s*process\.platform === 'win32'/);
+});
diff --git a/src/update-check.ts b/src/update-check.ts
index 784eeb900..d560c318b 100644
--- a/src/update-check.ts
+++ b/src/update-check.ts
@@ -8,6 +8,7 @@
 const CACHE_FILE = join(appRoot, '.update-check')
 const NPM_PACKAGE_NAME = 'gsd-pi'
 const CHECK_INTERVAL_MS = 24 * 60 * 60 * 1000 // 24 hours
 const FETCH_TIMEOUT_MS = 5000
+const DEFAULT_REGISTRY_URL = `https://registry.npmjs.org/${NPM_PACKAGE_NAME}/latest`
 
 interface UpdateCheckCache {
   lastCheck: number
@@ -47,6 +48,32 @@ export function writeUpdateCache(cache: UpdateCheckCache, cachePath: string = CA
   }
 }
 
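+// By construction: ' v2.67.0 ' -> '2.67.0'; empty, whitespace-only, and
+// non-string payloads -> null.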
+function normalizeLatestVersion(version: unknown): string | null {
+  if (typeof version !== 'string') return null
+  const trimmed = version.trim().replace(/^v/, '')
+  return trimmed.length > 0 ? trimmed : null
+}
+
+export async function fetchLatestVersionFromRegistry(
+  registryUrl: string = DEFAULT_REGISTRY_URL,
+  fetchTimeoutMs: number = FETCH_TIMEOUT_MS,
+): Promise<string | null> {
+  const controller = new AbortController()
+  const timeout = setTimeout(() => controller.abort(), fetchTimeoutMs)
+
+  try {
+    const res = await fetch(registryUrl, { signal: controller.signal })
+    if (!res.ok) return null
+
+    const data = (await res.json()) as { version?: string }
+    return normalizeLatestVersion(data.version)
+  } catch {
+    return null
+  } finally {
+    clearTimeout(timeout)
+  }
+}
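+
+// Usage sketch; the rewritten call sites below all reduce to this shape:
+//   const latest = await fetchLatestVersionFromRegistry(registryUrl, fetchTimeoutMs)
+//   if (latest) writeUpdateCache({ lastCheck: Date.now(), latestVersion: latest }, cachePath)
+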
 function printUpdateBanner(current: string, latest: string): void {
   process.stderr.write(
     `  ${chalk.yellow('Update available:')} ${chalk.dim(`v${current}`)} → ${chalk.bold(`v${latest}`)}\n` +
@@ -70,7 +97,7 @@ export interface UpdateCheckOptions {
 export async function checkForUpdates(options: UpdateCheckOptions = {}): Promise<void> {
   const currentVersion = options.currentVersion || process.env.GSD_VERSION || '0.0.0'
   const cachePath = options.cachePath || CACHE_FILE
-  const registryUrl = options.registryUrl || `https://registry.npmjs.org/${NPM_PACKAGE_NAME}/latest`
+  const registryUrl = options.registryUrl || DEFAULT_REGISTRY_URL
   const checkIntervalMs = options.checkIntervalMs ?? CHECK_INTERVAL_MS
   const fetchTimeoutMs = options.fetchTimeoutMs ?? FETCH_TIMEOUT_MS
   const onUpdate = options.onUpdate || printUpdateBanner
@@ -84,18 +111,8 @@ export async function checkForUpdates(options: UpdateCheckOptions = {}): Promise<void> {
     return
   }
 
-  // Fetch latest version from npm registry
-  const controller = new AbortController()
-  const timeout = setTimeout(() => controller.abort(), fetchTimeoutMs)
-
   try {
-    const res = await fetch(registryUrl, { signal: controller.signal })
-    clearTimeout(timeout)
-
-    if (!res.ok) return
-
-    const data = (await res.json()) as { version?: string }
-    const latestVersion = data.version
+    const latestVersion = await fetchLatestVersionFromRegistry(registryUrl, fetchTimeoutMs)
     if (!latestVersion) return
 
     writeUpdateCache({ lastCheck: Date.now(), latestVersion }, cachePath)
@@ -105,8 +122,6 @@ export async function checkForUpdates(options: UpdateCheckOptions = {}): Promise<void> {
     }
   } catch {
     // Network error or timeout — silently ignore, don't block startup
-  } finally {
-    clearTimeout(timeout)
   }
 }
 
@@ -123,7 +138,7 @@ const PROMPT_TIMEOUT_MS = 30_000
 export async function checkAndPromptForUpdates(options: UpdateCheckOptions = {}): Promise<void> {
   const currentVersion = options.currentVersion || process.env.GSD_VERSION || '0.0.0'
   const cachePath = options.cachePath || CACHE_FILE
-  const registryUrl = options.registryUrl || `https://registry.npmjs.org/${NPM_PACKAGE_NAME}/latest`
+  const registryUrl = options.registryUrl || DEFAULT_REGISTRY_URL
   const checkIntervalMs = options.checkIntervalMs ?? CHECK_INTERVAL_MS
   const fetchTimeoutMs = options.fetchTimeoutMs ?? FETCH_TIMEOUT_MS
@@ -134,22 +149,13 @@ export async function checkAndPromptForUpdates(options: UpdateCheckOptions = {})
   if (cache && Date.now() - cache.lastCheck < checkIntervalMs) {
     latestVersion = cache.latestVersion
   } else {
-    const controller = new AbortController()
-    const timeout = setTimeout(() => controller.abort(), fetchTimeoutMs)
     try {
-      const res = await fetch(registryUrl, { signal: controller.signal })
-      clearTimeout(timeout)
-      if (res.ok) {
-        const data = (await res.json()) as { version?: string }
-        if (data.version) {
-          latestVersion = data.version
-          writeUpdateCache({ lastCheck: Date.now(), latestVersion }, cachePath)
-        }
+      latestVersion = await fetchLatestVersionFromRegistry(registryUrl, fetchTimeoutMs)
+      if (latestVersion) {
+        writeUpdateCache({ lastCheck: Date.now(), latestVersion }, cachePath)
       }
     } catch {
       // Network unavailable — silently skip
-    } finally {
-      clearTimeout(timeout)
     }
   }
 
diff --git a/src/update-cmd.ts b/src/update-cmd.ts
index 9534fd9f6..18dcd0c48 100644
--- a/src/update-cmd.ts
+++ b/src/update-cmd.ts
@@ -1,5 +1,5 @@
 import { execSync } from 'node:child_process'
-import { compareSemver } from './update-check.js'
+import { compareSemver, fetchLatestVersionFromRegistry } from './update-check.js'
 
 const NPM_PACKAGE = 'gsd-pi'
 
@@ -14,15 +14,8 @@ export async function runUpdate(): Promise<void> {
   process.stdout.write(`${dim}Current version:${reset} v${current}\n`)
   process.stdout.write(`${dim}Checking npm registry...${reset}\n`)
 
-  // Fetch latest version — bypass npm client cache to avoid stale results (#3445)
-  let latest: string
-  try {
-    latest = execSync(`npm view ${NPM_PACKAGE} version --fetch-retry-mintimeout=3000`, {
-      encoding: 'utf-8',
-      stdio: ['ignore', 'pipe', 'ignore'],
-      env: { ...process.env, npm_config_cache: '' },
-    }).trim()
-  } catch {
+  const latest = await fetchLatestVersionFromRegistry()
+  if (!latest) {
     process.stderr.write(`${yellow}Failed to reach npm registry.${reset}\n`)
     process.exit(1)
   }
diff --git a/src/web-mode.ts b/src/web-mode.ts
index 665e0f5a8..3d917431c 100644
--- a/src/web-mode.ts
+++ b/src/web-mode.ts
@@ -353,6 +353,10 @@ function getSpawnCommandForSourceHost(platform: NodeJS.Platform): string {
   return platform === 'win32' ? 'npm.cmd' : 'npm'
 }
 
+function needsWindowsShell(command: string, platform: NodeJS.Platform): boolean {
+  return platform === 'win32' && /\.(cmd|bat)$/i.test(command)
+}
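+
+// e.g. needsWindowsShell('npm.cmd', 'win32') === true and
+// needsWindowsShell('node', 'win32') === false: only .cmd/.bat shims get a
+// shell, plain executables keep shell-less spawns on every platform.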
 
 function formatLaunchStatus(status: WebModeLaunchStatus): string {
   if (status.ok) {
     return `[gsd] Web mode startup: status=started cwd=${status.cwd} port=${status.port} host=${status.hostPath} kind=${status.hostKind} url=${status.url}\n`
@@ -636,6 +640,7 @@ export async function launchWebMode(
       detached: true,
       stdio: 'ignore',
       windowsHide: true,
+      shell: needsWindowsShell(spawnSpec.command, deps.platform ?? process.platform),
       env,
     },
   )
diff --git a/src/web/onboarding-service.ts b/src/web/onboarding-service.ts
index 259865da5..764949c58 100644
--- a/src/web/onboarding-service.ts
+++ b/src/web/onboarding-service.ts
@@ -142,7 +142,7 @@ type ProviderFlowRuntime = {
 };
 
 const REQUIRED_PROVIDER_CATALOG: RequiredProviderCatalogEntry[] = [
-  { id: "anthropic", label: "Anthropic (Claude)", supportsApiKey: true, supportsOAuth: true, recommended: true },
+  { id: "anthropic", label: "Anthropic (Claude)", supportsApiKey: true, supportsOAuth: false, recommended: true },
   { id: "openai", label: "OpenAI", supportsApiKey: true, supportsOAuth: false },
   { id: "github-copilot", label: "GitHub Copilot", supportsApiKey: false, supportsOAuth: true },
   { id: "openai-codex", label: "ChatGPT Plus/Pro (Codex Subscription)", supportsApiKey: false, supportsOAuth: true },
diff --git a/src/web/update-service.ts b/src/web/update-service.ts
index 62c728161..f7c8d185c 100644
--- a/src/web/update-service.ts
+++ b/src/web/update-service.ts
@@ -4,6 +4,7 @@ import { compareSemver } from "../update-check.ts"
 const NPM_PACKAGE_NAME = "gsd-pi"
 const REGISTRY_URL = `https://registry.npmjs.org/${NPM_PACKAGE_NAME}/latest`
 const FETCH_TIMEOUT_MS = 5000
+const NPM_COMMAND = process.platform === "win32" ? "npm.cmd" : "npm"
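+// Mirrors the npm.cmd/npm shim selection in getSpawnCommandForSourceHost (src/web-mode.ts).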
 
 // --- Version check ---
 
@@ -69,11 +70,13 @@ export function triggerUpdate(targetVersion?: string): boolean {
 
   updateState = { status: "running", targetVersion }
 
-  const child = spawn("npm", ["install", "-g", "gsd-pi@latest"], {
+  const child = spawn(NPM_COMMAND, ["install", "-g", "gsd-pi@latest"], {
     stdio: ["ignore", "ignore", "pipe"],
     // Detach so the child process is not killed if the parent exits
     detached: false,
     windowsHide: true,
+    // Avoid shell: true — npm.cmd is directly executable on Windows via spawn.
+    // Using shell expands the command injection surface unnecessarily.
   })
 
   let stderr = ""
diff --git a/vscode-extension/src/gsd-client.ts b/vscode-extension/src/gsd-client.ts
index b2a872c5e..ef6d65978 100644
--- a/vscode-extension/src/gsd-client.ts
+++ b/vscode-extension/src/gsd-client.ts
@@ -127,6 +127,7 @@ export class GsdClient implements vscode.Disposable {
       cwd: this.cwd,
      stdio: ["pipe", "pipe", "pipe"],
      env: { ...process.env },
+      shell: process.platform === "win32",
     });
 
     this.process = proc;
diff --git a/web/app/api/shutdown/route.ts b/web/app/api/shutdown/route.ts
index 348044c85..9921534ad 100644
--- a/web/app/api/shutdown/route.ts
+++ b/web/app/api/shutdown/route.ts
@@ -1,9 +1,14 @@
 import { scheduleShutdown } from "../../../lib/shutdown-gate";
+import { verifyAuthToken } from "../../../lib/auth-guard";
 
 export const runtime = "nodejs"
 export const dynamic = "force-dynamic"
 
-export async function POST(): Promise<Response> {
+export async function POST(request: Request): Promise<Response> {
+  // Defense-in-depth: verify auth token even though middleware should catch it.
+  const authError = verifyAuthToken(request);
+  if (authError) return authError;
+
   // Schedule a deferred shutdown instead of exiting immediately.
   // This gives the client a window to cancel the exit on page refresh —
   // the boot route calls cancelShutdown() when it receives the next request.
diff --git a/web/app/api/update/route.ts b/web/app/api/update/route.ts
index f0d13c9dd..737790162 100644
--- a/web/app/api/update/route.ts
+++ b/web/app/api/update/route.ts
@@ -3,11 +3,15 @@ import {
   getUpdateStatus,
   triggerUpdate,
 } from "../../../../src/web/update-service.ts"
+import { verifyAuthToken } from "../../../lib/auth-guard";
 
 export const runtime = "nodejs"
 export const dynamic = "force-dynamic"
 
-export async function GET(): Promise<Response> {
+export async function GET(request: Request): Promise<Response> {
+  // Defense-in-depth: verify auth token even though middleware should catch it.
+  const authError = verifyAuthToken(request);
+  if (authError) return authError;
   try {
     const versionInfo = await checkForUpdate()
     const { status, error, targetVersion } = getUpdateStatus()
@@ -37,7 +41,10 @@ export async function GET(): Promise<Response> {
   }
 }
 
-export async function POST(): Promise<Response> {
+export async function POST(request: Request): Promise<Response> {
+  // Defense-in-depth: verify auth token even though middleware should catch it.
+  const authError = verifyAuthToken(request);
+  if (authError) return authError;
   try {
     const versionInfo = await checkForUpdate()
     const started = triggerUpdate(versionInfo.latestVersion)
diff --git a/web/lib/auth-guard.ts b/web/lib/auth-guard.ts
new file mode 100644
index 000000000..d05da6e8c
--- /dev/null
+++ b/web/lib/auth-guard.ts
@@ -0,0 +1,47 @@
+// GSD Web — Inline auth token verification for sensitive API routes
+// Copyright (c) 2026 Jeremy McSpadden
+
+/**
+ * Defense-in-depth auth check for critical API routes (shutdown, update, etc.).
+ *
+ * The primary auth gate is Next.js middleware (web/middleware.ts). This helper
+ * provides a second layer so that even if middleware is misconfigured or
+ * bypassed, sensitive endpoints still reject unauthenticated requests.
+ *
+ * Returns a 401 Response if the token is missing or invalid, or null if auth
+ * passes (or no token is configured).
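+ *
+ * Typical call site (as in web/app/api/update/route.ts):
+ *   const authError = verifyAuthToken(request);
+ *   if (authError) return authError;
+ */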
+export function verifyAuthToken(request: Request): Response | null {
+  const expectedToken = process.env.GSD_WEB_AUTH_TOKEN
+  if (!expectedToken) {
+    // No token configured (e.g. dev mode) — allow through
+    return null
+  }
+
+  let token: string | null = null
+
+  // 1. Authorization header (preferred)
+  const authHeader = request.headers.get("authorization")
+  if (authHeader?.startsWith("Bearer ")) {
+    token = authHeader.slice(7)
+  }
+
+  // 2. Query parameter fallback for EventSource / sendBeacon
+  if (!token) {
+    try {
+      const url = new URL(request.url)
+      token = url.searchParams.get("_token")
+    } catch {
+      // Malformed URL — reject
+    }
+  }
+
+  if (!token || token !== expectedToken) {
+    return Response.json(
+      { error: "Unauthorized" },
+      { status: 401 },
+    )
+  }
+
+  return null
+}
diff --git a/web/lib/gsd-workspace-store.tsx b/web/lib/gsd-workspace-store.tsx
index de80f47bd..adee496d6 100644
--- a/web/lib/gsd-workspace-store.tsx
+++ b/web/lib/gsd-workspace-store.tsx
@@ -5134,25 +5134,18 @@ export class GSDWorkspaceStore {
   }
 
   private handleToolExecutionStart(event: ToolExecutionStartEvent): void {
-    // Finalize any in-flight streaming content into segments before the tool runs
-    const pendingSegments: TurnSegment[] = []
-    if (this.state.streamingThinkingText.length > 0) {
-      pendingSegments.push({ kind: "thinking", content: this.state.streamingThinkingText })
-    }
-    if (this.state.streamingAssistantText.length > 0) {
-      pendingSegments.push({ kind: "text", content: this.state.streamingAssistantText })
-    }
     this.patchState({
       activeToolExecution: {
         id: event.toolCallId,
         name: event.toolName,
         args: (event as Record<string, unknown>).args as Record<string, unknown> | undefined,
       },
-      ...(pendingSegments.length > 0 ? {
-        currentTurnSegments: [...this.state.currentTurnSegments, ...pendingSegments],
-        streamingAssistantText: "",
-        streamingThinkingText: "",
-      } : {}),
+      // Treat pre-tool streaming text as ephemeral. Claude Code can emit
+      // provisional assistant text before a tool call, then replace it with
+      // the real final text after the tool completes. If we finalize that
+      // interim text here, the chat timeline shows stale text above the tool.
+      streamingAssistantText: "",
+      streamingThinkingText: "",
     })
   }
diff --git a/web/proxy.ts b/web/middleware.ts
similarity index 94%
rename from web/proxy.ts
rename to web/middleware.ts
index de2d6c1bb..97b86b0bf 100644
--- a/web/proxy.ts
+++ b/web/middleware.ts
@@ -1,7 +1,7 @@
 import { NextResponse, type NextRequest } from "next/server"
 
 /**
- * Next.js proxy — validates bearer token and origin on all API routes.
+ * Next.js middleware — validates bearer token and origin on all API routes.
  *
  * The GSD_WEB_AUTH_TOKEN env var is set at server launch. Every /api/* request
  * must carry a matching `Authorization: Bearer <token>` header. EventSource
@@ -11,7 +11,7 @@ import { NextResponse, type NextRequest } from "next/server"
  * Additionally, if an `Origin` header is present, it must match the expected
  * localhost origin to prevent cross-site request forgery.
  */
-export function proxy(request: NextRequest): NextResponse | undefined {
+export function middleware(request: NextRequest): NextResponse | undefined {
   const { pathname } = request.nextUrl
 
   // Only gate API routes