merge: resolve upstream/main conflicts for PR #3177
This commit is contained in:
commit
4b69e44a42
1004 changed files with 89413 additions and 5192 deletions
62
.github/workflows/ai-triage.yml
vendored
62
.github/workflows/ai-triage.yml
vendored
|
|
@ -96,41 +96,47 @@ jobs:
|
|||
Be generous in your assessment — only flag clear violations. Ambiguous cases should be marked as aligned.
|
||||
Do NOT flag issues/PRs that are legitimately reporting bugs or requesting features, even if they could be better written.`;
|
||||
|
||||
const response = await fetch('https://api.anthropic.com/v1/messages', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'x-api-key': process.env.ANTHROPIC_API_KEY,
|
||||
'content-type': 'application/json',
|
||||
'anthropic-version': '2023-06-01'
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: 'claude-haiku-4-5-20251001',
|
||||
max_tokens: 1024,
|
||||
messages: [{ role: 'user', content: prompt }]
|
||||
})
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const err = await response.text();
|
||||
core.setFailed(`Anthropic API error: ${response.status} ${err}`);
|
||||
return;
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
const text = data.content[0].text;
|
||||
|
||||
// Extract JSON from response (handle markdown code blocks)
|
||||
const jsonMatch = text.match(/\{[\s\S]*\}/);
|
||||
if (!jsonMatch) {
|
||||
core.setFailed(`Could not parse Claude response: ${text}`);
|
||||
if (!process.env.ANTHROPIC_API_KEY) {
|
||||
core.warning('Skipping AI triage because ANTHROPIC_API_KEY is not configured.');
|
||||
return;
|
||||
}
|
||||
|
||||
let result;
|
||||
try {
|
||||
const response = await fetch('https://api.anthropic.com/v1/messages', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'x-api-key': process.env.ANTHROPIC_API_KEY,
|
||||
'content-type': 'application/json',
|
||||
'anthropic-version': '2023-06-01'
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: 'claude-haiku-4-5-20251001',
|
||||
max_tokens: 1024,
|
||||
messages: [{ role: 'user', content: prompt }]
|
||||
}),
|
||||
signal: AbortSignal.timeout(20000)
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const err = await response.text();
|
||||
core.warning(`Skipping AI triage after Anthropic API error: ${response.status} ${err}`);
|
||||
return;
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
const text = data.content?.[0]?.text ?? '';
|
||||
|
||||
// Extract JSON from response (handle markdown code blocks)
|
||||
const jsonMatch = text.match(/\{[\s\S]*\}/);
|
||||
if (!jsonMatch) {
|
||||
core.warning(`Skipping AI triage because the model response was not parseable JSON: ${text}`);
|
||||
return;
|
||||
}
|
||||
|
||||
result = JSON.parse(jsonMatch[0]);
|
||||
} catch (e) {
|
||||
core.setFailed(`JSON parse error: ${e.message}\nRaw text: ${text}`);
|
||||
core.warning(`Skipping AI triage after unexpected failure: ${e.message}`);
|
||||
return;
|
||||
}
|
||||
core.info(`Triage result: ${JSON.stringify(result, null, 2)}`);
|
||||
|
|
|
|||
13
.github/workflows/ci.yml
vendored
13
.github/workflows/ci.yml
vendored
|
|
@ -155,7 +155,7 @@ jobs:
|
|||
run: npm run test:coverage
|
||||
|
||||
windows-portability:
|
||||
timeout-minutes: 15
|
||||
timeout-minutes: 25
|
||||
needs: detect-changes
|
||||
if: >-
|
||||
needs.detect-changes.outputs.docs-only != 'true'
|
||||
|
|
@ -180,12 +180,17 @@ jobs:
|
|||
- name: Typecheck extensions
|
||||
run: npm run typecheck:extensions
|
||||
|
||||
- name: Run unit tests
|
||||
run: npm run test:unit
|
||||
|
||||
- name: Run package tests
|
||||
run: npm run test:packages
|
||||
|
||||
- name: Run Windows portability tests
|
||||
run: >-
|
||||
node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs
|
||||
--experimental-strip-types --test
|
||||
src/tests/windows-portability.test.ts
|
||||
src/resources/extensions/gsd/tests/validate-directory.test.ts
|
||||
src/tests/integration/web-mode-windows-hide.test.ts
|
||||
|
||||
rtk-portability:
|
||||
timeout-minutes: 20
|
||||
needs: detect-changes
|
||||
|
|
|
|||
17
.gitignore
vendored
17
.gitignore
vendored
|
|
@ -2,6 +2,16 @@
|
|||
# ── Compiled test output ──
|
||||
dist-test/
|
||||
|
||||
# ── Compiled output in src/ (should only contain .ts source) ──
|
||||
src/**/*.js
|
||||
src/**/*.js.map
|
||||
src/**/*.d.ts
|
||||
src/**/*.d.ts.map
|
||||
!src/**/*.test.js
|
||||
|
||||
# ── Repowise index (local machine-generated cache) ──
|
||||
.repowise/
|
||||
|
||||
# ── GSD project state (development-only, lives in worktree branches) ──
|
||||
package-lock.json
|
||||
.claude/
|
||||
|
|
@ -42,6 +52,9 @@ tmp/
|
|||
packages/*/dist/
|
||||
packages/*/node_modules/
|
||||
|
||||
# ── Scratch/WIP files ──
|
||||
preflight-script.ts
|
||||
|
||||
# ── GSD baseline (auto-generated) ──
|
||||
dist/
|
||||
!/pkg/dist/modes/
|
||||
|
|
@ -55,6 +68,7 @@ TODOS.md
|
|||
.planning/
|
||||
.audits/
|
||||
docs/coherence-audit/
|
||||
.plans/
|
||||
|
||||
# ── GSD project state (per-worktree, never committed) ──
|
||||
.gsd/
|
||||
|
|
@ -65,3 +79,6 @@ bun.lock
|
|||
|
||||
# ── GSD baseline (auto-generated) ──
|
||||
.gsd
|
||||
|
||||
# ── GSD baseline (auto-generated) ──
|
||||
.gsd-id
|
||||
|
|
|
|||
14
.mcp.json
Normal file
14
.mcp.json
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
{
|
||||
"mcpServers": {
|
||||
"repowise": {
|
||||
"command": "repowise",
|
||||
"args": [
|
||||
"mcp",
|
||||
"/Users/jeremymcspadden/Github/gsd-2",
|
||||
"--transport",
|
||||
"stdio"
|
||||
],
|
||||
"description": "repowise: codebase intelligence \u2014 docs, graph, git signals, dead code, decisions"
|
||||
}
|
||||
}
|
||||
}
|
||||
138
.plans/extension-loading-multi-path.md
Normal file
138
.plans/extension-loading-multi-path.md
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
# Extension Loading: Dependency Sort + Unified Enable/Disable
|
||||
|
||||
## Context
|
||||
|
||||
GSD-2 has a well-structured extension system with three discovery paths (bundled, global/community, project-local) that are **already wired up** through pi's `DefaultPackageManager.addAutoDiscoveredResources()`. However, two critical gaps remain:
|
||||
|
||||
1. `sortExtensionPaths()` (topological dependency sort) is implemented but **never called** — so `dependencies.extensions` in manifests currently has no effect on load order
|
||||
2. The GSD extension registry (enable/disable) only applies to **bundled** extensions — community extensions bypass it entirely
|
||||
|
||||
### Architecture (Current Flow)
|
||||
|
||||
```
|
||||
GSD loader.ts
|
||||
→ discoverExtensionEntryPaths(bundledExtDir)
|
||||
→ filter by GSD registry (isExtensionEnabled)
|
||||
→ set GSD_BUNDLED_EXTENSION_PATHS env var
|
||||
↓
|
||||
DefaultResourceLoader.reload()
|
||||
→ packageManager.resolve()
|
||||
→ addAutoDiscoveredResources()
|
||||
→ project: cwd/.gsd/extensions/ (CONFIG_DIR_NAME = ".gsd")
|
||||
→ global: ~/.gsd/agent/extensions/ (includes synced bundled)
|
||||
→ loadExtensions(mergedPaths) ← NO sort, NO registry check on community
|
||||
```
|
||||
|
||||
### Key Files
|
||||
|
||||
| File | Role |
|
||||
|------|------|
|
||||
| `src/loader.ts` (lines 146-161) | GSD startup — bundled discovery + registry filter |
|
||||
| `src/extension-sort.ts` | Topological sort (Kahn's BFS) — EXISTS but NEVER CALLED |
|
||||
| `src/extension-registry.ts` | Registry I/O, enable/disable, tier checks |
|
||||
| `src/resource-loader.ts` (lines 589-607) | `buildResourceLoader()` — constructs DefaultResourceLoader |
|
||||
| `packages/pi-coding-agent/src/core/resource-loader.ts` (lines 311-395) | `reload()` — merges paths, calls `loadExtensions()` |
|
||||
| `packages/pi-coding-agent/src/core/package-manager.ts` (lines 1585-1700) | `addAutoDiscoveredResources()` — auto-discovers from .gsd/ dirs |
|
||||
| `packages/pi-coding-agent/src/core/extensions/loader.ts` (lines 945-1002) | `discoverAndLoadExtensions()` — DEAD CODE, never invoked |
|
||||
|
||||
---
|
||||
|
||||
## Plan
|
||||
|
||||
### Task 1: Wire topological sort into extension loading
|
||||
|
||||
**What:** Call `sortExtensionPaths()` on the merged extension paths before passing them to `loadExtensions()`.
|
||||
|
||||
**Where:** `packages/pi-coding-agent/src/core/resource-loader.ts` ~line 381-385
|
||||
|
||||
**Before:**
|
||||
```typescript
|
||||
const extensionsResult = await loadExtensions(extensionPaths, this.cwd, this.eventBus);
|
||||
```
|
||||
|
||||
**After:**
|
||||
```typescript
|
||||
import { sortExtensionPaths } from '../../../../src/extension-sort.js';
|
||||
|
||||
const { sortedPaths, warnings } = sortExtensionPaths(extensionPaths);
|
||||
for (const w of warnings) {
|
||||
// emit as diagnostic, not hard error
|
||||
}
|
||||
const extensionsResult = await loadExtensions(sortedPaths, this.cwd, this.eventBus);
|
||||
```
|
||||
|
||||
**Consideration:** `sortExtensionPaths` lives in `src/` (GSD side), not in `packages/pi-coding-agent/`. Need to either:
|
||||
- (a) Move it into pi-coding-agent as a shared utility, OR
|
||||
- (b) Import it cross-package (already done for other GSD→pi imports), OR
|
||||
- (c) Call it on the GSD side before paths reach pi — harder since auto-discovered paths are added inside pi's package manager
|
||||
|
||||
Option (a) is cleanest — the sort logic only depends on `readManifestFromEntryPath` which is also in `src/extension-registry.ts` but could be duplicated or shared.
|
||||
|
||||
### Task 2: Apply GSD registry to community extensions
|
||||
|
||||
**What:** When `buildResourceLoader()` in `src/resource-loader.ts` constructs the DefaultResourceLoader, also discover and filter community extensions from `~/.gsd/agent/extensions/` through the GSD registry — same as it already does for `~/.pi/agent/extensions/` paths.
|
||||
|
||||
**Where:** `src/resource-loader.ts` → `buildResourceLoader()` (lines 589-607)
|
||||
|
||||
**Current code already filters pi extensions:**
|
||||
```typescript
|
||||
const piExtensionPaths = discoverExtensionEntryPaths(piExtensionsDir)
|
||||
.filter((entryPath) => !bundledKeys.has(getExtensionKey(entryPath, piExtensionsDir)))
|
||||
.filter((entryPath) => {
|
||||
const manifest = readManifestFromEntryPath(entryPath)
|
||||
if (!manifest) return true
|
||||
return isExtensionEnabled(registry, manifest.id)
|
||||
})
|
||||
```
|
||||
|
||||
**Add similar filtering for community extensions in agentDir:**
|
||||
- Discover extensions in `~/.gsd/agent/extensions/` that are NOT bundled
|
||||
- Filter through `isExtensionEnabled(registry, manifest.id)`
|
||||
- Exclude disabled extensions from what the resource loader loads (via override patterns or pre-filtering)
|
||||
|
||||
**Alternative approach:** Hook into `addAutoDiscoveredResources` or the `addResource` call to check the GSD registry. This might be cleaner since the auto-discovery already happens inside pi's package manager.
|
||||
|
||||
### Task 3: Emit sort warnings as diagnostics
|
||||
|
||||
**What:** Surface dependency warnings (missing deps, cycles) through GSD's diagnostic system so users see them.
|
||||
|
||||
**Where:** Wherever the sort is invoked from Task 1.
|
||||
|
||||
**Format:**
|
||||
```
|
||||
⚠ Extension 'gsd-watch' declares dependency 'gsd' which is not installed — loading anyway
|
||||
⚠ Extensions 'foo' and 'bar' form a dependency cycle — loading in alphabetical order
|
||||
```
|
||||
|
||||
### Task 4: Clean up dead code
|
||||
|
||||
**What:** The `discoverAndLoadExtensions()` function in `packages/pi-coding-agent/src/core/extensions/loader.ts` (lines 945-1002) is exported but never invoked. The project-local trust model inside it (`getUntrustedExtensionPaths`) also never runs.
|
||||
|
||||
**Options:**
|
||||
- (a) Remove it entirely — it's dead
|
||||
- (b) Mark deprecated — in case upstream pi uses it
|
||||
- (c) Leave it — lowest risk
|
||||
|
||||
Recommend (b) for now — add `@deprecated` JSDoc so it doesn't grow new callers.
|
||||
|
||||
### Task 5: Tests
|
||||
|
||||
- **Sort integration test:** Create two extensions where A depends on B. Verify B loads before A after sort.
|
||||
- **Registry community test:** Drop a community extension in `~/.gsd/agent/extensions/`, run `gsd extensions disable <id>`, verify it doesn't load.
|
||||
- **Conflict test:** Same extension ID in project-local and global — verify project-local wins.
|
||||
- **Missing dep test:** Extension declares dependency on non-existent extension — verify warning emitted, extension still loads.
|
||||
- **Cycle test:** Two extensions that depend on each other — verify warning, both load.
|
||||
|
||||
---
|
||||
|
||||
## Follow-up PR (separate)
|
||||
|
||||
**Subagent extension forwarding:** Update `src/resources/extensions/subagent/index.ts` to forward ALL extension paths (not just bundled) to child processes. May need a second env var like `GSD_COMMUNITY_EXTENSION_PATHS` or consolidate into `GSD_EXTENSION_PATHS`.
|
||||
|
||||
---
|
||||
|
||||
## Open Questions
|
||||
|
||||
1. **Where should `sortExtensionPaths` live?** Currently in `src/` (GSD side). Needs to be callable from pi's resource-loader. Options: move to pi, keep and import cross-package, or duplicate.
|
||||
2. **Should community extensions respect the same registry as bundled?** Or should they have their own enable/disable mechanism? Current plan unifies them.
|
||||
3. **Project-local trust:** The trust-on-first-use (TOFU) model in the dead `discoverAndLoadExtensions()` never runs. Should `addAutoDiscoveredResources` also gate project-local extensions behind trust? Or is `.gsd/extensions/` in your own project always trusted?
|
||||
241
.plans/ollama-native-provider.md
Normal file
241
.plans/ollama-native-provider.md
Normal file
|
|
@ -0,0 +1,241 @@
|
|||
# Ollama Extension — First-Class Local LLM Support
|
||||
|
||||
## Status: DRAFT — Awaiting approval
|
||||
|
||||
## Problem
|
||||
|
||||
Ollama support in GSD2 currently requires manual `models.json` configuration. Users must:
|
||||
1. Know the OpenAI-compatibility endpoint (`localhost:11434/v1`)
|
||||
2. Manually list every model they want to use
|
||||
3. Set compat flags (`supportsDeveloperRole: false`, etc.)
|
||||
4. Use a dummy API key
|
||||
|
||||
There's an `ollama-cloud` provider for hosted Ollama, and a discovery adapter that can list models, but no first-class **local Ollama** extension that "just works."
|
||||
|
||||
## Goal
|
||||
|
||||
Make Ollama the easiest way to use GSD2 — zero config when Ollama is running locally. All Ollama functionality lives in a single extension: `src/resources/extensions/ollama/`.
|
||||
|
||||
## Architecture
|
||||
|
||||
Everything is a self-contained extension under `src/resources/extensions/ollama/`. The extension:
|
||||
- Auto-detects Ollama on startup via health check
|
||||
- Discovers and registers local models with the model registry
|
||||
- Provides native Ollama API streaming (not OpenAI shim)
|
||||
- Exposes `/ollama` slash commands for model management
|
||||
- Registers an LLM-callable tool for model pull/status
|
||||
|
||||
Minimal core changes — `KnownProvider` and `KnownApi` type additions plus `env-api-keys.ts` key resolution in `pi-ai`, and provider entries in `src/onboarding.ts` and `src/wizard.ts` (see Core Changes Summary). Everything else is in the extension.
|
||||
|
||||
## File Structure
|
||||
|
||||
```
|
||||
src/resources/extensions/ollama/
|
||||
├── index.ts # Extension entry — wires everything on session_start
|
||||
├── ollama-client.ts # HTTP client for Ollama REST API (/api/*)
|
||||
├── ollama-discovery.ts # Model discovery + capability detection
|
||||
├── ollama-provider.ts # Native /api/chat streaming provider (registers with pi-ai)
|
||||
├── ollama-commands.ts # /ollama slash commands (status, pull, list, remove, ps)
|
||||
├── ollama-tool.ts # LLM-callable tool for model management
|
||||
├── model-capabilities.ts # Known model capability table (context window, vision, reasoning)
|
||||
└── types.ts # Shared types for Ollama API responses
|
||||
```
|
||||
|
||||
## Scope
|
||||
|
||||
### Phase 1: Auto-Discovery + OpenAI-Compat Routing
|
||||
|
||||
**What:** Extension that auto-detects Ollama, discovers models, registers them using the existing `openai-completions` API provider. Zero config needed.
|
||||
|
||||
**Extension files:**
|
||||
- `ollama/index.ts` — Main entry. On `session_start`:
|
||||
1. Probe `localhost:11434` (or `OLLAMA_HOST`) with 1.5s timeout
|
||||
2. If reachable, discover models via `/api/tags`
|
||||
3. Register discovered models with `ctx.modelRegistry` using correct defaults
|
||||
4. Show status widget if Ollama is detected
|
||||
- `ollama/ollama-client.ts` — Low-level HTTP client:
|
||||
- `isRunning()` — `GET /` health check
|
||||
- `getVersion()` — `GET /api/version`
|
||||
- `listModels()` — `GET /api/tags`
|
||||
- `showModel(name)` — `POST /api/show` (details, template, parameters, size)
|
||||
- `getRunningModels()` — `GET /api/ps` (loaded models, VRAM usage)
|
||||
- `pullModel(name, onProgress)` — `POST /api/pull` (streaming progress)
|
||||
- `deleteModel(name)` — `DELETE /api/delete`
|
||||
- `copyModel(source, dest)` — `POST /api/copy`
|
||||
- Respects `OLLAMA_HOST` env var for non-default endpoints
|
||||
- `ollama/ollama-discovery.ts` — Enhanced model discovery:
|
||||
- Calls `/api/tags` to get model list
|
||||
- Calls `/api/show` once per model (requests batched, results cached) to get:
|
||||
- `details.parameter_size` → estimate context window
|
||||
- `details.families` → detect vision (clip), reasoning (deepseek-r1)
|
||||
- `modelfile` → extract default parameters
|
||||
- Returns enriched `DiscoveredModel[]` with proper capabilities
|
||||
- `ollama/model-capabilities.ts` — Known model lookup table:
|
||||
- Maps well-known model families to capabilities
|
||||
- e.g., `llama3.1` → `{ contextWindow: 131072, input: ["text"] }`
|
||||
- e.g., `llava` → `{ contextWindow: 4096, input: ["text", "image"] }`
|
||||
- e.g., `deepseek-r1` → `{ reasoning: true, contextWindow: 131072 }`
|
||||
- e.g., `qwen2.5-coder` → `{ contextWindow: 131072, input: ["text"] }`
|
||||
- Fallback: estimate from parameter count if not in table
|
||||
- `ollama/types.ts` — Ollama API response types
|
||||
|
||||
**Core changes (minimal):**
|
||||
- `packages/pi-ai/src/types.ts` — Add `"ollama"` to `KnownProvider`
|
||||
- `packages/pi-ai/src/env-api-keys.ts` — Add `"ollama"` key resolution (returns `"ollama"` placeholder — no real key needed)
|
||||
- `src/onboarding.ts` — Add `"ollama"` to provider selection list
|
||||
- `src/wizard.ts` — Add `ollama` entry (no key required)
|
||||
|
||||
**Model registration details:**
|
||||
Each discovered model registers as:
|
||||
```typescript
|
||||
{
|
||||
id: "llama3.1:8b", // from /api/tags
|
||||
name: "Llama 3.1 8B", // humanized
|
||||
api: "openai-completions", // uses existing provider
|
||||
provider: "ollama",
|
||||
baseUrl: "http://localhost:11434/v1",
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
reasoning: false, // from capabilities table
|
||||
input: ["text"], // from capabilities table
|
||||
contextWindow: 131072, // from capabilities table or /api/show
|
||||
maxTokens: 16384, // conservative default
|
||||
compat: {
|
||||
supportsDeveloperRole: false,
|
||||
supportsReasoningEffort: false,
|
||||
supportsUsageInStreaming: false,
|
||||
maxTokensField: "max_tokens",
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
**Behavior:**
|
||||
- `gsd --list-models` shows all locally-pulled Ollama models automatically
|
||||
- `/model ollama/llama3.1:8b` works without any config file
|
||||
- If Ollama isn't running, extension is silent — no errors, no models listed
|
||||
- `models.json` overrides still work (user config wins over auto-discovery)
|
||||
|
||||
### Phase 2: Native Ollama API Provider (`/api/chat`)
|
||||
|
||||
**What:** A dedicated streaming provider that talks Ollama's native protocol instead of the OpenAI compatibility shim.
|
||||
|
||||
**Extension files:**
|
||||
- `ollama/ollama-provider.ts` — Native `/api/chat` streaming:
|
||||
- Registers `"ollama-chat"` API with `registerApiProvider()`
|
||||
- Implements `stream()` and `streamSimple()`:
|
||||
- Maps GSD `Context` → Ollama messages format
|
||||
- Maps GSD `Tool[]` → Ollama tool format
|
||||
- Streams NDJSON responses, maps back to `AssistantMessage` events
|
||||
- Extracts `<think>` blocks for reasoning models (deepseek-r1, qwq)
|
||||
- Ollama-specific options:
|
||||
- `keep_alive` — control model memory retention (default: "5m")
|
||||
- `num_ctx` — pass through model's context window
|
||||
- `num_predict` — max output tokens
|
||||
- Temperature, top_p, top_k
|
||||
- Response metadata:
|
||||
- `eval_count` / `eval_duration` → tokens/sec in usage stats
|
||||
- `total_duration`, `load_duration` → performance visibility
|
||||
- Vision support: converts image content to base64 for multimodal models
|
||||
|
||||
**Core changes:**
|
||||
- `packages/pi-ai/src/types.ts` — Add `"ollama-chat"` to `KnownApi`
|
||||
|
||||
**Once this provider lands, models discovered in Phase 1 switch to `api: "ollama-chat"` by default.** Users can force OpenAI-compat via `models.json` override if needed.
|
||||
|
||||
**Why native over OpenAI-compat:**
|
||||
- Full `keep_alive` / `num_ctx` control
|
||||
- Better error messages (Ollama-native vs generic OpenAI)
|
||||
- More reliable tool calling on Ollama's native format
|
||||
- Performance metrics in response (tokens/sec)
|
||||
- Foundation for model management commands
|
||||
|
||||
### Phase 3: Local LLM Management UX
|
||||
|
||||
**What:** `/ollama` slash commands and an LLM tool for model management.
|
||||
|
||||
**Extension files:**
|
||||
- `ollama/ollama-commands.ts` — Slash commands registered via `pi.registerCommand()`:
|
||||
- `/ollama` — Status overview:
|
||||
```
|
||||
Ollama v0.5.7 — running (localhost:11434)
|
||||
|
||||
Loaded:
|
||||
llama3.1:8b 4.7 GB VRAM idle 3m
|
||||
|
||||
Available:
|
||||
llama3.1:8b (4.7 GB)
|
||||
qwen2.5-coder:7b (4.4 GB)
|
||||
deepseek-r1:8b (4.9 GB)
|
||||
```
|
||||
- `/ollama pull <model>` — Pull with streaming progress via `ctx.ui.setWidget()`
|
||||
- `/ollama list` — List all local models with sizes and families
|
||||
- `/ollama remove <model>` — Delete a model (with confirmation)
|
||||
- `/ollama ps` — Running models + VRAM usage
|
||||
- `ollama/ollama-tool.ts` — LLM-callable tool registered via `pi.registerTool()`:
|
||||
- `ollama_manage` tool — lets the agent pull/list/check models
|
||||
- Parameters: `{ action: "list" | "pull" | "status" | "ps", model?: string }`
|
||||
- Use case: agent detects it needs a model, pulls it automatically
|
||||
|
||||
**UX Flow:**
|
||||
```
|
||||
$ gsd
|
||||
> /ollama
|
||||
Ollama v0.5.7 — running (localhost:11434)
|
||||
Loaded:
|
||||
llama3.1:8b — 4.7 GB VRAM, idle 3m
|
||||
Available:
|
||||
llama3.1:8b (4.7 GB)
|
||||
qwen2.5-coder:7b (4.4 GB)
|
||||
deepseek-r1:8b (4.9 GB)
|
||||
|
||||
> /ollama pull codestral:22b
|
||||
Pulling codestral:22b...
|
||||
████████████████████████████░░░░ 78% (14.2 GB / 18.1 GB)
|
||||
✓ codestral:22b ready
|
||||
|
||||
> /model ollama/codestral:22b
|
||||
Switched to codestral:22b (local, Ollama)
|
||||
```
|
||||
|
||||
## Implementation Order
|
||||
|
||||
1. **Phase 1** — Auto-discovery with OpenAI-compat routing. Biggest user impact, smallest risk.
|
||||
2. **Phase 3** — Management UX (`/ollama` commands). Valuable even before native API.
|
||||
3. **Phase 2** — Native `/api/chat` provider. Optimization over OpenAI-compat; do last.
|
||||
|
||||
## Core Changes Summary (minimal)
|
||||
|
||||
| File | Change |
|
||||
|------|--------|
|
||||
| `packages/pi-ai/src/types.ts` | Add `"ollama"` to `KnownProvider`, `"ollama-chat"` to `KnownApi` (Phase 2) |
|
||||
| `packages/pi-ai/src/env-api-keys.ts` | Add `"ollama"` → always returns `"ollama"` placeholder |
|
||||
| `src/onboarding.ts` | Add `"ollama"` to provider picker |
|
||||
| `src/wizard.ts` | Add `"ollama"` key mapping (no key required) |
|
||||
|
||||
Everything else lives in `src/resources/extensions/ollama/`.
|
||||
|
||||
## Risks & Mitigations
|
||||
|
||||
| Risk | Mitigation |
|
||||
|------|------------|
|
||||
| Ollama not running — startup probe latency | 1.5s timeout; cache result; probe async so it doesn't block TUI paint |
|
||||
| Model capabilities unknown | Known-model table + `/api/show` fallback + parameter_size estimation |
|
||||
| Tool calling unreliable on small models | Detect param count; warn on <7B models |
|
||||
| Ollama API changes between versions | Version detect via `/api/version`; stable endpoints only |
|
||||
| Conflicts with `models.json` Ollama config | User config always wins; auto-discovered models merge beneath manual config |
|
||||
| Extension disabled — no impact on core | Extension is additive; disabling removes all Ollama features cleanly |
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
- Unit tests: `ollama-client.ts` with mocked fetch responses
|
||||
- Unit tests: `ollama-discovery.ts` model capability parsing
|
||||
- Unit tests: `ollama-provider.ts` message format mapping + NDJSON stream parsing
|
||||
- Unit tests: `model-capabilities.ts` known model lookups
|
||||
- Integration test: mock HTTP server simulating Ollama `/api/tags`, `/api/chat`, `/api/pull`
|
||||
- Manual test: real Ollama instance with llama3.1, qwen2.5-coder, deepseek-r1
|
||||
|
||||
## Open Questions
|
||||
|
||||
1. **Startup probe** — Probe Ollama on `session_start` (adds ~1.5s if not running) or lazy on first `/model`? **Recommendation: async probe on session_start (non-blocking), eager if `OLLAMA_HOST` is set.**
|
||||
2. **Auto-start** — Try to launch Ollama if installed but not running? **Recommendation: no — too invasive. Show helpful message in `/ollama` status.**
|
||||
3. **Vision support** — Support multimodal models (llava, etc.) in Phase 2 native API? **Recommendation: yes, detected via capabilities table.**
|
||||
4. **Model refresh** — How often to re-probe Ollama for new models? **Recommendation: on `/ollama list`, on `/model` command, and every 5 min (existing TTL).**
|
||||
2
.prompt-injection-scanignore
Normal file
2
.prompt-injection-scanignore
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
# False positives in GSD prompt templates — these are legitimate LLM instructions, not injection
|
||||
src/resources/extensions/gsd/prompts/doctor-heal.md:You are now responsible
|
||||
605
CHANGELOG.md
605
CHANGELOG.md
|
|
@ -6,6 +6,592 @@ Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
|||
|
||||
## [Unreleased]
|
||||
|
||||
## [2.71.0] - 2026-04-11
|
||||
|
||||
### Added
|
||||
- **mcp-server**: add secure_env_collect tool via MCP form elicitation
|
||||
|
||||
### Fixed
|
||||
- **tui**: clear pinned output on message_end to prevent duplicate display
|
||||
- **tui**: clear pinned latest output on turn completion
|
||||
- **tui**: restore pinned output above editor during tool execution
|
||||
- TOCTOU file locking race conditions in event log and custom workflow graph
|
||||
- **tui**: mask secure extension input values in interactive mode
|
||||
- **claude-code**: harden MCP elicitation schema handling
|
||||
- **claude-code**: accept secure_env_collect MCP elicitation forms
|
||||
- **interactive**: keep MCP tool output ordered and restore secure prompt fallback
|
||||
- **interactive**: preserve MCP tool output stream ordering
|
||||
- **gsd**: resolve workflow MCP test typing regressions
|
||||
- **mcp**: return isError flag on workflow tool execution failures
|
||||
- **discuss**: add structuredQuestionsAvailable conditional to all gates
|
||||
- **discuss**: add multi-round questioning to new-project discuss phase
|
||||
- **gsd**: harden claude-code workflow MCP bootstrap
|
||||
- **web**: drop provisional pre-tool question text
|
||||
|
||||
### Changed
|
||||
- extract deriveStateFromDb logic into composable helpers
|
||||
- **pr**: drop web-layer changes from MCP stream-order fix
|
||||
|
||||
## [2.70.1] - 2026-04-11
|
||||
|
||||
### Fixed
|
||||
- **routing**: address codex review — complete interactive bypass and accurate banner
|
||||
- **routing**: skip dynamic routing for interactive dispatches, always show model changes (#3962)
|
||||
- **ci**: trim windows portability integration load
|
||||
- **ci**: narrow windows portability coverage
|
||||
- **ci**: skip validate-pack in windows portability job
|
||||
- **ci**: unblock windows portability follow-up
|
||||
- **windows**: harden portability across runtime and tooling
|
||||
- **auto**: use pathToFileURL for cross-platform import and reconcile regression test
|
||||
- **auto**: resolve resource-loader.js from GSD_PKG_ROOT on resume (#3949)
|
||||
- **mcp-server**: importLocalModule resolves src/ paths from dist/ context
|
||||
- **gsd**: surface scoped doctor health warnings
|
||||
- **gsd**: skip skipped slices in milestone prompts
|
||||
- **gsd**: handle doubled-backtick pre-exec paths
|
||||
- **update**: fetch latest version from registry
|
||||
|
||||
## [2.70.0] - 2026-04-10
|
||||
|
||||
### Added
|
||||
- **mcp-server**: expose ask_user_questions via elicitation
|
||||
|
||||
### Fixed
|
||||
- **pi-ai**: remove Anthropic OAuth flow for TOS compliance
|
||||
- **mcp-server**: hydrate model credentials into env
|
||||
- **mcp-server**: hydrate stored tool credentials on startup
|
||||
- **gsd**: auto-enable cmux when detected instead of prompting
|
||||
- **mcp-server**: URL scheme regex no longer matches Windows drive letters
|
||||
|
||||
## [2.69.0] - 2026-04-10
|
||||
|
||||
### Added
|
||||
- **gsd**: implement ADR-005 multi-model provider and tool strategy
|
||||
- **gsd**: complete ADR-004 capability-aware model routing implementation
|
||||
|
||||
### Fixed
|
||||
- **gsd**: add missing directories to codebase generator exclude list
|
||||
- **gsd**: wire ADR-005 infrastructure into live paths
|
||||
- **gsd**: replace empty catch with logWarning for CI compliance
|
||||
- **gsd**: merge enhanced context sections into standard template, clean up stale gate patterns
|
||||
- **gsd**: remove broken discuss-prepared template, inject briefs into discuss.md
|
||||
|
||||
## [2.68.1] - 2026-04-10
|
||||
|
||||
### Fixed
|
||||
- **ci**: update FILE-SYSTEM-MAP.md path after docs reorganization
|
||||
- **test**: update discord invite test path after docs reorganization
|
||||
- **gsd**: resolve resource-loader import for deployed extensions
|
||||
|
||||
## [2.68.0] - 2026-04-10
|
||||
|
||||
### Added
|
||||
- expose slice replanning over workflow MCP
|
||||
- expose milestone workflow tools over MCP
|
||||
- expose slice completion over workflow MCP
|
||||
- expose task completion alias over workflow MCP
|
||||
- expose GSD planning tools over MCP
|
||||
- gate workflow MCP units by provider transport capabilities
|
||||
- expose core GSD workflow tools over MCP
|
||||
- add contextual tips system for TUI and web terminal
|
||||
|
||||
### Fixed
|
||||
- **state**: prevent false degraded-mode warning when DB not yet initialized
|
||||
- **gsd**: use debugLog in catch block to satisfy empty-catch lint
|
||||
- **gsd**: avoid false manifest and skipped-slice warnings
|
||||
- **gsd**: replace empty catch block with descriptive comment
|
||||
- guard autoCommitDirtyState and restore cwd on MergeConflictError (#2929)
|
||||
- Claude Code MCP tool output rendering and real-time streaming
|
||||
- **gsd**: surface warnings when DB or STATE.md init fails
|
||||
- **gsd**: create gsd.db, runtime/, and STATE.md during init (#3880)
|
||||
- **gsd**: suppress workflow stderr during /gsd
|
||||
- **gsd**: enforce workflow write gates over MCP
|
||||
- restore autoStartTime on resume + replace empty catch blocks (#3585)
|
||||
- **mcp**: harden workflow tool boundary
|
||||
- **gsd**: accept em-dash none verification rationale
|
||||
- **gsd**: resync managed resources on auto resume
|
||||
- **gsd**: stop stale forensics context hijacks
|
||||
- **gsd**: serialize workflow MCP execution state
|
||||
- **gsd**: restore milestone status db preflight
|
||||
- **claude-code-cli**: suppress streamed internal tool noise
|
||||
- **gsd**: skip same-path planning artifact copies
|
||||
- **claude-code-cli**: suppress internal tool call noise
|
||||
- **pi-coding-agent**: avoid oauth login for api-key providers
|
||||
- **gsd**: snapshot new untracked files before dispatch
|
||||
- **platform**: harden command execution and stabilize onboarding sync
|
||||
- **pi-ai**: restore event stream factory export
|
||||
- **gsd**: use valid codebase refresh logger
|
||||
- **gsd**: auto-refresh codebase cache
|
||||
- **gsd**: align model switching and prefs surfaces
|
||||
- route slice and validation artifacts through DB tools
|
||||
- make gsd_complete_task the only execute-task summary path
|
||||
- **docs**: stop pointing repo documentation to gsd.build
|
||||
- add activeEngineId and activeRunDir to PausedSessionMetadata interface
|
||||
- **gsd**: address QA round 4
|
||||
- **gsd**: address QA round 3
|
||||
- **gsd**: address QA round 2
|
||||
- **gsd**: address QA round 1
|
||||
- **gsd**: address review feedback from trek-e
|
||||
- **gsd**: assess recovery from paused worktree state
|
||||
- **gsd**: satisfy extension typecheck for interrupted recovery
|
||||
- **gsd**: restore hook dispatch export and guided flow imports
|
||||
- **gsd**: clear stale paused metadata in guided flow
|
||||
- **gsd**: preserve interrupted-session resume mode
|
||||
- preserve explicit interrupted-session resume mode
|
||||
- preserve step-mode and suppress stale paused resumes
|
||||
- suppress stale interrupted-session resume prompts
|
||||
|
||||
### Changed
|
||||
- harden workflow MCP executor loading
|
||||
- **ci**: add weekly workflow to regenerate model registry
|
||||
- **deps**: refresh audited package locks
|
||||
|
||||
## [2.67.0] - 2026-04-09
|
||||
|
||||
### Added
|
||||
- **context**: implement R005 decision scope cascade and derive scope from slice metadata
|
||||
- **M005**: Tiered Context Injection - relevance-scoped context with 65%+ reduction
|
||||
|
||||
### Fixed
|
||||
- **test**: align auto-loop test timers with updated session timeout
|
||||
- **gsd**: repair CI after branch split
|
||||
- **gsd**: repair CI after branch split
|
||||
- **gsd**: repair CI after branch split
|
||||
- **gsd**: fail closed for discussion gate enforcement
|
||||
- **gsd**: harden auto merge recovery and session safety
|
||||
- **gsd**: repair overlay, shortcut, and widget surfaces
|
||||
- **gsd**: prevent stale workflow reconcile state writes
|
||||
- **gsd**: align prompt contracts and validation flow
|
||||
- **pi-tui**: harden input parsing and editor focus behavior
|
||||
- **remote-questions**: cancel local TUI when remote answer wins the race
|
||||
- **auto**: increase session timeout to 120s and treat timeout as recoverable pause (#3767)
|
||||
- **ui**: apply anthropic-api display name to all model/provider UI surfaces
|
||||
- **ui**: display 'anthropic-api' in GSD preferences wizard provider list
|
||||
- **remote-questions**: race local TUI against remote channel instead of remote-only routing
|
||||
- **ui**: display 'anthropic-api' in model selector to distinguish from claude-code
|
||||
- **gates**: add mechanical enforcement for discussion question gates
|
||||
- **prompts**: harden non-bypassable gates and exclude dot-folders from scanning
|
||||
- **gsd**: ignore filename headings in parsePlan
|
||||
- **providers**: match 'out of extra usage' error and respect claude-code provider in model resolution (#3772)
|
||||
- **pi-ai**: recover XML parameters trapped in JSON strings
|
||||
- **retry**: guard claude-code fallback to anthropic provider only
|
||||
- **providers**: route Anthropic subscription users through Claude Code CLI (#3772)
|
||||
- **claude-code**: use native Windows claude lookup
|
||||
- **gsd**: suppress repeated preferences section warnings
|
||||
- **gsd**: normalize described expected output paths
|
||||
- **auto**: resilient transient error recovery — defer to Core RetryHandler and fix cmdCtx race
|
||||
|
||||
## [2.66.1] - 2026-04-08
|
||||
|
||||
### Fixed
|
||||
- **pi-tui**: revert contentCursorRow, use hardwareCursorRow as movement baseline
|
||||
- **pi-tui**: use contentCursorRow for render movement baseline instead of cursorRow
|
||||
- **gsd**: add logWarning to empty catch block in orphaned worktree cleanup
|
||||
- **gsd**: add consecutiveFinalizeTimeouts to LoopState in journal tests
|
||||
- **gsd**: add escalation and unit-detach guards to finalize timeout handlers
|
||||
- **gsd**: add timeout guard around postUnitPreVerification to prevent auto-loop hang
|
||||
- **gsd**: OS-specific keyboard shortcut hints via formatShortcut helper
|
||||
- **subagent**: support list-style tools frontmatter
|
||||
- clear autocomplete rows from content bottom
|
||||
- parse annotated pre-exec file paths
|
||||
- **gsd**: add orphaned milestone branch audit at auto-mode bootstrap
|
||||
|
||||
## [2.66.0] - 2026-04-08
|
||||
|
||||
### Added
|
||||
- **gsd**: add fast path for queued milestone discussion
|
||||
- **gsd**: add /gsd show-config command
|
||||
- **reactive**: graph diagnostics and subagent_model config
|
||||
- **dispatch**: parallel research slices and parallel milestone validation
|
||||
- **parallel**: worker model override for parallel milestone workers
|
||||
|
||||
### Fixed
|
||||
- **gsd**: validate depth verification answer before unlocking write-gate
|
||||
- **gsd**: revert unknown artifact check to warn-and-proceed
|
||||
- **gsd**: add missing cmd field to test base WorkflowEvent
|
||||
- **gsd**: address remaining adversarial review findings for wave 3
|
||||
- **gsd**: detect concurrent event log growth during reconcile
|
||||
- **gsd**: address adversarial review findings for wave 3
|
||||
- **gsd**: address adversarial review findings for wave 2
|
||||
- **gsd**: address adversarial review findings for wave 1
|
||||
- **gsd**: WAL-safe migration backup + stronger regression tests
|
||||
- **gsd**: consistency and cleanup (wave 5/5)
|
||||
- **gsd**: write safety — atomic writes and randomized tmp paths (wave 4/5)
|
||||
- **gsd**: session and recovery robustness (wave 3/5)
|
||||
- **gsd**: event log and reconciliation robustness (wave 2/5)
|
||||
- **gsd**: critical state machine data integrity fixes (wave 1/5)
|
||||
- **gsd**: critical state machine data integrity fixes (wave 1/5)
|
||||
- **gsd**: remove ecosystem research stub and address adversarial review
|
||||
- **gsd**: suppress model change notification in auto-mode unless verbose
|
||||
- **gsd**: exclude task.files from checkTaskOrdering to prevent false positives
|
||||
- **state**: skip ghost check for queued milestones in registry build
|
||||
- **ci**: replace empty catch blocks and raw stderr with logWarning
|
||||
- **logging**: add debugLog to empty catch in reopen-milestone
|
||||
- **state-machine**: 9 resilience fixes + 86 regression tests (#3161)
|
||||
- **gsd**: add incremental persistence to discuss prompts
|
||||
- replace empty catch with logWarning for silent-catch-diagnostics test
|
||||
- **test**: escape regex metacharacters in skip-by-preference pattern test
|
||||
- **test**: search for numbered step definitions in prompt ordering test
|
||||
- **test**: update notes loop test for notesVisible guard behavior
|
||||
- **test**: update action count for note captures now included in results
|
||||
- **test**: remove extraneous test file from wrong branch
|
||||
- **test**: update worktree sync tests to use separate milestone IDs
|
||||
- **gsd**: use valid LogComponent type for stale branch guard warning
|
||||
- **test**: update rogue detection test for auto-remediation behavior
|
||||
- **test**: update stuck-planning test to expect executing after reconciliation
|
||||
- **test**: update file path consistency tests for inputs-only checking
|
||||
- **test**: add CONTEXT file to queued milestone ghost detection test
|
||||
- **test**: update needs-remediation test to expect validating-milestone phase
|
||||
- **gsd**: import all-done milestones as complete during DB migration
|
||||
- **gsd**: allow milestone completion when validation skipped by preference
|
||||
- **gsd**: set slice sequence at all three insertion sites
|
||||
- **gsd**: four prompt/runtime fixes for completion and session stability
|
||||
- **gsd**: default insertMilestone status to queued instead of active
|
||||
- **gsd**: suppress repeated frontmatter YAML parse warnings
|
||||
- **gsd**: normalize list inputs in complete-task + fix roadmap dep parsing
|
||||
- **gsd**: open DB before status derivation + respect isolation:none in quick
|
||||
- **gsd**: add .bg-shell/ to baseline gitignore patterns
|
||||
- **tui**: prevent Enter key infinite loop in interview notes mode
|
||||
- **provider**: handle Enter key to initiate auth setup in provider manager
|
||||
- **gsd**: cap run-uat dispatch attempts to prevent infinite replay loop
|
||||
- **mcp**: use createRequire to resolve SDK wildcard subpath imports
|
||||
- **gsd**: mark note captures as executed in executeTriageResolutions
|
||||
- **gsd**: validate main_branch preference exists before using in merge
|
||||
- **gsd**: handle deleted cwd in projectRoot to prevent ENOENT crash
|
||||
- **gsd**: skip current milestone in syncWorktreeStateBack to prevent merge conflicts
|
||||
- **gsd**: add structuredQuestionsAvailable conditional to slice discuss
|
||||
- **gsd**: restore full tool set after discuss flow scoping
|
||||
- **gsd**: tighten verifyExpectedArtifact to prevent rogue-write false positives
|
||||
- **gsd**: add verification gate to complete-slice tool
|
||||
- **gsd**: fix pre-execution-checks false positives from backticks and task.files
|
||||
- **gsd**: stop renderAllProjections from overwriting authoritative PLAN.md
|
||||
- **gsd**: auto-checkout to main when isolation:none finds stale milestone branch
|
||||
- **gsd**: auto-remediate stale slice DB status when SUMMARY exists on disk
|
||||
- **gsd**: open DB on demand in gsd_milestone_status for non-auto sessions
|
||||
- **gsd**: detect phantom milestones from abandoned gsd_milestone_generate_id
|
||||
- **gsd**: force re-validation when verdict is needs-remediation
|
||||
- **gsd**: exclude closed slices from findMissingSummaries check
|
||||
- **gsd**: recover from stale lockfile after crash or SIGKILL
|
||||
- **gsd**: add createdAt timestamp and 30s age guard to staleness check
|
||||
- **gsd**: clear stale pendingAutoStart after /clear interrupts discussion
|
||||
- **gsd**: suppress misleading warnings for expected ENOENT/EISDIR conditions
|
||||
- **gsd**: extract real error from message content when errorMessage is useless
|
||||
- **gsd**: extract real error from message content when errorMessage is useless
|
||||
- **gsd**: show accurate pause message for queued-user-message skip
|
||||
- **gsd**: treat queued-user-message skip as non-retryable interruption
|
||||
- **gsd**: recognize "Not provided." default in isVerificationNotApplicable
|
||||
- **gsd**: discoverManifests skips symlinked extension directories
|
||||
- **gsd**: recognize "Not provided." default in isVerificationNotApplicable
|
||||
- **gsd**: reconcile plan-file tasks into DB when planner skips persistence (#3600)
|
||||
- **gsd**: use isClosedStatus() in dispatch guard instead of raw complete check
|
||||
- **browser-tools**: make sharp an optional lazy dependency
|
||||
- **gsd**: pass required arguments in defer-milestone-stamp test
|
||||
- **gsd**: replace remaining empty catch with logWarning
|
||||
- **gsd**: use logWarning instead of raw stderr in catch blocks
|
||||
- **gsd**: log error instead of empty catch in STATE.md rebuild
|
||||
- **gsd**: log error instead of empty catch in skip_slice
|
||||
- **gsd**: cast milestone classification to string for type safety
|
||||
- **gsd**: treat zero-slice roadmap as pre-planning in guided flow
|
||||
- **gsd**: rebuild STATE.md after skip-slice and strengthen rethink prompt
|
||||
- **gsd**: use main_branch preference in worktree creation
|
||||
- **gsd**: stamp defer and milestone captures as executed after triage
|
||||
- **tui**: treat absolute file paths as plain text, not commands
|
||||
- **tui**: break infinite re-render loop for images in cmux
|
||||
- **gsd**: rebuild STATE.md before guided-flow dispatch
|
||||
- **gsd**: defer queued shells in active milestone selection
|
||||
- **retry**: prevent 429 quota cascade and 30-min lockout
|
||||
- **gsd**: add fastPathInstruction to buildDiscussMilestonePrompt loadPrompt call
|
||||
|
||||
### Changed
|
||||
- auto-commit after quick-task
|
||||
- auto-commit after quick-task
|
||||
- auto-commit after quick-task
|
||||
- auto-commit after quick-task
|
||||
- auto-commit after quick-task
|
||||
- auto-commit after quick-task
|
||||
- auto-commit after quick-task
|
||||
|
||||
## [2.65.0] - 2026-04-07
|
||||
|
||||
### Added
|
||||
- **gsd**: persistent notification panel with TUI overlay, widget, and web API
|
||||
- **gsd**: wire blocking behavior and strict mode for enhanced verification
|
||||
- **gsd**: add post-execution cross-task consistency checks
|
||||
- **gsd**: add pre-execution plan verification checks
|
||||
|
||||
### Fixed
|
||||
- **gsd**: wrap long notification messages and fit overlay to content
|
||||
- **gsd**: remove background color from backdrop, fix message truncation
|
||||
- **gsd**: restore consistent overlay height to prevent ghost artifacts
|
||||
- **gsd**: improve notification overlay backdrop and content-fit sizing
|
||||
- **gsd**: only unlink notification lock when owned, prevent foreign lock deletion
|
||||
- **gsd**: add backdrop dimming and viewport padding to notification overlay
|
||||
- **gsd**: add intent + phase guards to resume context fallback (#3615)
|
||||
- **gsd**: inject task context for unstructured resume prompts (#3615)
|
||||
- **pi-coding-agent**: restore extension tools after session switch (#3616)
|
||||
- **agent-loop**: schema overload cap ignores bash execution errors (#3618)
|
||||
- **bg-shell**: prevent signal handler accumulation + cap alert queue
|
||||
- **gsd**: coerce plain-string provides field to array in complete-slice (#3585)
|
||||
- address PR #3468 review findings
|
||||
- **gsd**: persist autoStartTime across session resume so elapsed timer survives /exit
|
||||
- **gsd**: add enhanced_verification preferences to mergePreferences
|
||||
- **headless**: treat discuss and plan as multi-turn commands
|
||||
|
||||
### Changed
|
||||
- **interactive**: cap rendered chat components + kill orphan descendants
|
||||
- **tui**: render-skip, frame isolation, Text cache guard, dispose
|
||||
|
||||
## [2.64.0] - 2026-04-06
|
||||
|
||||
### Added
|
||||
- **gsd**: add LLM safety harness for auto-mode damage control
|
||||
- **ollama**: native /api/chat provider with full option exposure
|
||||
- **parallel**: slice-level parallelism with dependency-aware dispatch (#3315)
|
||||
- **mcp-client**: add OAuth auth provider for HTTP transport (#3295)
|
||||
|
||||
### Fixed
|
||||
- **ui**: remove 200-column cap on welcome screen width
|
||||
- address adversarial review findings for #3576
|
||||
- **gsd**: replace hardcoded agent skill paths with dynamic resolution (#3575)
|
||||
- **headless**: sync resources and use agent dir for query
|
||||
- **cli**: show latest version and bypass npm cache in update check
|
||||
- **gsd**: follow CONTRIBUTING standards for #3565
|
||||
- **gsd**: address Codex adversarial review findings for #3565
|
||||
- **gsd**: coerce string arrays to objects in complete-slice/task tools (#3565)
|
||||
- **gsd**: harden flat-rate routing guard against alias/resolution gaps
|
||||
- **pi-coding-agent**: register models.json providers and await Ollama probe in headless mode
|
||||
- **ollama**: use apiKey auth mode to avoid streamSimple crash
|
||||
- **gsd**: disable dynamic model routing for flat-rate providers
|
||||
- **gsd**: address Codex adversarial review findings
|
||||
- **gsd**: prevent LLM from querying gsd.db directly via bash (#3541)
|
||||
- **gsd**: seed requirements table from REQUIREMENTS.md on first update
|
||||
- **gsd**: inject S##-CONTEXT.md from slice discussion into all prompt builders
|
||||
- **cli**: guard model re-apply against session restore and async rejection
|
||||
- **pi-coding-agent**: resolve model fallback race that ignores configured provider (#3534)
|
||||
- **detection**: add xcodegen and Xcode bundle support to project detection (#1882)
|
||||
- **perf**: share jiti module cache across extension loads (#3308)
|
||||
- **resource-sync**: prune removed bundled subdirectory extensions on upgrade (#1972)
|
||||
- recognize U+2705 checkmark emoji as completion marker in prose roadmaps (#1897)
|
||||
- **web**: use safePackageRootFromImportUrl for cross-platform package root (#1881) (#1893)
|
||||
- isolate CmuxClient stdio to prevent TUI hangs in CMUX (#3306)
|
||||
- worktree health check walks parent dirs for monorepo support (#3313)
|
||||
- **gsd**: promote milestone status from queued to active in plan-milestone (#3317)
|
||||
- **worktree**: correct merge failure notification command from `/complete-milestone` to `/gsd dispatch complete-milestone` (#1901)
|
||||
- detect and block Gemini CLI OAuth tokens used as API keys (#3296)
|
||||
- **auto**: break retry loop on tool invocation errors (malformed JSON) (#3298)
|
||||
- **git**: use git add -u in symlink .gsd fallback to prevent hang (#3299)
|
||||
- handle complete-slice context exhaustion to unblock downstream slices (#3300)
|
||||
- cap consecutive tool validation failures to prevent stuck-loop (#3301)
|
||||
- make enrichment tool params optional for limited-toolcall models (#3302)
|
||||
- add filesystem safety guard to complete-slice.md (#3304)
|
||||
- **extensions**: use bundledExtensionKeys for conflict detection instead of broken path heuristic (#3305)
|
||||
- scope tools during discuss flows to prevent grammar overflow (#3307)
|
||||
- **preferences**: warn on silent parse failure for non-frontmatter files (#3310)
|
||||
- track remote-questions in managed-resources manifest (#3312)
|
||||
- **auto**: add timeout guard for postUnitPostVerification in runFinalize (#3314)
|
||||
- **gsd**: handle large markdown parameters in complete-milestone JSON parsing (#3316)
|
||||
- **metrics**: deduplicate idle-watchdog entries and fix forensics false-positives (#1973)
|
||||
- prevent milestone/slice artifact rendering corruption (#3293)
|
||||
- **doctor**: strip --fix flag before positional parse (#1919) (#1926)
|
||||
- resolve external-state worktree DB path (#2952) (#3303)
|
||||
- **gsd**: worktree teardown path validation prevents data loss (#3311)
|
||||
- prevent auto-mode from dispatching deferred slices (#3309)
|
||||
- preserve completed slice status on plan-milestone re-plan (#3318)
|
||||
- reopen DB on cold resume, recognize heavy check mark (#3319)
|
||||
- dashboard model label shows dispatched model, not stale previous unit (#3320)
|
||||
|
||||
### Changed
|
||||
- **gsd**: remove copyright line from test file
|
||||
- **gsd**: trim promptGuidelines to 1 line to reduce per-turn token cost
|
||||
- **web**: consolidate subprocess boilerplate into shared runner (#1899)
|
||||
|
||||
## [2.63.0] - 2026-04-05
|
||||
|
||||
### Added
|
||||
- **mcp-server**: add 6 read-only tools for project state queries (#3515)
|
||||
|
||||
### Fixed
|
||||
- **gsd**: enrich vague diagnostic messages with root-cause context
|
||||
- **test**: reset dedup cache between ask-user-freetext tests
|
||||
- **db**: delete orphaned WAL/SHM files alongside empty gsd.db (#2478)
|
||||
- **gsd**: prevent auto-wrapup from interrupting in-flight tool calls (#3512)
|
||||
- **gsd**: handle bare model IDs in resolveDefaultSessionModel (#3517)
|
||||
- **gsd**: wrap decision and requirement saves in transaction to prevent ID races
|
||||
- **gsd**: prefer PREFERENCES.md over settings.json for session bootstrap model (#3517)
|
||||
- **gsd**: add Claude Code official skill directories to skill resolution
|
||||
- **dedup**: hash full question payload, not just IDs
|
||||
- **gsd**: prevent duplicate ask_user_questions dispatches with per-turn dedup cache
|
||||
- **pi-ai**: extend repairToolJson to handle XML tags and truncated numbers
|
||||
- **pi-coding-agent**: cancel stale retries after model switch
|
||||
|
||||
### Changed
|
||||
- untrack .repowise/ and add to .gitignore
|
||||
|
||||
## [2.62.1] - 2026-04-05
|
||||
|
||||
### Fixed
|
||||
- **gsd**: gate steer worktree routing on active session, fix messaging
|
||||
- **gsd**: resolve steer overrides to worktree path when worktree is active
|
||||
|
||||
## [2.62.0] - 2026-04-04
|
||||
|
||||
### Added
|
||||
- **gsd**: enhance /gsd codebase with preferences, --collapse-threshold, and auto-init
|
||||
- **01-05**: fire before_model_select hook, add verbose scoring output, load capability overrides
|
||||
- **01-04**: register before_model_select placeholder handler in GSD hooks
|
||||
- **01-04**: add BeforeModelSelectEvent to extension API and wire emission
|
||||
- **01-03**: wire taskMetadata from selectAndApplyModel to resolveModelForComplexity
|
||||
- **01-03**: insert STEP 2 capability scoring into resolveModelForComplexity
|
||||
- **01-01**: add taskMetadata to ClassificationResult and export extractTaskMetadata
|
||||
- **01-01**: add capability types, data tables, and scoring functions to model-router
|
||||
|
||||
### Fixed
|
||||
- **gsd**: add codebase validation in validatePreferences so preferences are not silently dropped
|
||||
- **test**: update db-path-worktree-symlink test for simplified diagnostic logging
|
||||
- **gsd**: update tests for errors-only audit persistence, fix empty catch blocks
|
||||
- **gsd**: harden audit log persistence — errors-only, sanitized, demote probe warnings
|
||||
- **gsd**: address adversarial review findings on workflow-logger migration
|
||||
- **gsd**: fail-closed stop guard, harden backtrack parsing, fix prompt params
|
||||
- **gsd**: add diagnostic logging to empty catch blocks in auto-mode
|
||||
- **lsp**: add legacy alias for renamed kotlin-language-server key
|
||||
- break infinite notes loop when selecting "None of the above"
|
||||
- align defaultRoutingConfig capability_routing to true
|
||||
- **pi-coding-agent**: upgrade Kotlin LSP to official Kotlin/kotlin-lsp
|
||||
- **test**: use correct RequirementCounts type fields in edge case tests
|
||||
- **remote-questions**: fire configured channels in interactive mode
|
||||
|
||||
### Changed
|
||||
- **gsd**: migrate all catch blocks to centralized workflow-logger
|
||||
- init gsd
|
||||
|
||||
## [2.61.0] - 2026-04-04
|
||||
|
||||
### Added
|
||||
- stop/backtrack capture classifications for milestone regression (#3488)
|
||||
- GSD context optimization with model routing and context masking
|
||||
|
||||
## [2.60.0] - 2026-04-04
|
||||
|
||||
### Added
|
||||
- add /btw skill — ephemeral side questions from conversation context
|
||||
|
||||
### Fixed
|
||||
- **btw**: remove LLM-specific references from skill description
|
||||
|
||||
## [2.59.0] - 2026-04-03
|
||||
|
||||
### Added
|
||||
- **extensions**: add Ollama extension for first-class local LLM support (#3371)
|
||||
- **doctor**: stale commit safety check with gsd snapshot and auto-cleanup
|
||||
- **extensions**: wire up topological sort and unified registry filtering (#3152)
|
||||
- **widget**: add last commit display and dashboard layout improvements (#3226)
|
||||
- **model-routing**: enable dynamic routing by default (#3120)
|
||||
- **vscode**: sidebar redesign, SCM provider, checkpoints, diagnostics [3/3]
|
||||
- **splash**: add remote channel indicator to welcome screen tools row
|
||||
- stream full text and thinking output in headless verbose mode (#2934)
|
||||
- **gsd**: add codebase map — structural orientation for fresh agent contexts
|
||||
|
||||
### Fixed
|
||||
- **worktree**: resolve merge conflict for PR #3322 — adopt comprehensive pre-merge cleanup
|
||||
- **merge**: clean stale MERGE_HEAD before squash merge (#2912)
|
||||
- **state**: always run disk→DB reconciliation when DB is available (#2631)
|
||||
- **git-service**: fix merge-base ancestry check and .gsd/ leakage in snapshot absorption
|
||||
- **extensions**: update provides.hooks in 7 extension manifests to match actual registrations (#3157)
|
||||
- surface nativeCommit errors in reconcileMergeState instead of silently swallowing (#3052)
|
||||
- **parallel**: scope commits to milestone boundaries in parallel mode (#3047)
|
||||
- add windowsHide to all web-mode subprocess spawns (#2628) (#3046)
|
||||
- skip auto-mode pause on empty-content aborted messages (#2695) (#3045)
|
||||
- detect and remove nested .git dirs in worktree cleanup to prevent data loss (#3044)
|
||||
- prevent data loss when git isolation default changes (#2625) (#3043)
|
||||
- **read-tool**: clamp offset to file bounds instead of throwing (#3007) (#3042)
|
||||
- **gsd**: preserve queued milestones with worktrees in ghost detection (#3041)
|
||||
- **compaction**: add chunked fallback when messages exceed model context window (#3038)
|
||||
- preserve interactive terminal across tab switches and project changes (#3055)
|
||||
- call cleanupQuickBranch on turn_end to squash-merge quick branch back (#3054)
|
||||
- align run-uat artifact path to ASSESSMENT, preventing false stuck retries (#3053)
|
||||
- replace invalid Discord invite links with canonical URL (#3056)
|
||||
- add Windows shell guard to remaining spawn sites (#3058)
|
||||
- route `gsd auto` to headless runner to prevent hang on piped stdin/stdout (#3057)
|
||||
- respect .gitignore for .gsd/ in rethink prompt (#3059)
|
||||
- migrate unit ownership from JSON to SQLite to eliminate read-modify-write race (#3061)
|
||||
- **roadmap**: handle numbered, bracketed, and indented prose H3 headers in slice parser (#3063)
|
||||
- add worktree-merge to resolveModelWithFallbacksForUnit switch and update KNOWN_UNIT_TYPES (#3066)
|
||||
- clean up MERGE_HEAD on all error paths in mergeMilestoneToMain (#2912) (#3068)
|
||||
- prevent LLM from confusing background task output with user input (#3069)
|
||||
- add openai-codex provider and modern OpenAI models to MODEL_CAPABILITY_TIER and cost tables (#3070)
|
||||
- preserve active tab when switching projects (#3071)
|
||||
- include project name in desktop notifications (#3072)
|
||||
- recover from many-image dimension overflow by stripping older images (#3075)
|
||||
- resolve bare model IDs to anthropic over claude-code provider (#3076)
|
||||
- **auto**: move selectAndApplyModel before updateProgressWidget (#3079)
|
||||
- detect project relocation and recover state without data loss (#3080)
|
||||
- add free-text input to ask-user-questions when "None of the above" is selected (#3081)
|
||||
- block work execution during /gsd queue mode (#2545) (#3082)
|
||||
- detect worktree basePath in gsdRoot() to prevent escaping to project root (#3083)
|
||||
- invalidate stale quick-task captures across milestone boundaries (#3084)
|
||||
- defer model validation until after extensions register (#3089)
|
||||
- repair YAML bullet lists in malformed tool-call JSON (#3090)
|
||||
- unify SUMMARY.md render paths for projection fidelity (#3091)
|
||||
- fix chat mode misrepresenting terminal output, appearing stuck, and omitting user messages (#3092)
|
||||
- resolve 4 state corruption bugs in milestone/slice completion (#2945) (#3093)
|
||||
- isolate guided-flow session state and key discussion milestone queries (#2985) (#3094)
|
||||
- **guided-flow**: route dispatchWorkflow through dynamic routing pipeline (#3153)
|
||||
- skip external state migration inside git worktrees (#2970) (#3227)
|
||||
- coerce non-numeric strings in DB columns during manifest serialization (#2962) (#3229)
|
||||
- route allDiscussed and zero-slices paths to queued milestone discussion (#3150) (#3230)
|
||||
- use loose equality for null checks in secure_env_collect (#2997) (#3231)
|
||||
- prevent prompt explosion from `$'` in template replacement values (#2968) (#3232)
|
||||
- resolve OAuth API key in buildMemoryLLMCall via modelRegistry (#2959) (#3233)
|
||||
- **forensics**: read completion status from DB instead of legacy file (#3129) (#3234)
|
||||
- use camelCase parameter names in execute-task and complete-slice prompts (#2933) (#3236)
|
||||
- check bootstrap completeness in init wizard gate, not just .gsd/ existence (#2942) (#3237)
|
||||
- specify write tool for PROJECT.md in milestone/slice prompts (#3238)
|
||||
- widen completing-milestone gate to accept "None required" and similar phrasings (#2931) (#3239)
|
||||
- prevent ask_user_questions from poisoning auto-mode dispatch (#2936) (#3240)
|
||||
- guard null s.currentUnit in runUnitPhase closeout after stopAuto race (#2939) (#3241)
|
||||
- replace `web_search` with `search-the-web` in prompts and agent frontmatter (#2920) (#3245)
|
||||
- preserve milestone title in upsertMilestonePlanning when DB row pre-exists (#2879) (#3247)
|
||||
- invalidate stale milestone validation on roadmap reassessment (#2957) (#3242)
|
||||
- **discuss**: add roadmap fallback when DB is open but empty (#2892) (#3244)
|
||||
- integrate Codex & Gemini CLI into provider routes and rate-limit handling (#2922) (#3246)
|
||||
- **error-classifier**: widen STREAM_RE to cover all 7 V8 JSON parse error variants (#2916) (#3243)
|
||||
- prevent git stash from destroying queued milestone CONTEXT files (#2505) (#3273)
|
||||
- skip staleness rebuild in npm tarball installs (#2877) (#3250)
|
||||
- **parallel**: check worktree DB for milestone completion in merge (#2812) (#3256)
|
||||
- make claude-code provider stateful with full context and sidechain events (#2859) (#3254)
|
||||
- **worktree**: preserve non-empty gsd.db during sync to prevent truncation (#2815) (#3255)
|
||||
- align @gsd/native module type with compiled output (#3253)
|
||||
- parse hook/* completed-unit keys correctly in forensics + doctor (#2826) (#3252)
|
||||
- copy mcp.json into auto-mode worktrees (#2791) (#3251)
|
||||
- add gsd_requirement_save and upsert path for requirement updates (#3249)
|
||||
- handle pause_turn stop reason to prevent 400 errors with native web search (#2869) (#3248)
|
||||
- use authoritative milestone status in web roadmap (#2807) (#3258)
|
||||
- classify long-context entitlement 429 as quota_exhausted, not rate_limit (#2803) (#3257)
|
||||
- **docs**: use ~/.pi/agent/extensions/ for community extension install path (#3131) (#3259)
|
||||
- add disk→DB slice reconciliation in deriveStateFromDb (#2533) (#3262)
|
||||
- run forensics duplicate detection before investigation (#2704) (#3260)
|
||||
- skip TUI render loop on non-TTY stdout to prevent CPU burn (#3095) (#3263)
|
||||
- persist forensics report context across follow-up turns (#2941) (#3261)
|
||||
- invalidate workspace state on turn_end so milestones list stays current (#2706) (#3266)
|
||||
- eliminate 3 recurring doctor audit false positives (#3105) (#3264)
|
||||
- **web**: reconcile auto-mode state with on-disk lock in dashboard (#2705) (#3265)
|
||||
- treat ghost milestones as ineligible for parallel execution (#2501) (#3268)
|
||||
- redirect auto-mode to headless when stdout is piped (#2732) (#3269)
|
||||
- attempt VACUUM recovery when initSchema fails with corrupt freelist (#2519) (#3270)
|
||||
- resolve db_unavailable loop in worktree/symlink layouts (#2517) (#3271)
|
||||
- correct OAuth fallback request shape for google_search (#2963) (#3272)
|
||||
- prevent UAT stuck-loop and orphaned worktree after milestone completion (#3065)
|
||||
- **mcp**: handle server names with spaces in mcp_discover (#3037)
|
||||
- **gsd**: detect markdown body verdicts and guard plan-milestone against completed slices (#2960) (#3035)
|
||||
- **error-classifier**: replace STREAM_RE whack-a-mole with catch-all V8 JSON.parse pattern
|
||||
- type _borderColorKey as 'dim' | 'bashMode' to match ThemeColor
|
||||
- **tui**: comprehensive TUI review — layout, flow, rendering, and state fixes
|
||||
- **gsd**: harden codebase-map — bug fixes, UX polish, and expanded tests
|
||||
|
||||
### Changed
|
||||
- **state**: centralize pipeline logging through workflow logger (#3282)
|
||||
- **gitignore**: exclude src/ build artifacts, scratch files, and .plans/
|
||||
- **complexity**: reclassify planning phases from standard to heavy tier
|
||||
|
||||
## [2.58.0] - 2026-03-28
|
||||
|
||||
### Added
|
||||
|
|
@ -2154,7 +2740,24 @@ Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
|||
### Changed
|
||||
- License updated to MIT
|
||||
|
||||
[Unreleased]: https://github.com/gsd-build/gsd-2/compare/v2.58.0...HEAD
|
||||
[Unreleased]: https://github.com/gsd-build/gsd-2/compare/v2.71.0...HEAD
|
||||
[2.71.0]: https://github.com/gsd-build/gsd-2/compare/v2.70.1...v2.71.0
|
||||
[2.70.1]: https://github.com/gsd-build/gsd-2/compare/v2.70.0...v2.70.1
|
||||
[2.70.0]: https://github.com/gsd-build/gsd-2/compare/v2.69.0...v2.70.0
|
||||
[2.69.0]: https://github.com/gsd-build/gsd-2/compare/v2.68.1...v2.69.0
|
||||
[2.68.1]: https://github.com/gsd-build/gsd-2/compare/v2.68.0...v2.68.1
|
||||
[2.68.0]: https://github.com/gsd-build/gsd-2/compare/v2.67.0...v2.68.0
|
||||
[2.67.0]: https://github.com/gsd-build/gsd-2/compare/v2.66.1...v2.67.0
|
||||
[2.66.1]: https://github.com/gsd-build/gsd-2/compare/v2.66.0...v2.66.1
|
||||
[2.66.0]: https://github.com/gsd-build/gsd-2/compare/v2.65.0...v2.66.0
|
||||
[2.65.0]: https://github.com/gsd-build/gsd-2/compare/v2.64.0...v2.65.0
|
||||
[2.64.0]: https://github.com/gsd-build/gsd-2/compare/v2.63.0...v2.64.0
|
||||
[2.63.0]: https://github.com/gsd-build/gsd-2/compare/v2.62.1...v2.63.0
|
||||
[2.62.1]: https://github.com/gsd-build/gsd-2/compare/v2.62.0...v2.62.1
|
||||
[2.62.0]: https://github.com/gsd-build/gsd-2/compare/v2.61.0...v2.62.0
|
||||
[2.61.0]: https://github.com/gsd-build/gsd-2/compare/v2.60.0...v2.61.0
|
||||
[2.60.0]: https://github.com/gsd-build/gsd-2/compare/v2.59.0...v2.60.0
|
||||
[2.59.0]: https://github.com/gsd-build/gsd-2/compare/v2.58.0...v2.59.0
|
||||
[2.58.0]: https://github.com/gsd-build/gsd-2/compare/v2.57.0...v2.58.0
|
||||
[2.57.0]: https://github.com/gsd-build/gsd-2/compare/v2.56.0...v2.57.0
|
||||
[2.56.0]: https://github.com/gsd-build/gsd-2/compare/v2.55.0...v2.56.0
|
||||
|
|
|
|||
|
|
@ -146,9 +146,14 @@ The codebase is organized into these areas. All are open to contributions:
|
|||
| AI/LLM layer | `packages/pi-ai` | Provider integrations, model handling |
|
||||
| Agent core | `packages/pi-agent-core` | Agent orchestration — RFC required for changes |
|
||||
| Coding agent | `packages/pi-coding-agent` | The main coding agent |
|
||||
| MCP server | `packages/mcp-server` | Project state tools and MCP protocol |
|
||||
| GSD extension | `src/resources/extensions/gsd/` | GSD workflow — RFC required for auto-mode |
|
||||
| Native bindings | `native/` | Platform-specific native code |
|
||||
| Other extensions | `src/resources/extensions/` | Browser, search, voice, MCP client, etc. |
|
||||
| Native engine | `native/` | Rust N-API modules (grep, git, AST, etc.) |
|
||||
| VS Code extension | `vscode-extension/` | Chat participant, sidebar, RPC integration |
|
||||
| Web interface | `web/` | Browser-based dashboard |
|
||||
| CI/Build | `.github/`, `scripts/` | Workflows, build scripts |
|
||||
| Documentation | `docs/` | User guides, ADRs, SDK docs |
|
||||
|
||||
## Review process
|
||||
|
||||
|
|
|
|||
271
README.md
271
README.md
|
|
@ -7,7 +7,7 @@
|
|||
[](https://www.npmjs.com/package/gsd-pi)
|
||||
[](https://www.npmjs.com/package/gsd-pi)
|
||||
[](https://github.com/gsd-build/GSD-2)
|
||||
[](https://discord.gg/gsd)
|
||||
[](https://discord.com/invite/nKXTsAcmbT)
|
||||
[](LICENSE)
|
||||
[](https://dexscreener.com/solana/dwudwjvan7bzkw9zwlbyv6kspdlvhwzrqy6ebk8xzxkv)
|
||||
|
||||
|
|
@ -21,187 +21,107 @@ One command. Walk away. Come back to a built project with clean git history.
|
|||
|
||||
> GSD now provisions a managed [RTK](https://github.com/rtk-ai/rtk) binary on supported macOS, Linux, and Windows installs to compress shell-command output in `bash`, `async_bash`, `bg_shell`, and verification flows. GSD forces `RTK_TELEMETRY_DISABLED=1` for all managed invocations. Set `GSD_RTK_DISABLED=1` to disable the integration.
|
||||
|
||||
> **📋 NOTICE: New to Node on Mac?** If you installed Node.js via Homebrew, you may be running a development release instead of LTS. **[Read this guide](./docs/node-lts-macos.md)** to pin Node 24 LTS and avoid compatibility issues.
|
||||
> **📋 NOTICE: New to Node on Mac?** If you installed Node.js via Homebrew, you may be running a development release instead of LTS. **[Read this guide](./docs/user-docs/node-lts-macos.md)** to pin Node 24 LTS and avoid compatibility issues.
|
||||
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## What's New in v2.52.0
|
||||
## What's New in v2.71
|
||||
|
||||
### VS Code Extension & Web UI
|
||||
### MCP Secure Env Collect
|
||||
|
||||
- **VS Code integration** — status bar, file decorations, bash terminal, session tree, conversation history, and code lens. (#2651)
|
||||
- **Dark mode contrast** — raised token floor and flattened opacity tier system for better readability. (#2734)
|
||||
- **Auth token gate** — synthetic 401 on missing token, unauthenticated boot state, and recovery screen. (#2740)
|
||||
- **Secure credential collection over MCP** — the new `secure_env_collect` tool uses MCP form elicitation to collect secrets (API keys, tokens) from external clients without exposing values in tool output. Masks input in interactive mode.
|
||||
- **Hardened elicitation schema** — MCP elicitation schema handling is stricter, with proper validation and fallback for providers that don't support forms.
|
||||
|
||||
### Capability Metadata & Model Routing
|
||||
### MCP Reliability
|
||||
|
||||
- **Capability-based model selection** — replaced model-ID pattern matching with capability metadata, making custom provider integration more reliable. (#2548)
|
||||
- **Stream ordering preserved** — MCP tool output now renders in the correct order, fixing interleaved output in Claude Code and other MCP clients.
|
||||
- **isError flag propagation** — workflow tool execution failures now correctly return `isError: true`, so MCP clients can distinguish success from failure.
|
||||
- **Multi-round discuss questions** — new-project discuss phase supports multi-round questioning with structured question gates.
|
||||
|
||||
### Key Changes
|
||||
### TUI Fixes
|
||||
|
||||
- **`--bare` mode** — wired across headless, pi-coding-agent, and resource-loader for minimal-output operation.
|
||||
- **RPC protocol v2** — new types, init handshake with version detection, and runId generation on prompt/steer/follow_up commands.
|
||||
- **PREFERENCES.md rename** — `preferences.md` renamed to `PREFERENCES.md` for consistency. (#2700, #2738)
|
||||
- **Comprehensive SQLite audit** — indexes, caching, safety, and reconciliation fixes across gsd-db.
|
||||
- **Unified error classifier** — three overlapping error classifiers consolidated into a single classify-decide-act pipeline.
|
||||
- **Pinned output restored** — pinned output bar displays above the editor during tool execution again.
|
||||
- **Turn completion cleanup** — pinned latest output is cleared on turn completion, preventing stale output from persisting.
|
||||
- **Secure input masking** — extension input values are masked in interactive mode when collecting secrets.
|
||||
|
||||
### Key Fixes
|
||||
### Reliability & Internals
|
||||
|
||||
- **Auto-mode stops on provider errors** — auto loop now halts after provider errors instead of retrying indefinitely. (#2762, #2764)
|
||||
- **Transaction safety** — state machine guards moved inside transactions in 5 tool handlers (#2752), and `transaction()` made re-entrant.
|
||||
- **Worktree seeding** — `preferences.md` seeded into auto-mode worktrees and included in worktree sync. (#2693)
|
||||
- **Idle watchdog** — interactive tools exempted from stall detection (#2676), and filesystem activity no longer overrides stalled-tool detection. (#2697)
|
||||
- **Milestone guards** — `allSlicesDone` guarded against vacuous truth on empty slice arrays (#2679), and `complete-milestone` dispatch blocked when validation is `needs-remediation`. (#2682)
|
||||
- **Docker overhaul** — fragile setup replaced with proven container patterns. (#2716)
|
||||
- **Windows** — EINVAL prevented by disabling detached process groups on Win32. (#2744)
|
||||
- **Audit log** — `setLogBasePath` wired into engine init to resurrect audit logging. (#2745)
|
||||
- **TOCTOU file locking** — race conditions in event log and custom workflow graph file locking are fixed with proper atomic lock acquisition.
|
||||
- **State derive refactor** — `deriveStateFromDb` god function extracted into composable, testable helpers.
|
||||
- **Windows portability** — hardened cross-platform portability across runtime, tooling, and CI.
|
||||
- **Model routing transparency** — dynamic routing is skipped for interactive dispatches; model changes are always shown in the banner.
|
||||
- **Capability-aware routing (ADR-004)** — full implementation of capability scoring, `before_model_select` hook, and task metadata extraction.
|
||||
- **Multi-model provider strategy (ADR-005)** — infrastructure for multi-provider model selection wired into live paths.
|
||||
|
||||
### v2.51.0 — Skills, RTK, and Verification
|
||||
See the full [Changelog](./CHANGELOG.md) for details on every release.
|
||||
|
||||
- **`/terminal` command** — direct shell execution from the slash command interface. (#2349)
|
||||
- **Managed RTK integration** — RTK binary auto-provisioned with opt-in preference and web UI toggle. (#2620)
|
||||
- **Verification classes** — compliance checked before milestone completion, with classes injected into validation prompts. (#2621, #2623)
|
||||
- **Skills overhaul** — 30+ new skill packs covering major frameworks, databases, and cloud platforms; curated catalog with `~/.agents/skills/` as primary directory.
|
||||
<details>
|
||||
<summary>Previous highlights (v2.70 and earlier)</summary>
|
||||
|
||||
### v2.50.0 — Quality Gates
|
||||
- **Full workflow over MCP (v2.68)** — slice replanning, milestone management, slice completion, task completion, and core planning tools exposed over MCP
|
||||
- **Transport-gated MCP (v2.68)** — workflow tool availability adapts to provider transport capabilities automatically
|
||||
- **Contextual tips system (v2.68)** — TUI and web terminal surface contextual tips based on workflow state
|
||||
- **Ask user questions over MCP (v2.70)** — interactive questions exposed via elicitation for external integrations
|
||||
- **Tiered Context Injection (M005)** — relevance-scoped context with 65%+ token reduction
|
||||
- **Resilient transient error recovery** — defers to Core RetryHandler and fixes cmdCtx race conditions
|
||||
- **Anthropic subscription routing** — auto-routed through Claude Code CLI provider with proper display names
|
||||
- **5-wave state machine hardening** — critical data integrity fixes across atomic writes, event log reconciliation, and session recovery
|
||||
- **Discussion gate enforcement** — mechanical enforcement with fail-closed behavior
|
||||
- **Slice-level parallelism** — dependency-aware parallel dispatch within a milestone
|
||||
- **Persistent notification panel** — TUI overlay, widget, and web API for real-time notifications
|
||||
- **MCP server** — 6 read-only project state tools for external integrations, auto-wrapup guard, and question dedup
|
||||
- **Ollama extension** — first-class local LLM support via Ollama, with dynamic routing enabled by default
|
||||
- **Discord bot & daemon** — dedicated daemon package, Discord bot, and headless text mode with tool calls
|
||||
- **Capability-aware model routing (ADR-004)** — capability scoring, `before_model_select` hook, and task metadata extraction
|
||||
- **VS Code sidebar redesign** — SCM provider, checkpoints, diagnostics panel, activity feed, workflow controls, session forking
|
||||
- **`/gsd parallel watch`** — native TUI overlay for real-time worker monitoring
|
||||
- **Codebase map** — automatic codebase map injection for fresh agent contexts
|
||||
- **`--resume` flag** — resume previous sessions from the CLI
|
||||
- **Concurrent invocation guard** — prevents overlapping auto-mode runs
|
||||
- **VS Code integration** — status bar, file decorations, bash terminal, session tree, conversation history, and code lens
|
||||
- **Skills overhaul** — 30+ skill packs covering major frameworks, databases, and cloud platforms
|
||||
- **Single-writer state engine** — disciplined state transitions with machine guards and TOCTOU hardening
|
||||
- **DB-backed planning tools** — atomic SQLite tool calls for state transitions
|
||||
- **Declarative workflow engine** — YAML workflows through auto-loop
|
||||
- **Doctor: worktree lifecycle checks** — validates worktree health, detects orphans, consolidates cleanup
|
||||
|
||||
- **Quality gates** — 8-question quality gates added to planning and completion templates, with parallel evaluation via `evaluating-gates` phase.
|
||||
- **Structured error propagation** — errors wired through `UnitResult` for better diagnostics.
|
||||
|
||||
### v2.49.0 — Git Trailers & Yolo Mode
|
||||
|
||||
- **`--yolo` flag** — `/gsd auto --yolo` for non-interactive project init.
|
||||
- **Git trailers** — GSD metadata moved from commit subject scopes to git trailers.
|
||||
|
||||
### v2.48.0 — Forensics & Discussion
|
||||
|
||||
- **`/gsd discuss` for queued milestones** — target milestones still in the queue. (#2349)
|
||||
- **Enhanced forensics** — journal and activity log awareness added to `/gsd forensics`.
|
||||
|
||||
### v2.47.0 — External Providers
|
||||
|
||||
- **External tool execution mode** — `externalToolExecution` mode for external providers in agent-core.
|
||||
- **Claude Code CLI provider** — new provider extension for Claude Code CLI. (#2382)
|
||||
|
||||
### Previous highlights (v2.42–v2.46)
|
||||
|
||||
- **Single-writer state engine** — disciplined state transitions with machine guards, actor identity, reversibility, and TOCTOU hardening. (#2494)
|
||||
- **`/gsd rethink`** — conversational project reorganization. (#2459)
|
||||
- **`/gsd mcp`** — MCP server status and connectivity. (#2362)
|
||||
- **Complete offline mode** — fully offline with local models. (#2429)
|
||||
- **Global KNOWLEDGE.md injection** — cross-project knowledge via `~/.gsd/agent/KNOWLEDGE.md`. (#2331)
|
||||
- **Mobile-responsive web UI** — browser interface works on phones and tablets. (#2354)
|
||||
- **Default isolation mode changed to `none`** — set `git.isolation: worktree` explicitly if needed. (#2481)
|
||||
- **Non-API-key provider extensions** — support for Claude Code CLI and similar providers. (#2382)
|
||||
- **Docker sandbox template** — official Docker template for isolated auto mode. (#2360)
|
||||
- **DB-backed planning tools** — write-side state transitions use atomic SQLite tool calls. (#2141)
|
||||
- **Declarative workflow engine** — YAML workflows through auto-loop. (#2024)
|
||||
- **`/gsd fast`** — toggle service tier for prioritized API routing. (#1862)
|
||||
|
||||
---
|
||||
|
||||
## What's New in v2.41.0
|
||||
|
||||
### New Features
|
||||
|
||||
- **Browser-based web interface** — run GSD from the browser with `gsd --web`. Full project management, real-time progress, and multi-project support via server-sent events. (#1717)
|
||||
- **Doctor: worktree lifecycle checks** — `/gsd doctor` now validates worktree health, detects orphaned worktrees, consolidates cleanup, and enhances `/worktree list` with lifecycle status. (#1814)
|
||||
- **CI: docs-only PR detection** — PRs that only change documentation skip build and test steps, with a new prompt injection scan for security. (#1699)
|
||||
- **Custom Models guide** — new documentation for adding custom providers (Ollama, vLLM, LM Studio, proxies) via `models.json`. (#1670)
|
||||
|
||||
### Data Loss Prevention (Critical Fixes)
|
||||
|
||||
This release includes 7 fixes preventing silent data loss in auto-mode:
|
||||
|
||||
- **Hallucination guard** — execute-task agents that complete with zero tool calls are now rejected as hallucinated. Previously, agents could produce detailed but fabricated summaries without writing any code, wasting ~$25/milestone. (#1838)
|
||||
- **Merge anchor verification** — before deleting a milestone worktree/branch, GSD now verifies the code is actually on the integration branch. Prevents orphaning commits when squash-merge produces an empty diff. (#1829)
|
||||
- **Dirty working tree detection** — `nativeMergeSquash` now distinguishes dirty-tree rejections from content conflicts, preventing silent commit loss when synced `.gsd/` files block the merge. (#1752)
|
||||
- **Doctor cleanup safety** — the `orphaned_completed_units` check no longer auto-fixes during post-task health checks. Previously, timing races could cause the doctor to remove valid completion keys, reverting users to earlier tasks. (#1825)
|
||||
- **Root file reverse-sync** — worktree teardown now syncs root-level `.gsd/` files (PROJECT.md, REQUIREMENTS.md, completed-units.json) back to the project root. Previously these were lost on milestone closeout. (#1831)
|
||||
- **Empty merge guard** — milestone branches with unanchored code changes are preserved instead of deleted when squash-merge produces nothing to commit. (#1755)
|
||||
- **Crash-safe task closeout** — orphaned checkboxes in PLAN.md are unchecked on retry, preventing phantom task completion. (#1759)
|
||||
|
||||
### Auto-Mode Stability
|
||||
|
||||
- **Terminal hang fix** — `stopAuto()` now resolves pending promises, preventing the terminal from freezing permanently after stopping auto-mode. (#1818)
|
||||
- **Signal handler coverage** — SIGHUP and SIGINT now clean up lock files, not just SIGTERM. Prevents stranded locks when VS Code crashes. (#1821)
|
||||
- **Needs-discussion routing** — milestones in `needs-discussion` phase now route to the smart entry UI instead of hard-stopping, breaking the infinite loop. (#1820)
|
||||
- **Infrastructure error handling** — auto-mode stops immediately on ENOSPC, ENOMEM, and similar unrecoverable errors instead of retrying. (#1780)
|
||||
- **Dependency-aware dispatch** — slice dispatch now uses declared `depends_on` instead of positional ordering. (#1770)
|
||||
- **Queue mode depth verification** — the write gate now processes depth verification in queue mode, fixing a deadlock where CONTEXT.md writes were permanently blocked. (#1823)
|
||||
|
||||
### Roadmap Parser Improvements
|
||||
|
||||
- **Table format support** — roadmaps using markdown tables (`| S01 | Title | Risk | Status |`) are now parsed correctly. (#1741)
|
||||
- **Prose header fallback** — when `## Slices` contains H3 headers instead of checkboxes, the prose parser is invoked as a fallback. (#1744)
|
||||
- **Completion marker detection** — prose headers with `✓` or `(Complete)` markers are correctly identified as done. (#1816)
|
||||
- **Zero-slice stub handling** — stub roadmaps from `/gsd queue` return `pre-planning` instead of `blocked`. (#1826)
|
||||
- **Immediate roadmap fix** — roadmap checkbox and UAT stub are fixed immediately after last task instead of deferring to `complete-slice`. (#1819)
|
||||
|
||||
### State & Git Improvements
|
||||
|
||||
- **CONTEXT-DRAFT.md fallback** — `depends_on` is read from CONTEXT-DRAFT.md when CONTEXT.md doesn't exist, preventing draft milestones from being promoted past dependency constraints. (#1743)
|
||||
- **Unborn branch support** — `nativeBranchExists` handles repos with zero commits, preventing dispatch deadlock on new repos. (#1815)
|
||||
- **Ghost milestone detection** — empty `.gsd/milestones/` directories are skipped instead of crashing `deriveState()`. (#1817)
|
||||
- **Default branch detection** — milestone merge detects `master` vs `main` instead of hardcoding. (#1669)
|
||||
- **Milestone title extraction** — titles are pulled from CONTEXT.md headings when no ROADMAP exists. (#1729)
|
||||
|
||||
### Windows & Platform
|
||||
|
||||
- **Windows path handling** — 8.3 short paths, `pathToFileURL` for ESM imports, and `realpathSync.native` fixes across the test suite and verification gate. (#1804)
|
||||
- **DEP0190 fix** — `spawnSync` deprecation warning eliminated by passing commands to shell explicitly. (#1827)
|
||||
- **Web build skip on Windows** — Next.js webpack EPERM errors on system directories are handled gracefully.
|
||||
|
||||
### Developer Experience
|
||||
|
||||
- **@ file finder fix** — typing `@` no longer freezes the TUI. The fix adds debouncing, deduplication, and an empty-query short-circuit. (#1832)
|
||||
- **Tool-call loop guard** — detects and breaks infinite tool-call loops within a single unit, preventing stack overflow. (#1801)
|
||||
- **Completion deferral fix** — roadmap checkbox and UAT stub are fixed at task level, closing the fragile handoff window between last task and `complete-slice`. (#1819)
|
||||
|
||||
See the full [Changelog](./CHANGELOG.md) for all 70+ fixes in this release.
|
||||
|
||||
### Previous highlights (v2.39–v2.41)
|
||||
|
||||
- **Browser-based web interface** — run GSD from the browser with `gsd --web`
|
||||
- **GitHub sync extension** — auto-sync milestones to GitHub Issues, PRs, and Milestones
|
||||
- **Skill tool resolution** — skills auto-activate in dispatched prompts
|
||||
- **Health check phase 2** — real-time doctor issues in dashboard and visualizer
|
||||
- **Forensics upgrade** — full-access GSD debugger with anomaly detection
|
||||
- **7 data-loss prevention fixes** — hallucination guard, merge anchor verification, dirty tree detection, and more
|
||||
- **Pipeline decomposition** — auto-loop rewritten as linear phase pipeline
|
||||
- **Sliding-window stuck detection** — pattern-aware, fewer false positives
|
||||
- **Data-loss recovery** — automatic detection and recovery from v2.30–v2.38 migration issues
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## Documentation
|
||||
|
||||
Full documentation is available at **[gsd.build](https://gsd.build)** (powered by Mintlify) and in the [`docs/`](./docs/) directory:
|
||||
Full documentation is in the [`docs/`](./docs/) directory:
|
||||
|
||||
- **[Getting Started](./docs/getting-started.md)** — install, first run, basic usage
|
||||
- **[Auto Mode](./docs/auto-mode.md)** — autonomous execution deep-dive
|
||||
- **[Configuration](./docs/configuration.md)** — all preferences, models, git, and hooks
|
||||
- **[Custom Models](./docs/custom-models.md)** — add custom providers (Ollama, vLLM, LM Studio, proxies)
|
||||
- **[Token Optimization](./docs/token-optimization.md)** — profiles, context compression, complexity routing
|
||||
- **[Cost Management](./docs/cost-management.md)** — budgets, tracking, projections
|
||||
- **[Git Strategy](./docs/git-strategy.md)** — worktree isolation, branching, merge behavior
|
||||
- **[Parallel Orchestration](./docs/parallel-orchestration.md)** — run multiple milestones simultaneously
|
||||
- **[Working in Teams](./docs/working-in-teams.md)** — unique IDs, shared artifacts
|
||||
- **[Skills](./docs/skills.md)** — bundled skills, discovery, custom authoring
|
||||
- **[Commands Reference](./docs/commands.md)** — all commands and keyboard shortcuts
|
||||
- **[Architecture](./docs/architecture.md)** — system design and dispatch pipeline
|
||||
- **[Troubleshooting](./docs/troubleshooting.md)** — common issues, doctor, forensics, recovery
|
||||
- **[CI/CD Pipeline](./docs/ci-cd-pipeline.md)** — three-stage promotion pipeline (Dev → Test → Prod)
|
||||
- **[VS Code Extension](./vscode-extension/README.md)** — chat participant, sidebar dashboard, RPC integration
|
||||
- **[Visualizer](./docs/visualizer.md)** — workflow visualizer with stats and discussion status
|
||||
- **[Remote Questions](./docs/remote-questions.md)** — route decisions to Slack or Discord when human input is needed
|
||||
- **[Dynamic Model Routing](./docs/dynamic-model-routing.md)** — complexity-based model selection and budget pressure
|
||||
- **[Web Interface](./docs/web-interface.md)** — browser-based project management and real-time progress
|
||||
- **[Pipeline Simplification (ADR-003)](./docs/ADR-003-pipeline-simplification.md)** — merged research into planning, mechanical completion
|
||||
### User Guides
|
||||
|
||||
- **[Getting Started](./docs/user-docs/getting-started.md)** — install, first run, basic usage
|
||||
- **[Auto Mode](./docs/user-docs/auto-mode.md)** — autonomous execution deep-dive
|
||||
- **[Configuration](./docs/user-docs/configuration.md)** — all preferences, models, git, and hooks
|
||||
- **[Custom Models](./docs/user-docs/custom-models.md)** — add custom providers (Ollama, vLLM, LM Studio, proxies)
|
||||
- **[Token Optimization](./docs/user-docs/token-optimization.md)** — profiles, context compression, complexity routing
|
||||
- **[Cost Management](./docs/user-docs/cost-management.md)** — budgets, tracking, projections
|
||||
- **[Git Strategy](./docs/user-docs/git-strategy.md)** — worktree isolation, branching, merge behavior
|
||||
- **[Parallel Orchestration](./docs/user-docs/parallel-orchestration.md)** — run multiple milestones simultaneously
|
||||
- **[Working in Teams](./docs/user-docs/working-in-teams.md)** — unique IDs, shared artifacts
|
||||
- **[Skills](./docs/user-docs/skills.md)** — bundled skills, discovery, custom authoring
|
||||
- **[Commands Reference](./docs/user-docs/commands.md)** — all commands and keyboard shortcuts
|
||||
- **[Troubleshooting](./docs/user-docs/troubleshooting.md)** — common issues, doctor, forensics, recovery
|
||||
- **[Visualizer](./docs/user-docs/visualizer.md)** — workflow visualizer with stats and discussion status
|
||||
- **[Remote Questions](./docs/user-docs/remote-questions.md)** — route decisions to Slack or Discord when human input is needed
|
||||
- **[Dynamic Model Routing](./docs/user-docs/dynamic-model-routing.md)** — complexity-based model selection and budget pressure
|
||||
- **[Web Interface](./docs/user-docs/web-interface.md)** — browser-based project management and real-time progress
|
||||
- **[Migration from v1](./docs/user-docs/migration.md)** — `.planning` → `.gsd` migration
|
||||
- **[Docker Sandbox](./docker/README.md)** — run GSD auto mode in an isolated Docker container
|
||||
- **[Migration from v1](./docs/migration.md)** — `.planning` → `.gsd` migration
|
||||
|
||||
### Developer Docs
|
||||
|
||||
- **[Architecture](./docs/dev/architecture.md)** — system design and dispatch pipeline
|
||||
- **[CI/CD Pipeline](./docs/dev/ci-cd-pipeline.md)** — three-stage promotion pipeline (Dev → Test → Prod)
|
||||
- **[Pipeline Simplification (ADR-003)](./docs/dev/ADR-003-pipeline-simplification.md)** — merged research into planning, mechanical completion
|
||||
- **[VS Code Extension](./vscode-extension/README.md)** — chat participant, sidebar dashboard, RPC integration
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -417,7 +337,7 @@ gsd headless query
|
|||
gsd headless dispatch plan
|
||||
```
|
||||
|
||||
Headless auto-responds to interactive prompts, detects completion, and exits with structured codes: `0` complete, `1` error/timeout, `2` blocked. Auto-restarts on crash with exponential backoff. Use `gsd headless query` for instant, machine-readable state inspection — returns phase, next dispatch preview, and parallel worker costs as a single JSON object without spawning an LLM session. Pair with [remote questions](./docs/remote-questions.md) to route decisions to Slack or Discord when human input is needed.
|
||||
Headless auto-responds to interactive prompts, detects completion, and exits with structured codes: `0` complete, `1` error/timeout, `2` blocked. Auto-restarts on crash with exponential backoff. Use `gsd headless query` for instant, machine-readable state inspection — returns phase, next dispatch preview, and parallel worker costs as a single JSON object without spawning an LLM session. Pair with [remote questions](./docs/user-docs/remote-questions.md) to route decisions to Slack or Discord when human input is needed.
|
||||
|
||||
**Multi-session orchestration** — headless mode supports file-based IPC in `.gsd/parallel/` for coordinating multiple GSD workers across milestones. Build orchestrators that spawn, monitor, and budget-cap a fleet of GSD workers.
|
||||
|
||||
|
|
@ -590,9 +510,8 @@ auto_report: true
|
|||
| `verification_commands`| Array of shell commands to run after task execution (e.g., `["npm run lint", "npm run test"]`) |
|
||||
| `verification_auto_fix`| Auto-retry on verification failures (default: true) |
|
||||
| `verification_max_retries` | Max retries for verification failures (default: 2) |
|
||||
| `require_slice_discussion` | Pause auto-mode before each slice for human discussion review |
|
||||
| `phases.require_slice_discussion` | Pause auto-mode before each slice for human discussion review |
|
||||
| `auto_report` | Auto-generate HTML reports after milestone completion (default: true) |
|
||||
| `searchExcludeDirs` | Directories to exclude from `@` file autocomplete (e.g., `["node_modules", ".git", "dist"]`) |
|
||||
|
||||
### Agent Instructions
|
||||
|
||||
|
|
@ -622,11 +541,11 @@ token_profile: budget # or balanced (default), quality
|
|||
|
||||
**Budget pressure** graduates model downgrading as you approach your budget ceiling — 50%, 75%, and 90% thresholds progressively shift work to cheaper tiers.
|
||||
|
||||
See the full [Token Optimization Guide](./docs/token-optimization.md) for details.
|
||||
See the full [Token Optimization Guide](./docs/user-docs/token-optimization.md) for details.
|
||||
|
||||
### Bundled Tools
|
||||
|
||||
GSD ships with 19 extensions, all loaded automatically:
|
||||
GSD ships with 24 extensions, all loaded automatically:
|
||||
|
||||
| Extension | What it provides |
|
||||
| ---------------------- | ---------------------------------------------------------------------------------------------------------------------- |
|
||||
|
|
@ -648,17 +567,24 @@ GSD ships with 19 extensions, all loaded automatically:
|
|||
| **Remote Questions** | Route decisions to Slack/Discord when human input is needed in headless/CI mode |
|
||||
| **Universal Config** | Discover and import MCP servers and rules from other AI coding tools |
|
||||
| **AWS Auth** | Automatic Bedrock credential refresh for AWS-hosted models |
|
||||
| **TTSR** | Tool-use type-safe runtime validation |
|
||||
| **Ollama** | First-class local LLM support via Ollama |
|
||||
| **Claude Code CLI** | External provider extension for Claude Code CLI |
|
||||
| **cmux** | Claude multiplexer integration — desktop notifications, sidebar metadata, visual subagent splits |
|
||||
| **GitHub Sync** | Auto-sync milestones to GitHub Issues, PRs, and Milestones |
|
||||
| **LSP** | Language Server Protocol — diagnostics, definitions, references, hover, rename |
|
||||
| **TTSR** | Tool-triggered system rules — conditional context injection based on tool usage |
|
||||
|
||||
### Bundled Agents
|
||||
|
||||
Three specialized subagents for delegated work:
|
||||
Five specialized subagents for delegated work:
|
||||
|
||||
| Agent | Role |
|
||||
| -------------- | ------------------------------------------------------------ |
|
||||
| **Scout** | Fast codebase recon — returns compressed context for handoff |
|
||||
| **Researcher** | Web research — finds and synthesizes current information |
|
||||
| **Worker** | General-purpose execution in an isolated context window |
|
||||
| Agent | Role |
|
||||
| ------------------- | ------------------------------------------------------------ |
|
||||
| **Scout** | Fast codebase recon — returns compressed context for handoff |
|
||||
| **Researcher** | Web research — finds and synthesizes current information |
|
||||
| **Worker** | General-purpose execution in an isolated context window |
|
||||
| **JavaScript Pro** | JavaScript-specialized execution and debugging |
|
||||
| **TypeScript Pro** | TypeScript-specialized execution and debugging |
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -733,9 +659,8 @@ gsd (CLI binary)
|
|||
├─ resource-loader.ts Syncs bundled extensions + agents to ~/.gsd/agent/
|
||||
└─ src/resources/
|
||||
├─ extensions/gsd/ Core GSD extension (auto, state, commands, ...)
|
||||
├─ extensions/... 18 supporting extensions
|
||||
├─ agents/ scout, researcher, worker
|
||||
├─ AGENTS.md Agent routing instructions
|
||||
├─ extensions/... 21 supporting extensions
|
||||
├─ agents/ scout, researcher, worker, javascript-pro, typescript-pro
|
||||
└─ GSD-WORKFLOW.md Manual bootstrap protocol
|
||||
```
|
||||
|
||||
|
|
|
|||
|
|
@ -4,51 +4,67 @@ Welcome to the GSD documentation. This covers everything from getting started to
|
|||
|
||||
## User Documentation
|
||||
|
||||
Guides for installing, configuring, and using GSD day-to-day. Located in [`user-docs/`](./user-docs/).
|
||||
|
||||
| Guide | Description |
|
||||
|-------|-------------|
|
||||
| [Getting Started](./getting-started.md) | Installation, first run, and basic usage |
|
||||
| [Auto Mode](./auto-mode.md) | How autonomous execution works — the state machine, crash recovery, and steering |
|
||||
| [Commands Reference](./commands.md) | All commands, keyboard shortcuts, and CLI flags |
|
||||
| [Remote Questions](./remote-questions.md) | Discord and Slack integration for headless auto-mode |
|
||||
| [Configuration](./configuration.md) | Preferences, model selection, git settings, and token profiles |
|
||||
| [Custom Models](./custom-models.md) | Add custom providers (Ollama, vLLM, LM Studio, proxies) via models.json |
|
||||
| [Token Optimization](./token-optimization.md) | Token profiles, context compression, complexity routing, and adaptive learning (v2.17) |
|
||||
| [Dynamic Model Routing](./dynamic-model-routing.md) | Complexity-based model selection, cost tables, escalation, and budget pressure (v2.19) |
|
||||
| [Captures & Triage](./captures-triage.md) | Fire-and-forget thought capture during auto-mode with automated triage (v2.19) |
|
||||
| [Workflow Visualizer](./visualizer.md) | Interactive TUI overlay for progress, dependencies, metrics, and timeline (v2.19) |
|
||||
| [Cost Management](./cost-management.md) | Budget ceilings, cost tracking, projections, and enforcement modes |
|
||||
| [Git Strategy](./git-strategy.md) | Worktree isolation, branching model, and merge behavior |
|
||||
| [Parallel Orchestration](./parallel-orchestration.md) | Run multiple milestones simultaneously with worker isolation and coordination |
|
||||
| [Working in Teams](./working-in-teams.md) | Unique milestone IDs, `.gitignore` setup, and shared planning artifacts |
|
||||
| [Skills](./skills.md) | Bundled skills, skill discovery, and custom skill authoring |
|
||||
| [Migration from v1](./migration.md) | Migrating `.planning` directories from the original GSD |
|
||||
| [Troubleshooting](./troubleshooting.md) | Common issues, `/gsd doctor` (real-time visibility v2.40), `/gsd forensics` (full debugger v2.40), and recovery procedures |
|
||||
| [Web Interface](./web-interface.md) | Browser-based project management with `gsd --web` (v2.41) |
|
||||
| [Getting Started](./user-docs/getting-started.md) | Installation, first run, and basic usage |
|
||||
| [Auto Mode](./user-docs/auto-mode.md) | How autonomous execution works — the state machine, crash recovery, and steering |
|
||||
| [Commands Reference](./user-docs/commands.md) | All commands, keyboard shortcuts, and CLI flags |
|
||||
| [Remote Questions](./user-docs/remote-questions.md) | Discord and Slack integration for headless auto-mode |
|
||||
| [Configuration](./user-docs/configuration.md) | Preferences, model selection, git settings, and token profiles |
|
||||
| [Provider Setup](./user-docs/providers.md) | Step-by-step setup for OpenRouter, Ollama, LM Studio, vLLM, and all supported providers |
|
||||
| [Custom Models](./user-docs/custom-models.md) | Advanced model configuration — models.json schema, compat flags, overrides |
|
||||
| [Token Optimization](./user-docs/token-optimization.md) | Token profiles, context compression, complexity routing, and adaptive learning (v2.17) |
|
||||
| [Dynamic Model Routing](./user-docs/dynamic-model-routing.md) | Complexity-based model selection, cost tables, escalation, and budget pressure (v2.19) |
|
||||
| [Captures & Triage](./user-docs/captures-triage.md) | Fire-and-forget thought capture during auto-mode with automated triage (v2.19) |
|
||||
| [Workflow Visualizer](./user-docs/visualizer.md) | Interactive TUI overlay for progress, dependencies, metrics, and timeline (v2.19) |
|
||||
| [Cost Management](./user-docs/cost-management.md) | Budget ceilings, cost tracking, projections, and enforcement modes |
|
||||
| [Git Strategy](./user-docs/git-strategy.md) | Worktree isolation, branching model, and merge behavior |
|
||||
| [Parallel Orchestration](./user-docs/parallel-orchestration.md) | Run multiple milestones simultaneously with worker isolation and coordination |
|
||||
| [Working in Teams](./user-docs/working-in-teams.md) | Unique milestone IDs, `.gitignore` setup, and shared planning artifacts |
|
||||
| [Skills](./user-docs/skills.md) | Bundled skills, skill discovery, and custom skill authoring |
|
||||
| [Migration from v1](./user-docs/migration.md) | Migrating `.planning` directories from the original GSD |
|
||||
| [Troubleshooting](./user-docs/troubleshooting.md) | Common issues, `/gsd doctor` (real-time visibility v2.40), `/gsd forensics` (full debugger v2.40), and recovery procedures |
|
||||
| [Web Interface](./user-docs/web-interface.md) | Browser-based project management with `gsd --web` (v2.41) |
|
||||
| [VS Code Extension](../vscode-extension/README.md) | Chat participant, sidebar dashboard, and RPC integration for VS Code |
|
||||
|
||||
## Architecture & Internals
|
||||
|
||||
Design documents, ADRs, and internal references. Located in [`dev/`](./dev/).
|
||||
|
||||
| Guide | Description |
|
||||
|-------|-------------|
|
||||
| [Architecture Overview](./architecture.md) | System design, extension model, state-on-disk, and dispatch pipeline |
|
||||
| [Architecture Overview](./dev/architecture.md) | System design, extension model, state-on-disk, and dispatch pipeline |
|
||||
| [Native Engine](../native/README.md) | Rust N-API modules for performance-critical operations |
|
||||
| [ADR-001: Branchless Worktree Architecture](./ADR-001-branchless-worktree-architecture.md) | Decision record for the v2.14 git architecture |
|
||||
| [ADR-003: Pipeline Simplification](./ADR-003-pipeline-simplification.md) | Research merged into planning, mechanical completion (v2.30) |
|
||||
| [ADR-004: Capability-Aware Model Routing](./ADR-004-capability-aware-model-routing.md) | Extend routing from tier/cost selection to task-capability matching |
|
||||
| [ADR-001: Branchless Worktree Architecture](./dev/ADR-001-branchless-worktree-architecture.md) | Decision record for the v2.14 git architecture |
|
||||
| [ADR-003: Pipeline Simplification](./dev/ADR-003-pipeline-simplification.md) | Research merged into planning, mechanical completion (v2.30) |
|
||||
| [ADR-004: Capability-Aware Model Routing](./dev/ADR-004-capability-aware-model-routing.md) | Extend routing from tier/cost selection to task-capability matching |
|
||||
| [ADR-007: Model Catalog Split](./dev/ADR-007-model-catalog-split.md) | Separate model metadata from routing logic for extensibility |
|
||||
| [ADR-008: GSD Tools over MCP](./dev/ADR-008-gsd-tools-over-mcp-for-provider-parity.md) | Native tools over MCP for provider parity |
|
||||
| [ADR-008: Implementation Plan](./dev/ADR-008-IMPLEMENTATION-PLAN.md) | Implementation plan for ADR-008 |
|
||||
| [Context Optimization Opportunities](./dev/pi-context-optimization-opportunities.md) | Analysis of context window usage and optimization strategies |
|
||||
| [File System Map](./dev/FILE-SYSTEM-MAP.md) | Complete file system reference |
|
||||
| [CI/CD Pipeline](./dev/ci-cd-pipeline.md) | Continuous integration and deployment pipeline |
|
||||
| [Frontier Techniques](./dev/FRONTIER-TECHNIQUES.md) | Advanced techniques and research |
|
||||
| [PRD: Branchless Worktree](./dev/PRD-branchless-worktree-architecture.md) | Product requirements for branchless worktree architecture |
|
||||
| [Agent Knowledge Index](./dev/agent-knowledge-index.md) | Index of agent knowledge resources |
|
||||
|
||||
## Pi SDK Documentation
|
||||
|
||||
These guides cover the underlying Pi SDK that GSD is built on. Useful if you want to extend GSD or build your own agent application.
|
||||
Guides for the underlying Pi SDK that GSD is built on. Located in [`dev/`](./dev/).
|
||||
|
||||
| Guide | Description |
|
||||
|-------|-------------|
|
||||
| [What is Pi](./what-is-pi/README.md) | Core concepts — modes, agent loop, sessions, tools, providers |
|
||||
| [Extending Pi](./extending-pi/README.md) | Building extensions — tools, commands, UI, events, state |
|
||||
| [Context & Hooks](./context-and-hooks/README.md) | Context pipeline, hook reference, inter-extension communication |
|
||||
| [Pi UI / TUI](./pi-ui-tui/README.md) | Terminal UI components, theming, keyboard input, rendering |
|
||||
| [What is Pi](./dev/what-is-pi/README.md) | Core concepts — modes, agent loop, sessions, tools, providers |
|
||||
| [Extending Pi](./dev/extending-pi/README.md) | Building extensions — tools, commands, UI, events, state |
|
||||
| [Context & Hooks](./dev/context-and-hooks/README.md) | Context pipeline, hook reference, inter-extension communication |
|
||||
| [Pi UI / TUI](./dev/pi-ui-tui/README.md) | Terminal UI components, theming, keyboard input, rendering |
|
||||
|
||||
## Research
|
||||
|
||||
| Guide | Description |
|
||||
|-------|-------------|
|
||||
| [Building Coding Agents](./building-coding-agents/README.md) | Research notes on agent design — decomposition, context engineering, cost/quality tradeoffs |
|
||||
| [Building Coding Agents](./dev/building-coding-agents/README.md) | Research notes on agent design — decomposition, context engineering, cost/quality tradeoffs |
|
||||
| [Proposals](./dev/proposals/) | Feature proposals and workflow definitions |
|
||||
| [Superpowers](./dev/superpowers/) | Plans and specs for superpower features |
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
# ADR-004: Capability-Aware Model Routing
|
||||
|
||||
**Status:** Proposed (Revised)
|
||||
**Status:** Implemented (Phase 2)
|
||||
**Date:** 2026-03-26
|
||||
**Revised:** 2026-03-26
|
||||
**Revised:** 2026-04-03
|
||||
**Deciders:** Jeremy McSpadden
|
||||
**Related:** ADR-003 (pipeline simplification), [Issue #2655](https://github.com/gsd-build/gsd-2/issues/2655), `docs/user-docs/dynamic-model-routing.md`
|
||||
|
||||
67
docs/dev/ADR-005-multi-model-provider-tool-strategy.md
Normal file
67
docs/dev/ADR-005-multi-model-provider-tool-strategy.md
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
# ADR-005: Multi-Model, Multi-Provider, and Tool Strategy
|
||||
|
||||
**Status:** Accepted
|
||||
**Date:** 2026-03-27
|
||||
**Deciders:** Jeremy McSpadden
|
||||
**Related:** ADR-004 (capability-aware model routing), ADR-003 (pipeline simplification), [Issue #2790](https://github.com/gsd-build/gsd-2/issues/2790)
|
||||
|
||||
## Context
|
||||
|
||||
PR #2755 lands capability-aware model routing (ADR-004), extending the router from a one-dimensional complexity-tier system to a two-dimensional system that scores models across 7 capability dimensions. GSD can now intelligently pick the best model for a task from a heterogeneous pool.
|
||||
|
||||
But model selection is only one piece of the multi-model puzzle. The system faces structural gaps as users configure diverse provider pools:
|
||||
|
||||
1. **Tool compatibility is assumed, not verified** — Every registered tool is sent to every model regardless of provider capabilities.
|
||||
2. **No tool-aware model routing** — ADR-004 scores 7 capability dimensions but none encode whether a model can actually use the tools a task requires.
|
||||
3. **Provider failover loses context fidelity** — Cross-provider switches silently degrade conversation quality (thinking blocks dropped, tool IDs remapped).
|
||||
4. **Tool availability is static across a session** — The same tools are presented regardless of the selected model's capabilities.
|
||||
5. **No provider capability registry** — Provider quirks are scattered across `*-shared.ts` files.
|
||||
|
||||
## Decision
|
||||
|
||||
Introduce a provider capability registry and tool compatibility layer that integrates with ADR-004's capability-aware model router.
|
||||
|
||||
### Design Principles
|
||||
|
||||
1. **Layered on ADR-004, not replacing it.** Capability scoring remains primary. This adds tool compatibility as a hard constraint.
|
||||
2. **Hard constraints filter; soft scores rank.** Tool support is binary — it filters the eligible set before scoring.
|
||||
3. **Provider knowledge is declarative, not scattered.** Provider capabilities move to an explicit registry.
|
||||
4. **Tool sets adapt to model capabilities.** Active tool set adjusts when the router selects a different model.
|
||||
5. **Graceful degradation preserved.** Unknown providers get full tool access — same as today.
|
||||
|
||||
### Implementation Phases
|
||||
|
||||
1. **Phase 1:** Provider Capabilities Registry (`packages/pi-ai/src/providers/provider-capabilities.ts`)
|
||||
2. **Phase 2:** Tool Compatibility Metadata (extend `ToolDefinition` with `compatibility` field)
|
||||
3. **Phase 3:** Tool-compatibility filter in routing pipeline + `ProviderSwitchReport` in `transform-messages.ts`
|
||||
4. **Phase 4:** `adjustToolSet` extension hook
|
||||
|
||||
## Consequences
|
||||
|
||||
### Positive
|
||||
- Eliminates silent tool failures when routing to incompatible providers
|
||||
- Makes cross-provider routing safe by default
|
||||
- Provider knowledge becomes queryable (registry vs scattered code)
|
||||
- Cross-provider context loss becomes visible via `ProviderSwitchReport`
|
||||
|
||||
### Negative
|
||||
- More metadata to maintain (provider capabilities, tool compatibility)
|
||||
- Tool filtering adds a pipeline step (sub-millisecond, O(models × tools))
|
||||
- Risk of over-filtering (mitigated: opt-in per tool, permissive defaults)
|
||||
|
||||
### Neutral
|
||||
- Existing behavior unchanged without metadata
|
||||
- ADR-004 scoring is unmodified
|
||||
- Provider implementations simplify over time as registry replaces scattered workarounds
|
||||
|
||||
## Appendix: Architecture Reference
|
||||
|
||||
| File | Role |
|
||||
|------|------|
|
||||
| `packages/pi-ai/src/providers/register-builtins.ts` | Provider registration |
|
||||
| `packages/pi-ai/src/providers/*-shared.ts` | Provider-specific handling |
|
||||
| `packages/pi-ai/src/providers/transform-messages.ts` | Cross-provider normalization |
|
||||
| `packages/pi-ai/src/types.ts` | Core types |
|
||||
| `packages/pi-coding-agent/src/core/extensions/types.ts` | ToolDefinition, ExtensionAPI |
|
||||
| `src/resources/extensions/gsd/model-router.ts` | Capability scoring (ADR-004) |
|
||||
| `src/resources/extensions/gsd/auto-model-selection.ts` | Model selection orchestration |
|
||||
285
docs/dev/ADR-007-model-catalog-split.md
Normal file
285
docs/dev/ADR-007-model-catalog-split.md
Normal file
|
|
@ -0,0 +1,285 @@
|
|||
# ADR-007: Model Catalog Split and Provider API Encapsulation
|
||||
|
||||
**Status:** Proposed
|
||||
**Date:** 2026-04-03
|
||||
**Deciders:** Jeremy McSpadden
|
||||
**Related:** ADR-004 (capability-aware model routing), [ADR-005](https://github.com/gsd-build/gsd-2/issues/2790), [ADR-006](https://github.com/gsd-build/gsd-2/issues/2995), `packages/pi-ai/src/providers/`, `packages/pi-ai/src/models.ts`
|
||||
|
||||
## Context
|
||||
|
||||
The model/provider system in `pi-ai` has two structural problems worth fixing — but the system is **not fundamentally broken**. The heavy lifting (lazy SDK imports, registry-based dispatch, extension-based registration) is already well-designed. This ADR targets the two areas where the current design creates real friction without proposing unnecessary runtime changes.
|
||||
|
||||
### Current Architecture
|
||||
|
||||
```
|
||||
stream.ts
|
||||
└─ import "./providers/register-builtins.js" ← side-effect import at load time
|
||||
├─ import anthropic.ts (6.8 KB)
|
||||
├─ import anthropic-vertex.ts (3.9 KB)
|
||||
├─ import openai-completions.ts (26 KB)
|
||||
├─ import openai-responses.ts (6.4 KB)
|
||||
├─ import openai-codex-responses.ts (29 KB)
|
||||
├─ import azure-openai-responses.ts (7.8 KB)
|
||||
├─ import google.ts (13.6 KB)
|
||||
├─ import google-vertex.ts (14.5 KB)
|
||||
├─ import google-gemini-cli.ts (30 KB)
|
||||
├─ import mistral.ts (18.9 KB)
|
||||
└─ amazon-bedrock.ts (24 KB) ← only lazy-loaded provider
|
||||
|
||||
models.ts
|
||||
├─ import models.generated.ts ← 13,848 lines, ALL providers, loaded at init
|
||||
└─ import models.custom.ts ← 197 lines, additional providers
|
||||
```
|
||||
|
||||
### What Already Works Well
|
||||
|
||||
1. **SDK lazy loading.** Every provider file uses `async function getXxxClass()` with a cached dynamic `import()`. The heavy npm packages (`@anthropic-ai/sdk`, `openai`, `@google/genai`, `@aws-sdk/*`, `@mistralai/*`) are only loaded on first API call. This is where the real startup cost would be — and it's already handled.
|
||||
|
||||
2. **Registry-based dispatch.** `api-registry.ts` cleanly maps API types to stream functions. Callers use `stream(model, context)` and the registry routes to the right provider. This pattern is sound.
|
||||
|
||||
3. **Extension registration.** Ollama and Claude Code CLI register via `registerApiProvider()` at runtime. This extensibility point works correctly.
|
||||
|
||||
4. **Provider implementation code loading (~200KB total).** While all providers load eagerly, V8 parses local `.js` files in single-digit milliseconds each. The total parse cost for all provider files is ~10-30ms — not a user-visible bottleneck on a CLI that's about to make a multi-second API call anyway.
|
||||
|
||||
### What's Actually Worth Fixing
|
||||
|
||||
#### Problem 1: Monolithic model catalog — developer experience, not runtime
|
||||
|
||||
`models.generated.ts` is **13,848 lines in a single file**. This creates real friction:
|
||||
|
||||
- **PR reviews are painful.** When the generation script runs, the diff is a wall of changes across unrelated providers. Reviewers can't tell what actually changed for a specific provider.
|
||||
- **Navigation is slow.** Finding a specific model requires scrolling or searching through thousands of lines of static object literals.
|
||||
- **Merge conflicts are frequent.** Any two PRs that touch model generation will conflict on the same monolithic file.
|
||||
- **Git blame is useless.** Every line was "last changed" by the generation script, obscuring the history of individual provider additions.
|
||||
|
||||
The runtime cost of loading all model definitions is negligible — a Map of ~200 model objects is maybe 50-100KB of heap. The problem is purely about code organization and developer workflow.
|
||||
|
||||
#### Problem 2: Barrel export leaks provider internals — API design
|
||||
|
||||
`packages/pi-ai/src/index.ts` re-exports every provider module's internals:
|
||||
|
||||
```typescript
|
||||
export * from "./providers/anthropic.js";
|
||||
export * from "./providers/google.js";
|
||||
export * from "./providers/google-gemini-cli.js";
|
||||
export * from "./providers/google-vertex.js";
|
||||
export * from "./providers/mistral.js";
|
||||
export * from "./providers/openai-completions.js";
|
||||
export * from "./providers/openai-responses.js";
|
||||
// ... etc
|
||||
```
|
||||
|
||||
This is a public API problem:
|
||||
|
||||
- **Consumers can bypass the registry.** Any code that writes `import { streamAnthropic } from "pi-ai"` takes a direct dependency on an implementation detail that should be internal.
|
||||
- **Refactoring is blocked.** Renaming a function inside a provider file is a breaking change because it's re-exported from the package root.
|
||||
- **API surface is unnecessarily large.** The public API should be `stream()`, `streamSimple()`, `registerApiProvider()`, model utilities, and types. Provider-specific stream functions are implementation details.
|
||||
|
||||
### What Is NOT Worth Changing
|
||||
|
||||
**Lazy provider loading (converting `register-builtins.ts` to async on-demand loading).** This was considered and rejected because:
|
||||
|
||||
1. **The SDKs are already lazy.** The heavy cost is handled. Provider implementation code (~200KB of local `.js`) parses in ~10-30ms total.
|
||||
2. **Async resolution adds complexity to the hot path.** `stream.ts` currently does a synchronous `Map.get()`. Making `resolveApiProvider` async adds a microtask hop to every API call — not just the first. Small but measurable, and for no user-visible gain.
|
||||
3. **High blast radius, low payoff.** Touching `stream.ts`, `api-registry.ts`, and the registration lifecycle simultaneously risks regressions in the core streaming path for an optimization that wouldn't show up in profiling.
|
||||
4. **Bedrock's lazy loading is a special case, not a template.** It exists because `@aws-sdk/client-bedrock-runtime` is uniquely massive. Generalizing this pattern to providers where the SDK is already lazy-imported doesn't compound the benefit.
|
||||
|
||||
## Decision
|
||||
|
||||
**Make two targeted improvements to code organization and API hygiene. Do not change runtime loading behavior.**
|
||||
|
||||
### Change 1: Split `models.generated.ts` into per-provider files
|
||||
|
||||
Replace the monolithic 13,848-line generated file with per-provider files:
|
||||
|
||||
```
|
||||
packages/pi-ai/src/models/
|
||||
├── index.ts ← re-exports combined registry, same public API
|
||||
├── generated/
|
||||
│ ├── anthropic.ts ← Anthropic model definitions
|
||||
│ ├── openai.ts ← OpenAI model definitions
|
||||
│ ├── google.ts ← Google model definitions
|
||||
│ ├── mistral.ts ← Mistral model definitions
|
||||
│ ├── amazon-bedrock.ts ← Bedrock model definitions
|
||||
│ ├── groq.ts ← Groq model definitions
|
||||
│ ├── xai.ts ← xAI model definitions
|
||||
│ ├── cerebras.ts ← Cerebras model definitions
|
||||
│ ├── openrouter.ts ← OpenRouter model definitions
|
||||
│ └── ... ← one file per provider in the catalog
|
||||
├── custom.ts ← replaces models.custom.ts (unchanged content)
|
||||
└── capability-patches.ts ← CAPABILITY_PATCHES extracted for clarity
|
||||
```
|
||||
|
||||
**`models/index.ts` keeps the exact same synchronous public API:**
|
||||
|
||||
```typescript
|
||||
// models/index.ts
|
||||
// GSD-2 — Model registry (split by provider for maintainability)
|
||||
|
||||
import { ANTHROPIC_MODELS } from "./generated/anthropic.js";
|
||||
import { OPENAI_MODELS } from "./generated/openai.js";
|
||||
import { GOOGLE_MODELS } from "./generated/google.js";
|
||||
// ... one import per provider
|
||||
|
||||
import { CUSTOM_MODELS } from "./custom.js";
|
||||
import { CAPABILITY_PATCHES, applyCapabilityPatches } from "./capability-patches.js";
|
||||
import type { Api, KnownProvider, Model, Usage } from "../types.js";
|
||||
|
||||
// Combine all generated models into single registry — same as today
|
||||
const MODELS = {
|
||||
...ANTHROPIC_MODELS,
|
||||
...OPENAI_MODELS,
|
||||
...GOOGLE_MODELS,
|
||||
// ...
|
||||
};
|
||||
|
||||
// Rest of the file is identical to current models.ts:
|
||||
// modelRegistry Map construction, capability patch application,
|
||||
// getModel(), getProviders(), getModels(), calculateCost(),
|
||||
// supportsXhigh(), modelsAreEqual()
|
||||
```
|
||||
|
||||
**Key constraint: loading stays synchronous and eager.** All model files are statically imported. The Map is built at module init exactly as today. No async, no lazy loading, no runtime behavior change. This is purely a file organization change.
|
||||
|
||||
**Update `generate-models.ts`** to emit one file per provider instead of a single `models.generated.ts`. The script already groups models by provider internally — it just needs to write separate files instead of one.
|
||||
|
||||
#### Why this matters
|
||||
|
||||
| Before | After |
|
||||
|--------|-------|
|
||||
| PR diffs show 13K-line file changes | PR diffs scoped to the provider that changed |
|
||||
| Merge conflicts on any concurrent model update | Conflicts only when same provider is touched |
|
||||
| `git blame` shows "regenerate models" for every line | `git blame` shows per-provider history |
|
||||
| Finding a model = search through 13K lines | Finding a model = open the provider file |
|
||||
| One reviewer must understand all providers | Reviewers only need context for affected provider |
|
||||
|
||||
### Change 2: Stop barrel-exporting provider internals
|
||||
|
||||
**Update `packages/pi-ai/src/index.ts`:**
|
||||
|
||||
```typescript
|
||||
// Before (current — 17 re-exports including all providers):
|
||||
export * from "./providers/anthropic.js";
|
||||
export * from "./providers/azure-openai-responses.js";
|
||||
export * from "./providers/google.js";
|
||||
export * from "./providers/google-gemini-cli.js";
|
||||
export * from "./providers/google-vertex.js";
|
||||
export * from "./providers/mistral.js";
|
||||
export * from "./providers/openai-completions.js";
|
||||
export * from "./providers/openai-responses.js";
|
||||
export * from "./providers/register-builtins.js";
|
||||
// ...
|
||||
|
||||
// After (clean public API):
|
||||
export * from "./api-registry.js";
|
||||
export * from "./env-api-keys.js";
|
||||
export * from "./models/index.js";
|
||||
export * from "./providers/register-builtins.js"; // resetApiProviders() is public
|
||||
export * from "./stream.js";
|
||||
export * from "./types.js";
|
||||
export * from "./utils/event-stream.js";
|
||||
export * from "./utils/json-parse.js";
|
||||
export type { OAuthAuthInfo, OAuthCredentials, /* ... */ } from "./utils/oauth/types.js";
|
||||
export * from "./utils/overflow.js";
|
||||
export * from "./utils/typebox-helpers.js";
|
||||
export * from "./utils/repair-tool-json.js";
|
||||
export * from "./utils/validation.js";
|
||||
```
|
||||
|
||||
Provider-specific exports (`streamAnthropic`, `streamGoogle`, etc.) are removed from the public API. Any external consumer that imported them directly should use the registry-based `stream()` / `streamSimple()` functions instead — which is how all internal callers already work.
|
||||
|
||||
#### Why this matters
|
||||
|
||||
- **Enforces the registry pattern.** The correct way to call a provider is `stream(model, context)`. Direct provider function imports create fragile coupling.
|
||||
- **Enables future refactoring.** Provider internal function signatures can change without breaking the package API. Today, renaming `streamAnthropic` would be a semver-breaking change.
|
||||
- **Reduces API surface.** Consumers see only what they need: `stream`, `streamSimple`, `registerApiProvider`, model utilities, and types.
|
||||
|
||||
### What Does NOT Change
|
||||
|
||||
- **Runtime behavior** — provider loading is unchanged: every provider still loads eagerly, except Bedrock, which stays lazy-loaded as it is today
|
||||
- **The `Model<TApi>` type system** — all types, interfaces, and generics stay the same
|
||||
- **The `ApiProvider` interface** — providers still implement `{ api, stream, streamSimple }`
|
||||
- **The `api-registry.ts` registry** — synchronous `Map.get()` dispatch, unchanged
|
||||
- **`stream.ts`** — no changes to the streaming entry point
|
||||
- **`register-builtins.ts`** — still eagerly imports and registers all providers (only `resetApiProviders` remains in barrel export)
|
||||
- **The extension system** — `registerApiProvider()` continues to work for Ollama, Claude Code CLI, etc.
|
||||
- **`models.json` user config** — custom models, overrides, provider settings are unaffected
|
||||
- **Model discovery** — discovery adapters are already lazy and independent
|
||||
- **Model routing** — ADR-004's capability-aware routing is orthogonal
|
||||
|
||||
## Consequences
|
||||
|
||||
### Positive
|
||||
|
||||
1. **Cleaner PRs.** Model catalog changes are scoped to the provider that changed. Reviewers see a 200-line diff in `models/generated/openai.ts` instead of a 13K-line diff in `models.generated.ts`.
|
||||
|
||||
2. **Fewer merge conflicts.** Two PRs that update different providers no longer conflict on the same file.
|
||||
|
||||
3. **Better navigability.** Developers can jump directly to `models/generated/anthropic.ts` to see Anthropic's model definitions instead of searching through a monolith.
|
||||
|
||||
4. **Cleaner package API.** `pi-ai` exports only what consumers need. Provider internals are properly encapsulated.
|
||||
|
||||
5. **Future-proofs refactoring.** Provider implementation details can evolve without breaking the public API contract.
|
||||
|
||||
6. **Zero runtime risk.** No changes to loading, registration, streaming, or dispatch. The refactor is purely structural.
|
||||
|
||||
### Negative
|
||||
|
||||
1. **More files.** Instead of 1 generated file + 1 custom file, we'll have ~15-20 generated files. Marginal complexity increase, but each file is focused and small.
|
||||
|
||||
2. **Generation script update.** `generate-models.ts` needs to write per-provider files. The script already groups by provider, so this is straightforward but requires testing.
|
||||
|
||||
3. **Import audit for barrel export change.** Any code that directly imports `streamAnthropic` (etc.) from `pi-ai` needs to be updated. Based on research, the main consumer is `register-builtins.ts` itself, which imports providers directly (not through the barrel). External usage should be minimal.
|
||||
|
||||
## Alternatives Considered
|
||||
|
||||
### 1. Full lazy provider loading (original ADR-005 proposal)
|
||||
|
||||
Make all providers load on-demand via async dynamic imports, generalizing the Bedrock pattern. **Rejected** because:
|
||||
- SDK imports are already lazy — the heavy cost is handled
|
||||
- Provider implementation parsing is ~10-30ms total — not a bottleneck
|
||||
- Adds async complexity to the synchronous stream dispatch hot path
|
||||
- High migration effort and regression risk for unmeasurable performance gain
|
||||
|
||||
### 2. Plugin architecture with separate npm packages
|
||||
|
||||
Move each provider to its own package (`@gsd/provider-anthropic`, etc.). Maximum isolation but dramatically more complex build/release/versioning. Overkill for a monorepo where all providers ship together.
|
||||
|
||||
### 3. Do nothing
|
||||
|
||||
The current architecture works. This is a valid choice. The split is justified by the developer experience friction (13K-line file, merge conflicts, unusable git blame) and the API hygiene issue (leaking provider internals), not by a runtime problem. If the team is not experiencing these friction points, deferring is reasonable.
|
||||
|
||||
## Implementation Plan
|
||||
|
||||
### Wave 1: Split Model Catalog (Low-Medium Risk)
|
||||
1. Update `generate-models.ts` to emit per-provider files into `models/generated/`
|
||||
2. Create `models/index.ts` that imports all per-provider files and builds the same registry
|
||||
3. Extract `CAPABILITY_PATCHES` into `models/capability-patches.ts`
|
||||
4. Move `models.custom.ts` to `models/custom.ts`
|
||||
5. Update imports in `models.ts` (or replace it with the new `models/index.ts`)
|
||||
6. Verify `npm run build` and `npm run test` pass
|
||||
7. Delete `models.generated.ts` and `models.custom.ts`
|
||||
|
||||
### Wave 2: Clean Up Barrel Export (Low Risk)
|
||||
1. Remove provider re-exports from `index.ts`
|
||||
2. Grep for direct provider imports from `"pi-ai"` across the codebase
|
||||
3. Migrate any found usages to use `stream()` / `streamSimple()` through the registry
|
||||
4. Verify build and tests
|
||||
|
||||
### Wave 3: Validate
|
||||
1. Run full test suite
|
||||
2. Verify extension registration (Ollama, Claude Code CLI) still works
|
||||
3. Verify `resetApiProviders()` test helper still works
|
||||
4. Spot-check a few providers end-to-end
|
||||
|
||||
## References
|
||||
|
||||
- Current model catalog: `packages/pi-ai/src/models.generated.ts` (13,848 lines)
|
||||
- Current barrel export: `packages/pi-ai/src/index.ts`
|
||||
- Model registry: `packages/pi-ai/src/models.ts`
|
||||
- API provider registry: `packages/pi-ai/src/api-registry.ts`
|
||||
- Eager registration: `packages/pi-ai/src/providers/register-builtins.ts`
|
||||
- Stream dispatch: `packages/pi-ai/src/stream.ts`
|
||||
- Generation script: `packages/pi-ai/scripts/generate-models.ts`
|
||||
- Extension registration: `packages/pi-coding-agent/src/core/model-registry.ts`
|
||||
- ADR-004: `docs/dev/ADR-004-capability-aware-model-routing.md`
|
||||
335
docs/dev/ADR-008-IMPLEMENTATION-PLAN.md
Normal file
335
docs/dev/ADR-008-IMPLEMENTATION-PLAN.md
Normal file
|
|
@ -0,0 +1,335 @@
|
|||
# ADR-008 Implementation Plan
|
||||
|
||||
**Related ADR:** [ADR-008-gsd-tools-over-mcp-for-provider-parity.md](./ADR-008-gsd-tools-over-mcp-for-provider-parity.md)
|
||||
**Status:** Draft
|
||||
**Date:** 2026-04-09
|
||||
|
||||
## Objective
|
||||
|
||||
Implement the ADR-008 decision by exposing the core GSD workflow tool contract over MCP, then wiring MCP-backed access into provider paths that cannot use the native in-process GSD tool registry directly.
|
||||
|
||||
The first usable outcome is:
|
||||
|
||||
- a Claude Code-backed execution session can complete a task using canonical GSD tools
|
||||
- no manual summary-writing fallback is needed
|
||||
- native provider behavior remains unchanged
|
||||
|
||||
## Non-Goals
|
||||
|
||||
- Replacing native in-process GSD tools with MCP
|
||||
- Exporting every historical alias in the first rollout
|
||||
- Reworking the entire session-oriented MCP server before proving the workflow-tool surface
|
||||
- Supporting every provider path before Claude Code is working end-to-end
|
||||
|
||||
## Constraints
|
||||
|
||||
- Native and MCP tool paths must share business logic
|
||||
- MCP must not bypass write-gate or discussion-gate protections
|
||||
- Canonical GSD state transitions must remain DB-backed
|
||||
- Provider capability mismatches must fail early, not degrade silently
|
||||
|
||||
## Workstreams
|
||||
|
||||
### 1. Shared Handler Extraction
|
||||
|
||||
Goal: separate business logic from transport registration.
|
||||
|
||||
Targets:
|
||||
|
||||
- `src/resources/extensions/gsd/bootstrap/db-tools.ts`
|
||||
- `src/resources/extensions/gsd/bootstrap/query-tools.ts`
|
||||
- `src/resources/extensions/gsd/tools/complete-task.ts`
|
||||
- sibling modules used by planning/summary/validation tools
|
||||
|
||||
Deliverables:
|
||||
|
||||
- transport-neutral handler entrypoints for the minimum workflow tool set
|
||||
- thin native registration wrappers that call those handlers
|
||||
- thin MCP registration wrappers that call those handlers
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- native tool behavior is unchanged
|
||||
- no workflow tool logic is duplicated in MCP server code
|
||||
|
||||
### 2. Workflow-Tool MCP Surface
|
||||
|
||||
Goal: add an MCP server surface for real GSD workflow tools, distinct from the current session/read API.
|
||||
|
||||
Preferred first-cut tool set:
|
||||
|
||||
- `gsd_summary_save`
|
||||
- `gsd_decision_save`
|
||||
- `gsd_plan_milestone`
|
||||
- `gsd_plan_slice`
|
||||
- `gsd_plan_task`
|
||||
- `gsd_task_complete`
|
||||
- `gsd_slice_complete`
|
||||
- `gsd_complete_milestone`
|
||||
- `gsd_validate_milestone`
|
||||
- `gsd_replan_slice`
|
||||
- `gsd_reassess_roadmap`
|
||||
- `gsd_save_gate_result`
|
||||
- `gsd_milestone_status`
|
||||
|
||||
Likely files:
|
||||
|
||||
- `packages/mcp-server/src/server.ts` or a new sibling server package
|
||||
- `packages/mcp-server/src/...` supporting modules
|
||||
- shared tool-definition metadata if needed
|
||||
|
||||
Decisions to make during implementation:
|
||||
|
||||
- extend existing MCP package vs create `packages/mcp-gsd-tools-server`
|
||||
- canonical names only vs selected alias export
|
||||
- single combined server vs separate “session” and “workflow” server modes
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- MCP tool discovery shows the minimum tool set
|
||||
- each MCP tool invokes the shared handlers successfully in isolation
|
||||
|
||||
### 3. Safety and Policy Parity
|
||||
|
||||
Goal: ensure MCP mutations enforce the same rules as native tool calls.
|
||||
|
||||
Targets:
|
||||
|
||||
- `src/resources/extensions/gsd/bootstrap/write-gate.ts`
|
||||
- any current tool-call gating hooks tied to native runtime only
|
||||
- MCP wrapper layer before shared handler invocation
|
||||
|
||||
Required protections:
|
||||
|
||||
- discussion gate blocking
|
||||
- queue-mode restrictions
|
||||
- write-path restrictions
|
||||
- canonical DB/file rendering order
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- MCP cannot be used to bypass native write restrictions
|
||||
- blocked native scenarios remain blocked over MCP
|
||||
|
||||
### 4. Claude Code Provider Integration
|
||||
|
||||
Goal: attach the GSD workflow-tool MCP surface to Claude Code sessions.
|
||||
|
||||
Targets:
|
||||
|
||||
- `src/resources/extensions/claude-code-cli/stream-adapter.ts`
|
||||
- `src/resources/extensions/claude-code-cli/index.ts`
|
||||
|
||||
Expected work:
|
||||
|
||||
- build a GSD-managed `mcpServers` config for the Claude SDK session
|
||||
- attach the workflow MCP server only when the session requires GSD tools
|
||||
- keep current Claude Code streaming behavior intact
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- Claude Code session can discover the GSD workflow MCP tools
|
||||
- task execution path can call `gsd_task_complete` successfully
|
||||
|
||||
### 5. Capability Detection and Failure Path
|
||||
|
||||
Goal: refuse to start tool-dependent workflows when required capabilities are unavailable.
|
||||
|
||||
Targets:
|
||||
|
||||
- GSD dispatch / auto-mode preflight
|
||||
- provider selection and routing checks
|
||||
- user-facing compatibility errors
|
||||
|
||||
Required behavior:
|
||||
|
||||
- if native GSD tools are available, proceed
|
||||
- else if GSD workflow MCP tools are available, proceed
|
||||
- else fail fast with a precise message
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- no execution prompt is sent that requires unavailable tools
|
||||
- users with only unsupported capability combinations get a hard error, not a fake fallback
|
||||
|
||||
### 6. Prompt and Documentation Alignment
|
||||
|
||||
Goal: keep the workflow contract strict while removing transport assumptions from docs and runtime messaging.
|
||||
|
||||
Targets:
|
||||
|
||||
- `src/resources/extensions/gsd/prompts/execute-task.md`
|
||||
- related planning/discuss prompts that reference tool availability
|
||||
- provider and MCP docs
|
||||
|
||||
Rules:
|
||||
|
||||
- prompts should keep requiring canonical GSD completion/planning tools
|
||||
- prompts should not imply “native in-process tool only”
|
||||
- docs should explain native vs MCP-backed fulfillment paths
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- prompt contract matches runtime reality
|
||||
- no provider is told to use a tool surface it cannot access
|
||||
|
||||
## Phase Plan
|
||||
|
||||
### Phase 1: Spike and Handler Extraction
|
||||
|
||||
Scope:
|
||||
|
||||
- extract shared logic for `gsd_summary_save`, `gsd_task_complete`, and `gsd_milestone_status`
|
||||
- prove native wrappers still work
|
||||
|
||||
Why first:
|
||||
|
||||
- these tools are enough to test end-to-end completion semantics without migrating the full catalog
|
||||
|
||||
Verification:
|
||||
|
||||
- existing native tests still pass
|
||||
- new unit tests cover shared handler entrypoints directly
|
||||
|
||||
### Phase 2: Minimal Workflow MCP Server
|
||||
|
||||
Scope:
|
||||
|
||||
- expose the three extracted tools over MCP
|
||||
- ensure discovery schemas are clean and canonical
|
||||
|
||||
Verification:
|
||||
|
||||
- MCP discovery returns all three tools
|
||||
- direct MCP calls succeed against a fixture project
|
||||
|
||||
### Phase 3: Claude Code End-to-End Proof
|
||||
|
||||
Scope:
|
||||
|
||||
- wire the minimal workflow MCP server into the Claude SDK session
|
||||
- run a single execution path that ends with task completion
|
||||
|
||||
Verification:
|
||||
|
||||
- Claude Code can call `gsd_task_complete`
|
||||
- summary file, DB state, and plan checkbox update correctly
|
||||
|
||||
### Phase 4: Expand to Full Minimum Workflow Set
|
||||
|
||||
Scope:
|
||||
|
||||
- add planning, slice completion, milestone completion, roadmap reassessment, and gate result tools
|
||||
|
||||
Verification:
|
||||
|
||||
- discuss/plan/execute/complete lifecycle works over MCP for the supported flow set
|
||||
|
||||
### Phase 5: Capability Gating and UX Hardening
|
||||
|
||||
Scope:
|
||||
|
||||
- add preflight capability checks
|
||||
- add clear error messaging for unsupported setups
|
||||
|
||||
Verification:
|
||||
|
||||
- unsupported provider/session combinations fail before execution starts
|
||||
|
||||
### Phase 6: Prompt and Doc Cleanup
|
||||
|
||||
Scope:
|
||||
|
||||
- align prompts and docs with the new transport-neutral contract
|
||||
|
||||
Verification:
|
||||
|
||||
- prompt references are accurate
|
||||
- docs describe the supported architecture and limitations
|
||||
|
||||
## File-Level Starting Map
|
||||
|
||||
High-probability files for the first implementation:
|
||||
|
||||
- `src/resources/extensions/gsd/bootstrap/db-tools.ts`
|
||||
- `src/resources/extensions/gsd/bootstrap/query-tools.ts`
|
||||
- `src/resources/extensions/gsd/bootstrap/write-gate.ts`
|
||||
- `src/resources/extensions/gsd/tools/complete-task.ts`
|
||||
- `src/resources/extensions/claude-code-cli/stream-adapter.ts`
|
||||
- `src/resources/extensions/claude-code-cli/index.ts`
|
||||
- `packages/mcp-server/src/server.ts`
|
||||
- `packages/mcp-server/src/session-manager.ts`
|
||||
- `packages/mcp-server/README.md`
|
||||
- `src/resources/extensions/gsd/prompts/execute-task.md`
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
### Unit
|
||||
|
||||
- shared handlers
|
||||
- MCP wrapper adapters
|
||||
- gating / capability-check helpers
|
||||
|
||||
### Integration
|
||||
|
||||
- direct MCP tool invocation against fixture projects
|
||||
- native tool invocation regression coverage
|
||||
- Claude Code provider path with MCP attached
|
||||
|
||||
### End-to-End
|
||||
|
||||
- plan or execute a small fixture task and complete it through canonical GSD tools
|
||||
- confirm DB row, rendered summary, and plan state stay in sync
|
||||
|
||||
## Risks
|
||||
|
||||
### Risk 1: Logic Drift
|
||||
|
||||
If native and MCP wrappers each evolve their own behavior, parity will collapse quickly.
|
||||
|
||||
Mitigation:
|
||||
|
||||
- shared handler extraction before broad MCP exposure
|
||||
|
||||
### Risk 2: Safety Regression
|
||||
|
||||
If MCP becomes a side door around native gating, the architecture is worse than before.
|
||||
|
||||
Mitigation:
|
||||
|
||||
- centralize or reuse gating checks before shared handler invocation
|
||||
|
||||
### Risk 3: Overly Broad First Rollout
|
||||
|
||||
Exporting every tool and alias immediately increases scope and test burden.
|
||||
|
||||
Mitigation:
|
||||
|
||||
- ship a minimal workflow tool set first
|
||||
|
||||
### Risk 4: Claude SDK Session Wiring Complexity
|
||||
|
||||
Attaching MCP servers dynamically may expose edge cases around cwd, permissions, or subprocess lifecycle.
|
||||
|
||||
Mitigation:
|
||||
|
||||
- prove a narrow spike with 2-3 tools before expanding
|
||||
|
||||
## Exit Criteria for ADR-008
|
||||
|
||||
ADR-008 is considered implemented when:
|
||||
|
||||
1. Claude Code-backed execution can use canonical GSD workflow tools over MCP.
|
||||
2. Native provider behavior remains intact.
|
||||
3. Shared handlers back both native and MCP invocation.
|
||||
4. Gating and state integrity protections apply equally to MCP mutations.
|
||||
5. Capability checks prevent prompts from requiring unavailable tools.
|
||||
|
||||
## Recommended Next Task
|
||||
|
||||
Start with a narrow spike:
|
||||
|
||||
1. Extract shared handlers for `gsd_summary_save`, `gsd_task_complete`, and `gsd_milestone_status`.
|
||||
2. Expose those tools through a minimal workflow MCP server.
|
||||
3. Attach that MCP server to Claude Code sessions.
|
||||
4. Prove end-to-end task completion on a fixture project.
|
||||
240
docs/dev/ADR-008-gsd-tools-over-mcp-for-provider-parity.md
Normal file
240
docs/dev/ADR-008-gsd-tools-over-mcp-for-provider-parity.md
Normal file
|
|
@ -0,0 +1,240 @@
|
|||
# ADR-008: Expose GSD Workflow Tools Over MCP for Provider Parity
|
||||
|
||||
**Status:** Proposed
|
||||
**Date:** 2026-04-09
|
||||
**Deciders:** Jeremy McSpadden
|
||||
**Related:** ADR-004 (capability-aware model routing), ADR-007 (model catalog split and provider API encapsulation), `src/resources/extensions/gsd/bootstrap/db-tools.ts`, `src/resources/extensions/claude-code-cli/stream-adapter.ts`, `packages/mcp-server/src/server.ts`
|
||||
|
||||
## Context
|
||||
|
||||
GSD currently has two different tool surfaces:
|
||||
|
||||
1. **In-process extension tools** registered directly into the runtime via `pi.registerTool(...)`.
|
||||
2. **An external MCP server** that exposes session orchestration and read-only project inspection.
|
||||
|
||||
This split is now creating a real provider compatibility problem.
|
||||
|
||||
### What exists today
|
||||
|
||||
The core GSD workflow tools are internal extension tools. Examples include:
|
||||
|
||||
- `gsd_summary_save`
|
||||
- `gsd_plan_milestone`
|
||||
- `gsd_plan_slice`
|
||||
- `gsd_plan_task`
|
||||
- `gsd_task_complete` / `gsd_complete_task`
|
||||
- `gsd_slice_complete`
|
||||
- `gsd_complete_milestone`
|
||||
- `gsd_validate_milestone`
|
||||
- `gsd_replan_slice`
|
||||
- `gsd_reassess_roadmap`
|
||||
|
||||
These are registered in `src/resources/extensions/gsd/bootstrap/db-tools.ts` and related bootstrap files. GSD prompts assume these tools are available during discuss, plan, and execute flows.
|
||||
|
||||
Separately, `packages/mcp-server/src/server.ts` exposes a different tool surface:
|
||||
|
||||
- session control: `gsd_execute`, `gsd_status`, `gsd_result`, `gsd_cancel`, `gsd_query`, `gsd_resolve_blocker`
|
||||
- read-only inspection: `gsd_progress`, `gsd_roadmap`, `gsd_history`, `gsd_doctor`, `gsd_captures`, `gsd_knowledge`
|
||||
|
||||
That MCP server is useful, but it is **not** a transport for the internal workflow/mutation tools.
|
||||
|
||||
### The current failure mode
|
||||
|
||||
The Claude Code CLI provider uses the Anthropic Agent SDK through `src/resources/extensions/claude-code-cli/stream-adapter.ts`. That adapter starts a Claude SDK session, but it does not forward the internal GSD tool registry into the SDK session, nor does it attach a GSD MCP server for those tools.
|
||||
|
||||
As a result:
|
||||
|
||||
- prompts tell the model to call tools like `gsd_complete_task`
|
||||
- the tools exist in GSD
|
||||
- but Claude Code sessions do not actually receive those tools
|
||||
|
||||
This produces a contract mismatch: the model is required to use tools that are unavailable in that provider path.
|
||||
|
||||
### Why this matters
|
||||
|
||||
This is not a one-off Claude Code bug. It reveals a deeper architectural issue:
|
||||
|
||||
- GSD’s core workflow contract is transport-specific
|
||||
- prompt authors assume “internal extension tool availability”
|
||||
- provider integrations do not all share the same execution surface
|
||||
|
||||
If GSD wants provider parity, its workflow tools need a transport-neutral exposure model.
|
||||
|
||||
## Decision
|
||||
|
||||
**Expose the GSD workflow tool contract over MCP as a first-class transport, and make MCP the compatibility layer for providers that cannot directly access the in-process GSD tool registry.**
|
||||
|
||||
This means:
|
||||
|
||||
1. GSD will keep its existing in-process tool registration for native runtime use.
|
||||
2. GSD will add an MCP execution surface for the same workflow tools.
|
||||
3. Both surfaces must call the same underlying business logic.
|
||||
4. Provider integrations such as Claude Code will use the MCP surface when they cannot access native in-process tools directly.
|
||||
|
||||
The decision is explicitly **not** to replace the native tool system with MCP everywhere. MCP is the parity and portability layer, not the only runtime path.
|
||||
|
||||
## Decision Details
|
||||
|
||||
### 1. One handler layer, multiple transports
|
||||
|
||||
GSD tool behavior must not be implemented twice.
|
||||
|
||||
The transport-neutral business logic for workflow tools should be shared by:
|
||||
|
||||
- native extension tool registration (`pi.registerTool(...)`)
|
||||
- MCP server tool registration
|
||||
|
||||
The MCP server should wrap the same handlers used by `db-tools.ts`, `query-tools.ts`, and related modules. This avoids logic drift and keeps validation, DB writes, file rendering, and recovery behavior consistent.
|
||||
|
||||
### 2. Add a workflow-tool MCP surface
|
||||
|
||||
GSD will expose the workflow tools required for discuss, planning, execution, and completion over MCP.
|
||||
|
||||
Initial minimum set:
|
||||
|
||||
- `gsd_summary_save`
|
||||
- `gsd_decision_save`
|
||||
- `gsd_plan_milestone`
|
||||
- `gsd_plan_slice`
|
||||
- `gsd_plan_task`
|
||||
- `gsd_task_complete`
|
||||
- `gsd_slice_complete`
|
||||
- `gsd_complete_milestone`
|
||||
- `gsd_validate_milestone`
|
||||
- `gsd_replan_slice`
|
||||
- `gsd_reassess_roadmap`
|
||||
- `gsd_save_gate_result`
|
||||
- selected read/query tools such as `gsd_milestone_status`
|
||||
|
||||
Aliases should be treated conservatively. MCP should prefer canonical names unless compatibility requires exposing aliases.
|
||||
|
||||
### 3. Preserve safety semantics
|
||||
|
||||
The current GSD safety model includes write gates, discussion gates, queue-mode restrictions, and state integrity guarantees.
|
||||
|
||||
Those guarantees must continue to apply when tools are invoked over MCP. In particular:
|
||||
|
||||
- MCP must not create a path that bypasses write gating
|
||||
- MCP mutations must preserve the same DB/file/state invariants as native tools
|
||||
- provider-specific fallback behavior must not allow manual summary writing in place of canonical completion tools
|
||||
|
||||
### 4. Make provider capability checks explicit
|
||||
|
||||
Before dispatching a workflow that requires GSD workflow tools, GSD should check whether the selected provider/session can access the required tool surface.
|
||||
|
||||
If a provider cannot access either:
|
||||
|
||||
- native in-process GSD tools, or
|
||||
- the GSD MCP workflow tool surface
|
||||
|
||||
then GSD must fail early with a clear compatibility error rather than allowing execution to continue in a degraded, state-breaking mode.
|
||||
|
||||
### 5. Keep the existing session/read MCP server
|
||||
|
||||
The existing MCP server in `packages/mcp-server` remains valid. It serves a different purpose:
|
||||
|
||||
- remote session orchestration
|
||||
- status/result polling
|
||||
- filesystem-backed project inspection
|
||||
|
||||
The new workflow-tool MCP surface is complementary, not a replacement.
|
||||
|
||||
## Alternatives Considered
|
||||
|
||||
### Alternative A: Reroute away from Claude Code whenever tool-backed execution is needed
|
||||
|
||||
This would fix the immediate failure for multi-provider users, but it does not solve provider parity. It also fails completely for users who only have Claude Code configured.
|
||||
|
||||
**Rejected** because it treats the symptom, not the architectural gap.
|
||||
|
||||
### Alternative B: Hard-fail Claude Code and require another provider
|
||||
|
||||
This is a valid short-term guardrail and may still be used before MCP support is complete.
|
||||
|
||||
**Rejected as the long-term architecture** because it permanently excludes a supported provider from first-class GSD execution.
|
||||
|
||||
### Alternative C: Inject the internal GSD tool registry directly into the Claude Agent SDK without MCP
|
||||
|
||||
This would tightly couple GSD’s internal extension runtime to a provider-specific integration path. It would not generalize well to other providers or external tool clients.
|
||||
|
||||
**Rejected** because it creates a provider-specific bridge instead of a transport-neutral contract.
|
||||
|
||||
### Alternative D: Replace native GSD tools entirely with MCP
|
||||
|
||||
This would simplify the conceptual model, but it would force all runtimes through an external protocol boundary even when the native in-process path is faster and already works well.
|
||||
|
||||
**Rejected** because MCP is needed for portability, not because the native tool system is flawed.
|
||||
|
||||
## Consequences
|
||||
|
||||
### Positive
|
||||
|
||||
1. **Provider parity improves.** Providers that can consume MCP tools can participate in full GSD workflow execution.
|
||||
2. **The workflow contract becomes transport-neutral.** Prompts can rely on capabilities rather than a specific runtime implementation detail.
|
||||
3. **One compatibility story for external clients.** Claude Code, Cursor, and other MCP-capable clients can use the same workflow tool surface.
|
||||
4. **Better long-term architecture.** Internal tools and external transports converge on shared handlers instead of diverging implementations.
|
||||
|
||||
### Negative
|
||||
|
||||
1. **Larger surface area to secure and test.** Mutation tools over MCP are higher risk than read-only inspection tools.
|
||||
2. **Migration complexity.** Tool registration, gating, and handler extraction must be refactored carefully.
|
||||
3. **Two transport paths must remain aligned.** Native and MCP invocation semantics must stay behaviorally identical.
|
||||
|
||||
### Neutral / Tradeoff
|
||||
|
||||
The system will now support:
|
||||
|
||||
- native in-process tool execution when available
|
||||
- MCP-backed tool execution when native access is unavailable
|
||||
|
||||
That is more complex than a single-path system, but it is the cost of provider portability without sacrificing native runtime quality.
|
||||
|
||||
## Migration Plan
|
||||
|
||||
### Phase 1: Extract shared handlers
|
||||
|
||||
Refactor workflow tools so MCP and native registration can call the same transport-neutral functions.
|
||||
|
||||
Priority targets:
|
||||
|
||||
- `gsd_summary_save`
|
||||
- `gsd_task_complete`
|
||||
- `gsd_plan_milestone`
|
||||
- `gsd_plan_slice`
|
||||
- `gsd_plan_task`
|
||||
|
||||
### Phase 2: Stand up the workflow-tool MCP server
|
||||
|
||||
Add a new MCP surface for workflow tool execution. This may extend the existing MCP package or live as a sibling package, but it must be clearly separated from the current session/read API.
|
||||
|
||||
### Phase 3: Port safety enforcement
|
||||
|
||||
Move or centralize write gates and related policy checks so MCP mutations cannot bypass the existing safety model.
|
||||
|
||||
### Phase 4: Attach MCP workflow tools to Claude Code sessions
|
||||
|
||||
Update the Claude Code provider integration to pass a GSD-managed `mcpServers` configuration into the Claude Agent SDK session when required.
|
||||
|
||||
### Phase 5: Add provider capability gating
|
||||
|
||||
Before tool-dependent flows begin, verify that the active provider can access the required GSD workflow tools via either native registration or MCP.
|
||||
|
||||
### Phase 6: Update prompts and docs
|
||||
|
||||
Prompt contracts should remain strict about using canonical GSD completion/planning tools, but documentation and runtime messaging must no longer assume that only native in-process tool registration satisfies that contract.
|
||||
|
||||
## Validation
|
||||
|
||||
Success is defined by all of the following:
|
||||
|
||||
1. A Claude Code-backed execution session can complete a task using canonical GSD workflow tools without manual summary writing.
|
||||
2. Native provider behavior remains unchanged.
|
||||
3. MCP-invoked workflow tools produce the same DB updates, rendered artifacts, and state transitions as native tool calls.
|
||||
4. Write-gate and discussion-gate protections still hold under MCP invocation.
|
||||
5. When required capabilities are unavailable, GSD fails early with a precise compatibility error.
|
||||
|
||||
## Scope Notes
|
||||
|
||||
This ADR establishes the architectural direction. It does **not** require full MCP exposure of every historical alias or every auxiliary tool in the first implementation.
|
||||
|
||||
The first implementation should prioritize the minimum workflow tool set needed to make discuss/plan/execute/complete flows work safely for MCP-capable providers.
|
||||
|
|
@ -14,7 +14,7 @@ gsd (CLI binary)
|
|||
├─ resource-loader.ts Syncs bundled extensions + agents to ~/.gsd/agent/
|
||||
└─ src/resources/
|
||||
├─ extensions/gsd/ Core GSD extension
|
||||
├─ extensions/... 12 supporting extensions
|
||||
├─ extensions/... 23 supporting extensions
|
||||
├─ agents/ scout, researcher, worker
|
||||
├─ AGENTS.md Agent routing instructions
|
||||
└─ GSD-WORKFLOW.md Manual bootstrap protocol
|
||||
|
|
@ -73,6 +73,12 @@ Every dispatch creates a new agent session. The LLM starts with a clean context
|
|||
| **Remote Questions** | Discord, Slack, and Telegram integration for headless question routing |
|
||||
| **TTSR** | Tool-triggered system rules — conditional context injection based on tool usage |
|
||||
| **Universal Config** | Discovery of existing AI tool configurations (Claude Code, Cursor, Windsurf, etc.) |
|
||||
| **AWS Auth** | AWS credential management and authentication |
|
||||
| **Claude Code CLI** | Claude Code CLI provider integration (runs sessions through the Anthropic Agent SDK) |
|
||||
| **cmux** | Context multiplexing for multi-session coordination |
|
||||
| **GitHub Sync** | GitHub issue and PR synchronization |
|
||||
| **Ollama** | Local Ollama model integration |
|
||||
| **Shared** | Shared utilities across extensions |
|
||||
|
||||
## Bundled Agents
|
||||
|
||||
|
|
@ -122,7 +128,7 @@ The auto mode dispatch pipeline:
|
|||
|
||||
Phase skipping (from token profile) gates steps 2-3: if a phase is skipped, the corresponding unit type is never dispatched.
|
||||
|
||||
## Key Modules (v2.33)
|
||||
## Key Modules (v2.67)
|
||||
|
||||
| Module | Purpose |
|
||||
|--------|---------|
|
||||
|
|
@ -160,3 +166,11 @@ Phase skipping (from token profile) gates steps 2-3: if a phase is skipped, the
|
|||
| `memory-extractor.ts` | Extract reusable knowledge from session transcripts |
|
||||
| `memory-store.ts` | Persistent memory store for cross-session knowledge |
|
||||
| `queue-order.ts` | Milestone queue ordering |
|
||||
| `context-masker.ts` | Context masking for model routing optimization |
|
||||
| `phase-anchor.ts` | Phase anchoring for dispatch pipeline |
|
||||
| `slice-parallel-orchestrator.ts` | Slice-level parallelism with dependency-aware dispatch |
|
||||
| `slice-parallel-eligibility.ts` | Slice parallel eligibility checks |
|
||||
| `slice-parallel-conflict.ts` | Slice parallel conflict detection |
|
||||
| `preferences-models.ts` | Model preferences configuration |
|
||||
| `preferences-validation.ts` | Preferences validation |
|
||||
| `preferences-types.ts` | Preferences type definitions |
|
||||
198
docs/dev/pi-context-optimization-opportunities.md
Normal file
198
docs/dev/pi-context-optimization-opportunities.md
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
# pi-coding-agent: Context Optimization Opportunities
|
||||
|
||||
> **Status**: Research only — not planned for implementation.
|
||||
> Scope: `packages/pi-coding-agent` and `packages/pi-agent-core` infrastructure.
|
||||
> These changes would benefit every consumer of the pi engine, not just GSD.
|
||||
|
||||
---
|
||||
|
||||
## 1. Prompt Caching (`cache_control`) — Highest Impact
|
||||
|
||||
**Current state**: Every LLM call re-pays full input token cost for the system prompt, tool definitions, and context files. No `cache_control` breakpoints are set anywhere in the API call path.
|
||||
|
||||
**Opportunity**: Anthropic's KV cache delivers 90% cost reduction on cached tokens (0.1x input rate). Claude Code achieves 92–98% cache hit rates by placing stable content before volatile content.
|
||||
|
||||
**Where to instrument** (`packages/pi-ai/src/providers/anthropic.ts`):
|
||||
- Set `cache_control: { type: "ephemeral" }` on the last tool definition block
|
||||
- Set `cache_control` after the static system prompt sections (base boilerplate + context files)
|
||||
- Leave the per-turn user message uncached
|
||||
|
||||
**Critical constraint**: The cache breakpoint must be placed *after* all static content and *before* any dynamic content (timestamps, per-request variables). Moving a timestamp before a cache breakpoint defeats it on every call.
|
||||
|
||||
**Cache hierarchy**: Tools → system → messages. Changing a tool definition invalidates system and message caches. Tool definitions should be sorted deterministically (alphabetically) to prevent spurious cache misses.
|
||||
|
||||
**Expected savings**: 80–90% reduction in input token cost for multi-turn sessions (the dominant cost pattern in GSD auto-mode).
|
||||
|
||||
---
|
||||
|
||||
## 2. Observation Masking in the Message Pipeline
|
||||
|
||||
**Current state**: `agent-loop.ts` passes the full `context.messages` array to the LLM on every turn. Tool results from 50 turns ago are re-read in full on every subsequent call. The `transformContext` hook exists on `AgentContext` and fires before every LLM call, but has no default implementation — extensions are responsible for any pruning.
|
||||
|
||||
**Opportunity**: Replace old tool result content with lightweight placeholders after N turns. JetBrains Research tested this on SWE-bench Verified (500 tasks, up to 250-turn trajectories) and found:
|
||||
- 50%+ cost reduction vs. unmanaged history
|
||||
- Performance matched or slightly exceeded LLM summarization
|
||||
- Zero overhead (no extra LLM call required)
|
||||
|
||||
**Proposed implementation** (default `transformContext` in `pi-agent-core`):
|
||||
```typescript
|
||||
// Keep last KEEP_RECENT_TURNS verbatim; mask older tool results
|
||||
const KEEP_RECENT_TURNS = 8;
|
||||
|
||||
function defaultObservationMask(messages: AgentMessage[]): AgentMessage[] {
|
||||
const cutoff = findTurnBoundary(messages, KEEP_RECENT_TURNS);
|
||||
return messages.map((m, i) => {
|
||||
if (i >= cutoff) return m;
|
||||
if (m.type === "toolResult" || m.type === "bashExecution") {
|
||||
return { ...m, content: "[result masked — within summarized history]", excludeFromContext: false };
|
||||
}
|
||||
return m;
|
||||
});
|
||||
}
|
||||
```
|
||||
|
||||
**Compaction interaction**: Observation masking reduces the token accumulation rate, pushing the compaction threshold further out. The two mechanisms are complementary — masking handles the steady state, compaction handles the rare deep-session case.
|
||||
|
||||
---
|
||||
|
||||
## 3. Earlier Compaction Threshold
|
||||
|
||||
**Current state** (`packages/pi-coding-agent/src/core/constants.ts`):
|
||||
```typescript
|
||||
COMPACTION_RESERVE_TOKENS = 16_384 // triggers at contextWindow - 16K
|
||||
COMPACTION_KEEP_RECENT_TOKENS = 20_000
|
||||
```
|
||||
|
||||
For a 200K context window, compaction fires at ~183.6K tokens (200,000 − 16,384) — roughly 92% utilization.
|
||||
|
||||
**Problem**: Context drift (not raw exhaustion) causes ~65% of enterprise agent failures. Performance degrades measurably beyond ~30K tokens per Zylos production data. The current threshold lets sessions run degraded for a long stretch before compaction fires.
|
||||
|
||||
**Opportunity**: Lower the trigger to 70% utilization. For a 200K window, this means compacting at ~140K tokens — 43K tokens earlier.
|
||||
|
||||
```typescript
|
||||
// Proposed
|
||||
COMPACTION_THRESHOLD_PERCENT = 0.70 // fire at 70% of contextWindow
|
||||
COMPACTION_RESERVE_TOKENS = contextWindow * (1 - COMPACTION_THRESHOLD_PERCENT)
|
||||
```
|
||||
|
||||
**Trade-off**: More frequent compactions, each happening earlier when there's more "fresh" content to keep. Summary quality improves because less material needs to be discarded at each cut.
|
||||
|
||||
---
|
||||
|
||||
## 4. Tool Result Truncation at Write Time
|
||||
|
||||
**Current state**: `TOOL_RESULT_MAX_CHARS = 2_000` in `constants.ts`, but this limit is only applied *during compaction summarization*, not when the tool result enters the message store. A bash result returning 50KB of log output is stored and re-sent verbatim until compaction fires.
|
||||
|
||||
**Opportunity**: Truncate at write time in `messages.ts` → `convertToLlm()` or in the tool result handler. Two strategies:
|
||||
|
||||
- **Hard truncation**: Slice at N chars, append `"\n[truncated — {original_length} chars]"`. Simple, zero overhead.
|
||||
- **Semantic head/tail**: Keep first 500 chars (context, command echo) + last 1000 chars (final output, errors). Better for bash results where the end contains the error.
|
||||
|
||||
**Recommendation**: Semantic head/tail as the default, configurable per tool type. File read results benefit from head; bash/test output benefits from head+tail.
|
||||
|
||||
---
|
||||
|
||||
## 5. Context File Deduplication and Trim
|
||||
|
||||
**Current state** (`packages/pi-coding-agent/src/core/resource-loader.ts`, lines 84–109):
|
||||
- Searches from `~/.gsd/agent/` → ancestor dirs → cwd
|
||||
- Deduplicates by *file path* but not by *content*
|
||||
- Entire file content concatenated verbatim into system prompt — no trimming, no summarization
|
||||
|
||||
**Anti-pattern**: A project with AGENTS.md at 3 ancestor levels (repo root, workspace, home) injects all three in full. If they share common boilerplate, that content is re-injected multiple times.
|
||||
|
||||
**Opportunities**:
|
||||
1. **Content deduplication**: Hash paragraph-level chunks; skip any chunk already seen in a previously-loaded file
|
||||
2. **Section-aware loading**: Parse `## ` headings in AGENTS.md; only include sections relevant to the current task type (e.g., `## Testing` section only when running tests)
|
||||
3. **Token budget enforcement**: If the total size of the context files exceeds N tokens, summarize the oldest/most-distant file rather than including it verbatim
|
||||
|
||||
---
|
||||
|
||||
## 6. Skill Content Lazy Loading and Summarization
|
||||
|
||||
**Current state**: When `/skill:name` is invoked, the full skill file content is injected inline as `<skill>...</skill>` in the user message. No chunking, no summarization. A 10KB skill file adds ~2,500 tokens to that turn.
|
||||
|
||||
**Opportunity**:
|
||||
- **Cached skill injection**: If the same skill is used across multiple turns (rare but possible), it's re-injected each time. Cache with `cache_control` after first injection.
|
||||
- **Skill digest mode**: Inject a 200-token summary of the skill on first reference; full content only if the model requests it via a `get_skill_detail` tool call. Reduces cost for skills that don't end up being followed.
|
||||
- **Skill prefetching**: Before a known long session (e.g., auto-mode start), pre-inject all likely skills with `cache_control` so they're cached for the entire session.
|
||||
|
||||
---
|
||||
|
||||
## 7. Token Estimation Accuracy
|
||||
|
||||
**Current state** (`compaction.ts`, line 216): `chars / 4` heuristic. This underestimates token count for English prose (~3.5 chars/token, i.e. more tokens than `chars / 4` predicts) and underestimates it further for code with short identifiers or Unicode.
|
||||
|
||||
**Opportunity**: Use a proper tokenizer.
|
||||
- `@anthropic-ai/tokenizer` (tiktoken-compatible, ships with the SDK) — accurate but ~5ms per call
|
||||
- Tiered approach: use chars/4 for display; use proper tokenizer only for compaction threshold decisions (where accuracy matters)
|
||||
|
||||
**Impact**: More accurate compaction timing, fewer unnecessary compactions, slightly better `COMPACTION_KEEP_RECENT_TOKENS` boundary placement.
|
||||
|
||||
---
|
||||
|
||||
## 8. Format: Markdown over XML for Internal Context
|
||||
|
||||
**Current state**: The message pipeline uses `<skill>`, `<summary>`, `<compaction>` XML wrappers in several places. System prompt sections are largely prose Markdown.
|
||||
|
||||
**Findings**: XML tags carry 15–40% more tokens than equivalent Markdown for the same semantic content, due to paired open/close tags. However, Claude was optimized for XML and shows higher accuracy on tasks requiring precise section parsing.
|
||||
|
||||
**Recommendation**: Audit XML usage in the pipeline and convert to Markdown where the content is:
|
||||
- Non-nested (flat instructions, status messages)
|
||||
- Human-readable rather than machine-parsed by the model
|
||||
- Not requiring precise boundary detection
|
||||
|
||||
Keep XML for: few-shot examples with ambiguous boundaries, skill content (requires precise isolation from surrounding text), compaction summaries that the model must treat as authoritative history.
|
||||
|
||||
**Estimated savings**: 5–15% reduction in system prompt token count.
|
||||
|
||||
---
|
||||
|
||||
## 9. Dynamic Tool Set Delivery
|
||||
|
||||
**Current state**: All tool definitions are included in every LLM request. Tool descriptions consume 60–80% of input tokens in static configurations. As new extensions register tools, the baseline grows linearly.
|
||||
|
||||
**Opportunity** (higher complexity): Implement the three-function Dynamic Toolset pattern:
|
||||
1. `search_tools(query)` — semantic search over tool catalog
|
||||
2. `describe_tools(ids[])` — fetch full schemas on demand
|
||||
3. `execute_tool(id, params)` — unchanged execution
|
||||
|
||||
Speakeasy measured 91–97% token reduction with 100% task success rate. Trade-off: 2–3x more tool calls and ~50% longer wall time, but the net cost is still dramatically lower.
|
||||
|
||||
**Feasibility for pi**: The tool registry (`packages/pi-coding-agent/src/core/tool-registry.ts`) already stores tool metadata separately from definitions. The primary engineering work is the semantic search index and the `describe_tools` / `search_tools` tool implementations.
|
||||
|
||||
---
|
||||
|
||||
## 10. Cost Attribution and Per-Phase Reporting
|
||||
|
||||
**Current state**: `SessionManager.getUsageTotals()` accumulates cost across the entire session. No per-phase or per-agent breakdown is stored. Cost visibility is limited to the footer total and `GSD_SHOW_TOKEN_COST=1` per-turn display.
|
||||
|
||||
**Opportunity**: Emit structured cost events that extensions can subscribe to:
|
||||
```typescript
|
||||
interface CostCheckpointEvent {
|
||||
type: "cost_checkpoint";
|
||||
label: string; // "discuss-phase", "execute-slice-3"
|
||||
deltaTokens: Usage; // tokens since last checkpoint
|
||||
cumulativeTokens: Usage;
|
||||
cumulativeCost: number;
|
||||
}
|
||||
```
|
||||
|
||||
The GSD extension could consume these events to surface per-milestone cost in `/gsd stats` and flag milestones that are disproportionately expensive — enabling budget-aware planning.
|
||||
|
||||
---
|
||||
|
||||
## Implementation Ordering (if pursued)
|
||||
|
||||
| Priority | Item | Effort | Expected Impact |
|
||||
|----------|------|--------|-----------------|
|
||||
| 1 | Prompt caching (`cache_control`) | Low | 80–90% input cost reduction |
|
||||
| 2 | Earlier compaction threshold (70%) | Trivial | Reduces drift in long sessions |
|
||||
| 3 | Tool result truncation at write time | Low | Reduces context bloat between compactions |
|
||||
| 4 | Context file deduplication | Medium | Variable — high for multi-level AGENTS.md setups |
|
||||
| 5 | Observation masking (default `transformContext`) | Medium | 50%+ cost reduction on long-running agents |
|
||||
| 6 | Token estimation (proper tokenizer) | Low | Accuracy improvement, minor cost impact |
|
||||
| 7 | Markdown over XML audit | Low | 5–15% system prompt reduction |
|
||||
| 8 | Skill caching with `cache_control` | Low | Meaningful for skill-heavy sessions |
|
||||
| 9 | Dynamic tool set delivery | High | 90%+ token reduction on large tool catalogs; major architecture change |
|
||||
| 10 | Per-phase cost attribution events | Medium | Visibility only; enables future budget routing |
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue