diff --git a/.plans/extension-loading-multi-path.md b/.plans/extension-loading-multi-path.md new file mode 100644 index 000000000..1cc76f735 --- /dev/null +++ b/.plans/extension-loading-multi-path.md @@ -0,0 +1,138 @@ +# Extension Loading: Dependency Sort + Unified Enable/Disable + +## Context + +GSD-2 has a well-structured extension system with three discovery paths (bundled, global/community, project-local) that are **already wired up** through pi's `DefaultPackageManager.addAutoDiscoveredResources()`. However, two critical gaps remain: + +1. `sortExtensionPaths()` (topological dependency sort) is implemented but **never called** — `dependencies.extensions` in manifests is decorative +2. The GSD extension registry (enable/disable) only applies to **bundled** extensions — community extensions bypass it entirely + +### Architecture (Current Flow) + +``` +GSD loader.ts + → discoverExtensionEntryPaths(bundledExtDir) + → filter by GSD registry (isExtensionEnabled) + → set GSD_BUNDLED_EXTENSION_PATHS env var + ↓ +DefaultResourceLoader.reload() + → packageManager.resolve() + → addAutoDiscoveredResources() + → project: cwd/.gsd/extensions/ (CONFIG_DIR_NAME = ".gsd") + → global: ~/.gsd/agent/extensions/ (includes synced bundled) + → loadExtensions(mergedPaths) ← NO sort, NO registry check on community +``` + +### Key Files + +| File | Role | +|------|------| +| `src/loader.ts` (lines 146-161) | GSD startup — bundled discovery + registry filter | +| `src/extension-sort.ts` | Topological sort (Kahn's BFS) — EXISTS but NEVER CALLED | +| `src/extension-registry.ts` | Registry I/O, enable/disable, tier checks | +| `src/resource-loader.ts` (lines 589-607) | `buildResourceLoader()` — constructs DefaultResourceLoader | +| `packages/pi-coding-agent/src/core/resource-loader.ts` (lines 311-395) | `reload()` — merges paths, calls `loadExtensions()` | +| `packages/pi-coding-agent/src/core/package-manager.ts` (lines 1585-1700) | `addAutoDiscoveredResources()` — auto-discovers from 
.gsd/ dirs | +| `packages/pi-coding-agent/src/core/extensions/loader.ts` (lines 945-1002) | `discoverAndLoadExtensions()` — DEAD CODE, never invoked | + +--- + +## Plan + +### Task 1: Wire topological sort into extension loading + +**What:** Call `sortExtensionPaths()` on the merged extension paths before passing them to `loadExtensions()`. + +**Where:** `packages/pi-coding-agent/src/core/resource-loader.ts` ~line 381-385 + +**Before:** +```typescript +const extensionsResult = await loadExtensions(extensionPaths, this.cwd, this.eventBus); +``` + +**After:** +```typescript +import { sortExtensionPaths } from '../../../src/extension-sort.js'; + +const { sortedPaths, warnings } = sortExtensionPaths(extensionPaths); +for (const w of warnings) { + // emit as diagnostic, not hard error +} +const extensionsResult = await loadExtensions(sortedPaths, this.cwd, this.eventBus); +``` + +**Consideration:** `sortExtensionPaths` lives in `src/` (GSD side), not in `packages/pi-coding-agent/`. Need to either: +- (a) Move it into pi-coding-agent as a shared utility, OR +- (b) Import it cross-package (already done for other GSD→pi imports), OR +- (c) Call it on the GSD side before paths reach pi — harder since auto-discovered paths are added inside pi's package manager + +Option (a) is cleanest — the sort logic only depends on `readManifestFromEntryPath` which is also in `src/extension-registry.ts` but could be duplicated or shared. + +### Task 2: Apply GSD registry to community extensions + +**What:** When `buildResourceLoader()` in `src/resource-loader.ts` constructs the DefaultResourceLoader, also discover and filter community extensions from `~/.gsd/agent/extensions/` through the GSD registry — same as it already does for `~/.pi/agent/extensions/` paths. 
+ +**Where:** `src/resource-loader.ts` → `buildResourceLoader()` (lines 589-607) + +**Current code already filters pi extensions:** +```typescript +const piExtensionPaths = discoverExtensionEntryPaths(piExtensionsDir) + .filter((entryPath) => !bundledKeys.has(getExtensionKey(entryPath, piExtensionsDir))) + .filter((entryPath) => { + const manifest = readManifestFromEntryPath(entryPath) + if (!manifest) return true + return isExtensionEnabled(registry, manifest.id) + }) +``` + +**Add similar filtering for community extensions in agentDir:** +- Discover extensions in `~/.gsd/agent/extensions/` that are NOT bundled +- Filter through `isExtensionEnabled(registry, manifest.id)` +- Pass as disabled (via override patterns or pre-filtering) to the resource loader + +**Alternative approach:** Hook into `addAutoDiscoveredResources` or the `addResource` call to check the GSD registry. This might be cleaner since the auto-discovery already happens inside pi's package manager. + +### Task 3: Emit sort warnings as diagnostics + +**What:** Surface dependency warnings (missing deps, cycles) through GSD's diagnostic system so users see them. + +**Where:** Wherever the sort is invoked from Task 1. + +**Format:** +``` +⚠ Extension 'gsd-watch' declares dependency 'gsd' which is not installed — loading anyway +⚠ Extensions 'foo' and 'bar' form a dependency cycle — loading in alphabetical order +``` + +### Task 4: Clean up dead code + +**What:** The `discoverAndLoadExtensions()` function in `packages/pi-coding-agent/src/core/extensions/loader.ts` (lines 945-1002) is exported but never invoked. The project-local trust model inside it (`getUntrustedExtensionPaths`) also never runs. + +**Options:** +- (a) Remove it entirely — it's dead +- (b) Mark deprecated — in case upstream pi uses it +- (c) Leave it — lowest risk + +Recommend (b) for now — add `@deprecated` JSDoc so it doesn't grow new callers. 
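A minimal sketch of the dependency sort from Task 1, including the warning behavior Task 3 surfaces, assuming each manifest exposes `id` and `dependencies.extensions`. The names here (`Manifest`, `sortByDependencies`) are illustrative and not the real `sortExtensionPaths` API in `src/extension-sort.ts`:

```typescript
// Illustrative Kahn's-BFS sort over extension manifests (not the real API).
type Manifest = { id: string; dependencies?: { extensions?: string[] } };

function sortByDependencies(manifests: Manifest[]): { sorted: string[]; warnings: string[] } {
  const warnings: string[] = [];
  const ids = new Set(manifests.map((m) => m.id));
  const indegree = new Map<string, number>(manifests.map((m) => [m.id, 0]));
  const dependents = new Map<string, string[]>();

  for (const m of manifests) {
    for (const dep of m.dependencies?.extensions ?? []) {
      if (!ids.has(dep)) {
        warnings.push(`'${m.id}' declares dependency '${dep}' which is not installed (loading anyway)`);
        continue;
      }
      indegree.set(m.id, (indegree.get(m.id) ?? 0) + 1);
      dependents.set(dep, [...(dependents.get(dep) ?? []), m.id]);
    }
  }

  // Kahn's BFS: repeatedly emit extensions whose declared deps are all loaded.
  const queue = [...indegree].filter(([, d]) => d === 0).map(([id]) => id).sort();
  const sorted: string[] = [];
  while (queue.length > 0) {
    const id = queue.shift() as string;
    sorted.push(id);
    for (const next of dependents.get(id) ?? []) {
      const remaining = (indegree.get(next) ?? 1) - 1;
      indegree.set(next, remaining);
      if (remaining === 0) queue.push(next);
    }
  }

  // Anything left over is part of a cycle: warn, then load alphabetically.
  if (sorted.length < manifests.length) {
    const rest = [...ids].filter((id) => !sorted.includes(id)).sort();
    warnings.push(`dependency cycle among '${rest.join("', '")}' (loading in alphabetical order)`);
    sorted.push(...rest);
  }
  return { sorted, warnings };
}
```

This keeps the "warn, never hard-fail" policy described above: missing deps and cycles degrade to warnings and the affected extensions still load.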
+ +### Task 5: Tests + +- **Sort integration test:** Create two extensions where A depends on B. Verify B loads before A after sort. +- **Registry community test:** Drop a community extension in `~/.gsd/agent/extensions/`, run `gsd extensions disable `, verify it doesn't load. +- **Conflict test:** Same extension ID in project-local and global — verify project-local wins. +- **Missing dep test:** Extension declares dependency on non-existent extension — verify warning emitted, extension still loads. +- **Cycle test:** Two extensions that depend on each other — verify warning, both load. + +--- + +## Follow-up PR (separate) + +**Subagent extension forwarding:** Update `src/resources/extensions/subagent/index.ts` to forward ALL extension paths (not just bundled) to child processes. May need a second env var like `GSD_COMMUNITY_EXTENSION_PATHS` or consolidate into `GSD_EXTENSION_PATHS`. + +--- + +## Open Questions + +1. **Where should `sortExtensionPaths` live?** Currently in `src/` (GSD side). Needs to be callable from pi's resource-loader. Options: move to pi, keep and import cross-package, or duplicate. +2. **Should community extensions respect the same registry as bundled?** Or should they have their own enable/disable mechanism? Current plan unifies them. +3. **Project-local trust:** The TOFU model in the dead `discoverAndLoadExtensions()` never runs. Should `addAutoDiscoveredResources` also gate project-local extensions behind trust? Or is `.gsd/extensions/` in your own project always trusted? diff --git a/.plans/ollama-native-provider.md b/.plans/ollama-native-provider.md new file mode 100644 index 000000000..312743c95 --- /dev/null +++ b/.plans/ollama-native-provider.md @@ -0,0 +1,241 @@ +# Ollama Extension — First-Class Local LLM Support + +## Status: DRAFT — Awaiting approval + +## Problem + +Ollama support in GSD2 currently requires manual `models.json` configuration. Users must: +1. Know the OpenAI-compatibility endpoint (`localhost:11434/v1`) +2. 
Manually list every model they want to use +3. Set compat flags (`supportsDeveloperRole: false`, etc.) +4. Use a dummy API key + +There's an `ollama-cloud` provider for hosted Ollama, and a discovery adapter that can list models, but no first-class **local Ollama** extension that "just works." + +## Goal + +Make Ollama the easiest way to use GSD2 — zero config when Ollama is running locally. All Ollama functionality lives in a single extension: `src/resources/extensions/ollama/`. + +## Architecture + +Everything is a self-contained extension under `src/resources/extensions/ollama/`. The extension: +- Auto-detects Ollama on startup via health check +- Discovers and registers local models with the model registry +- Provides native Ollama API streaming (not OpenAI shim) +- Exposes `/ollama` slash commands for model management +- Registers an LLM-callable tool for model pull/status + +Minimal core changes — only `KnownProvider` and `KnownApi` type additions in `pi-ai`, and `env-api-keys.ts` for key resolution. Everything else is in the extension. + +## File Structure + +``` +src/resources/extensions/ollama/ +├── index.ts # Extension entry — wires everything on session_start +├── ollama-client.ts # HTTP client for Ollama REST API (/api/*) +├── ollama-discovery.ts # Model discovery + capability detection +├── ollama-provider.ts # Native /api/chat streaming provider (registers with pi-ai) +├── ollama-commands.ts # /ollama slash commands (status, pull, list, remove, ps) +├── ollama-tool.ts # LLM-callable tool for model management +├── model-capabilities.ts # Known model capability table (context window, vision, reasoning) +└── types.ts # Shared types for Ollama API responses +``` + +## Scope + +### Phase 1: Auto-Discovery + OpenAI-Compat Routing + +**What:** Extension that auto-detects Ollama, discovers models, registers them using the existing `openai-completions` API provider. Zero config needed. + +**Extension files:** +- `ollama/index.ts` — Main entry. 
On `session_start`: + 1. Probe `localhost:11434` (or `OLLAMA_HOST`) with 1.5s timeout + 2. If reachable, discover models via `/api/tags` + 3. Register discovered models with `ctx.modelRegistry` using correct defaults + 4. Show status widget if Ollama is detected +- `ollama/ollama-client.ts` — Low-level HTTP client: + - `isRunning()` — `GET /` health check + - `getVersion()` — `GET /api/version` + - `listModels()` — `GET /api/tags` + - `showModel(name)` — `POST /api/show` (details, template, parameters, size) + - `getRunningModels()` — `GET /api/ps` (loaded models, VRAM usage) + - `pullModel(name, onProgress)` — `POST /api/pull` (streaming progress) + - `deleteModel(name)` — `DELETE /api/delete` + - `copyModel(source, dest)` — `POST /api/copy` + - Respects `OLLAMA_HOST` env var for non-default endpoints +- `ollama/ollama-discovery.ts` — Enhanced model discovery: + - Calls `/api/tags` to get model list + - Calls `/api/show` per model (batch, cached) to get: + - `details.parameter_size` → estimate context window + - `details.families` → detect vision (clip), reasoning (deepseek-r1) + - `modelfile` → extract default parameters + - Returns enriched `DiscoveredModel[]` with proper capabilities +- `ollama/model-capabilities.ts` — Known model lookup table: + - Maps well-known model families to capabilities + - e.g., `llama3.1` → `{ contextWindow: 131072, input: ["text"] }` + - e.g., `llava` → `{ contextWindow: 4096, input: ["text", "image"] }` + - e.g., `deepseek-r1` → `{ reasoning: true, contextWindow: 131072 }` + - e.g., `qwen2.5-coder` → `{ contextWindow: 131072, input: ["text"] }` + - Fallback: estimate from parameter count if not in table +- `ollama/types.ts` — Ollama API response types + +**Core changes (minimal):** +- `packages/pi-ai/src/types.ts` — Add `"ollama"` to `KnownProvider` +- `packages/pi-ai/src/env-api-keys.ts` — Add `"ollama"` key resolution (returns `"ollama"` placeholder — no real key needed) +- `src/onboarding.ts` — Add `"ollama"` to provider 
selection list +- `src/wizard.ts` — Add `ollama` entry (no key required) + +**Model registration details:** +Each discovered model registers as: +```typescript +{ + id: "llama3.1:8b", // from /api/tags + name: "Llama 3.1 8B", // humanized + api: "openai-completions", // uses existing provider + provider: "ollama", + baseUrl: "http://localhost:11434/v1", + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + reasoning: false, // from capabilities table + input: ["text"], // from capabilities table + contextWindow: 131072, // from capabilities table or /api/show + maxTokens: 16384, // conservative default + compat: { + supportsDeveloperRole: false, + supportsReasoningEffort: false, + supportsUsageInStreaming: false, + maxTokensField: "max_tokens", + }, +} +``` + +**Behavior:** +- `gsd --list-models` shows all locally-pulled Ollama models automatically +- `/model ollama/llama3.1:8b` works without any config file +- If Ollama isn't running, extension is silent — no errors, no models listed +- `models.json` overrides still work (user config wins over auto-discovery) + +### Phase 2: Native Ollama API Provider (`/api/chat`) + +**What:** A dedicated streaming provider that talks Ollama's native protocol instead of the OpenAI compatibility shim. 
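The stream handling at the heart of the native provider can be sketched as below. The field names (`message.content`, `done`, `eval_count`, `eval_duration` in nanoseconds) follow Ollama's documented `/api/chat` response shape; the surrounding provider wiring and the mapping to `AssistantMessage` events are omitted:

```typescript
// Sketch: incremental NDJSON parsing for Ollama's /api/chat stream.
// Field names follow Ollama's documented response shape; everything else
// (buffering strategy, event mapping) is an assumption for illustration.
type OllamaChatChunk = {
  message?: { role: string; content: string };
  done: boolean;
  eval_count?: number;     // output tokens generated
  eval_duration?: number;  // generation time in nanoseconds
};

// Split a network buffer into complete JSON lines, keeping any trailing
// partial line for the next read.
function parseNdjsonChunks(buffer: string): { chunks: OllamaChatChunk[]; rest: string } {
  const lines = buffer.split("\n");
  const rest = lines.pop() ?? "";
  const chunks = lines
    .filter((line) => line.trim() !== "")
    .map((line) => JSON.parse(line) as OllamaChatChunk);
  return { chunks, rest };
}

// Derive the tokens/sec figure mentioned under "Response metadata".
function tokensPerSecond(finalChunk: OllamaChatChunk): number | undefined {
  if (!finalChunk.eval_count || !finalChunk.eval_duration) return undefined;
  return finalChunk.eval_count / (finalChunk.eval_duration / 1e9);
}
```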
+ +**Extension files:** +- `ollama/ollama-provider.ts` — Native `/api/chat` streaming: + - Registers `"ollama-chat"` API with `registerApiProvider()` + - Implements `stream()` and `streamSimple()`: + - Maps GSD `Context` → Ollama messages format + - Maps GSD `Tool[]` → Ollama tool format + - Streams NDJSON responses, maps back to `AssistantMessage` events + - Extracts `` blocks for reasoning models (deepseek-r1, qwq) + - Ollama-specific options: + - `keep_alive` — control model memory retention (default: "5m") + - `num_ctx` — pass through model's context window + - `num_predict` — max output tokens + - Temperature, top_p, top_k + - Response metadata: + - `eval_count` / `eval_duration` → tokens/sec in usage stats + - `total_duration`, `load_duration` → performance visibility + - Vision support: converts image content to base64 for multimodal models + +**Core changes:** +- `packages/pi-ai/src/types.ts` — Add `"ollama-chat"` to `KnownApi` + +**Phase 1 models switch to `api: "ollama-chat"` by default.** Users can force OpenAI-compat via `models.json` override if needed. + +**Why native over OpenAI-compat:** +- Full `keep_alive` / `num_ctx` control +- Better error messages (Ollama-native vs generic OpenAI) +- More reliable tool calling on Ollama's native format +- Performance metrics in response (tokens/sec) +- Foundation for model management commands + +### Phase 3: Local LLM Management UX + +**What:** `/ollama` slash commands and an LLM tool for model management. 
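The pull progress line shown in the UX flow can be sketched as a small pure helper. This is illustrative only: the real rendering would go through `ctx.ui.setWidget()`, and the bar width is an arbitrary choice here:

```typescript
// Sketch of the `/ollama pull` progress line (bar, percent, GB counts).
// Rendering via ctx.ui.setWidget() and actual byte counts are assumed.
function renderPullProgress(completedBytes: number, totalBytes: number, width = 32): string {
  const ratio = totalBytes > 0 ? Math.min(completedBytes / totalBytes, 1) : 0;
  const filled = Math.round(ratio * width);
  const bar = "█".repeat(filled) + "░".repeat(width - filled);
  const gb = (n: number) => (n / 1024 ** 3).toFixed(1);
  return `${bar} ${Math.round(ratio * 100)}% (${gb(completedBytes)} GB / ${gb(totalBytes)} GB)`;
}
```

A helper like this keeps the progress formatting testable independently of the TUI widget layer.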
+ +**Extension files:** +- `ollama/ollama-commands.ts` — Slash commands registered via `pi.registerCommand()`: + - `/ollama` — Status overview: + ``` + Ollama v0.5.7 — running (localhost:11434) + + Loaded: + llama3.1:8b 4.7 GB VRAM idle 3m + + Available: + llama3.1:8b (4.7 GB) + qwen2.5-coder:7b (4.4 GB) + deepseek-r1:8b (4.9 GB) + ``` + - `/ollama pull ` — Pull with streaming progress via `ctx.ui.setWidget()` + - `/ollama list` — List all local models with sizes and families + - `/ollama remove ` — Delete a model (with confirmation) + - `/ollama ps` — Running models + VRAM usage +- `ollama/ollama-tool.ts` — LLM-callable tool registered via `pi.registerTool()`: + - `ollama_manage` tool — lets the agent pull/list/check models + - Parameters: `{ action: "list" | "pull" | "status" | "ps", model?: string }` + - Use case: agent detects it needs a model, pulls it automatically + +**UX Flow:** +``` +$ gsd +> /ollama +Ollama v0.5.7 — running (localhost:11434) +Loaded: + llama3.1:8b — 4.7 GB VRAM, idle 3m +Available: + llama3.1:8b (4.7 GB) + qwen2.5-coder:7b (4.4 GB) + deepseek-r1:8b (4.9 GB) + +> /ollama pull codestral:22b +Pulling codestral:22b... +████████████████████████████░░░░ 78% (14.2 GB / 18.1 GB) +✓ codestral:22b ready + +> /model ollama/codestral:22b +Switched to codestral:22b (local, Ollama) +``` + +## Implementation Order + +1. **Phase 1** — Auto-discovery with OpenAI-compat routing. Biggest user impact, smallest risk. +2. **Phase 3** — Management UX (`/ollama` commands). Valuable even before native API. +3. **Phase 2** — Native `/api/chat` provider. Optimization over OpenAI-compat; do last. 
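The Phase 1 capability lookup and its parameter-size fallback might look like the sketch below. The table entries mirror the examples given earlier; the fallback thresholds and context-window guesses are assumptions, not measured values:

```typescript
// Sketch of model-capabilities.ts: known-family table with a rough
// parameter-count fallback. Thresholds here are illustrative assumptions.
type Capabilities = { contextWindow: number; input: string[]; reasoning?: boolean };

const KNOWN_FAMILIES: Record<string, Capabilities> = {
  "llama3.1": { contextWindow: 131072, input: ["text"] },
  llava: { contextWindow: 4096, input: ["text", "image"] },
  "deepseek-r1": { contextWindow: 131072, input: ["text"], reasoning: true },
  "qwen2.5-coder": { contextWindow: 131072, input: ["text"] },
};

function lookupCapabilities(modelId: string, parameterSize?: string): Capabilities {
  const family = modelId.split(":")[0]; // "llama3.1:8b" -> "llama3.1"
  const known = KNOWN_FAMILIES[family];
  if (known) return known;
  // Fallback: guess a conservative context window from the parameter count
  // reported by /api/show (e.g. "13B"). Purely an estimation heuristic.
  const billions = parameterSize ? parseFloat(parameterSize) : 0;
  return { contextWindow: billions >= 7 ? 32768 : 8192, input: ["text"] };
}
```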
+ +## Core Changes Summary (minimal) + +| File | Change | +|------|--------| +| `packages/pi-ai/src/types.ts` | Add `"ollama"` to `KnownProvider`, `"ollama-chat"` to `KnownApi` (Phase 2) | +| `packages/pi-ai/src/env-api-keys.ts` | Add `"ollama"` → always returns `"ollama"` placeholder | +| `src/onboarding.ts` | Add `"ollama"` to provider picker | +| `src/wizard.ts` | Add `"ollama"` key mapping (no key required) | + +Everything else lives in `src/resources/extensions/ollama/`. + +## Risks & Mitigations + +| Risk | Mitigation | +|------|------------| +| Ollama not running — startup probe latency | 1.5s timeout; cache result; probe async so it doesn't block TUI paint | +| Model capabilities unknown | Known-model table + `/api/show` fallback + parameter_size estimation | +| Tool calling unreliable on small models | Detect param count; warn on <7B models | +| Ollama API changes between versions | Version detect via `/api/version`; stable endpoints only | +| Conflicts with `models.json` Ollama config | User config always wins; auto-discovered models merge beneath manual config | +| Extension disabled — no impact on core | Extension is additive; disabling removes all Ollama features cleanly | + +## Testing Strategy + +- Unit tests: `ollama-client.ts` with mocked fetch responses +- Unit tests: `ollama-discovery.ts` model capability parsing +- Unit tests: `ollama-provider.ts` message format mapping + NDJSON stream parsing +- Unit tests: `model-capabilities.ts` known model lookups +- Integration test: mock HTTP server simulating Ollama `/api/tags`, `/api/chat`, `/api/pull` +- Manual test: real Ollama instance with llama3.1, qwen2.5-coder, deepseek-r1 + +## Open Questions + +1. **Startup probe** — Probe Ollama on `session_start` (adds ~1.5s if not running) or lazy on first `/model`? **Recommendation: async probe on session_start (non-blocking), eager if `OLLAMA_HOST` is set.** +2. **Auto-start** — Try to launch Ollama if installed but not running? 
**Recommendation: no — too invasive. Show helpful message in `/ollama` status.** +3. **Vision support** — Support multimodal models (llava, etc.) in Phase 2 native API? **Recommendation: yes, detected via capabilities table.** +4. **Model refresh** — How often to re-probe Ollama for new models? **Recommendation: on `/ollama list`, on `/model` command, and every 5 min (existing TTL).** diff --git a/README.md b/README.md index 6ecc9c053..d1c179368 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![npm version](https://img.shields.io/npm/v/gsd-pi?style=for-the-badge&logo=npm&logoColor=white&color=CB3837)](https://www.npmjs.com/package/gsd-pi) [![npm downloads](https://img.shields.io/npm/dm/gsd-pi?style=for-the-badge&logo=npm&logoColor=white&color=CB3837)](https://www.npmjs.com/package/gsd-pi) [![GitHub stars](https://img.shields.io/github/stars/gsd-build/GSD-2?style=for-the-badge&logo=github&color=181717)](https://github.com/gsd-build/GSD-2) -[![Discord](https://img.shields.io/badge/Discord-Join%20us-5865F2?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/gsd) +[![Discord](https://img.shields.io/badge/Discord-Join%20us-5865F2?style=for-the-badge&logo=discord&logoColor=white)](https://discord.com/invite/nKXTsAcmbT) [![License](https://img.shields.io/badge/license-MIT-blue?style=for-the-badge)](LICENSE) [![$GSD Token](https://img.shields.io/badge/$GSD-Dexscreener-1C1C1C?style=for-the-badge&logo=data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMjQiIGhlaWdodD0iMjQiIHZpZXdCb3g9IjAgMCAyNCAyNCIgZmlsbD0ibm9uZSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48Y2lyY2xlIGN4PSIxMiIgY3k9IjEyIiByPSIxMCIgZmlsbD0iIzAwRkYwMCIvPjwvc3ZnPg==&logoColor=00FF00)](https://dexscreener.com/solana/dwudwjvan7bzkw9zwlbyv6kspdlvhwzrqy6ebk8xzxkv) diff --git a/docs/what-is-pi/15-pi-packages-the-ecosystem.md b/docs/what-is-pi/15-pi-packages-the-ecosystem.md index 4e19de60a..7116cca99 100644 --- a/docs/what-is-pi/15-pi-packages-the-ecosystem.md +++ 
b/docs/what-is-pi/15-pi-packages-the-ecosystem.md @@ -38,6 +38,6 @@ Or just use conventional directory names (`extensions/`, `skills/`, `prompts/`, - [Package gallery](https://shittycodingagent.ai/packages) - [npm search](https://www.npmjs.com/search?q=keywords%3Api-package) -- [Discord community](https://discord.com/invite/3cU7Bz4UPx) +- [Discord community](https://discord.com/invite/nKXTsAcmbT) --- diff --git a/package.json b/package.json index c7f83dad5..fd99a34e9 100644 --- a/package.json +++ b/package.json @@ -54,7 +54,7 @@ "copy-themes": "node scripts/copy-themes.cjs", "copy-export-html": "node scripts/copy-export-html.cjs", "test:compile": "node scripts/compile-tests.mjs", - "test:unit": "npm run test:compile && node --import ./scripts/dist-test-resolve.mjs --experimental-test-isolation=process --test-reporter=./scripts/test-reporter-compact.mjs --test 'dist-test/src/tests/*.test.js' 'dist-test/src/resources/extensions/gsd/tests/*.test.js' 'dist-test/src/resources/extensions/gsd/tests/*.test.mjs' 'dist-test/src/resources/extensions/shared/tests/*.test.js' 'dist-test/src/resources/extensions/claude-code-cli/tests/*.test.js' 'dist-test/src/resources/extensions/github-sync/tests/*.test.js' 'dist-test/src/resources/extensions/universal-config/tests/*.test.js' 'dist-test/src/resources/extensions/voice/tests/*.test.js'", + "test:unit": "npm run test:compile && node --import ./scripts/dist-test-resolve.mjs --experimental-test-isolation=process --test-reporter=./scripts/test-reporter-compact.mjs --test 'dist-test/src/tests/*.test.js' 'dist-test/src/resources/extensions/gsd/tests/*.test.js' 'dist-test/src/resources/extensions/gsd/tests/*.test.mjs' 'dist-test/src/resources/extensions/shared/tests/*.test.js' 'dist-test/src/resources/extensions/claude-code-cli/tests/*.test.js' 'dist-test/src/resources/extensions/github-sync/tests/*.test.js' 'dist-test/src/resources/extensions/universal-config/tests/*.test.js' 'dist-test/src/resources/extensions/voice/tests/*.test.js' 
'dist-test/src/resources/extensions/mcp-client/tests/*.test.js'", "test:packages": "node --test packages/pi-coding-agent/dist/core/*.test.js", "test:marketplace": "GSD_TEST_CLONE_MARKETPLACES=1 node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --test src/resources/extensions/gsd/tests/claude-import-tui.test.ts src/resources/extensions/gsd/tests/plugin-importer-live.test.ts src/tests/marketplace-discovery.test.ts", "test:coverage": "c8 --reporter=text --reporter=lcov --exclude='src/resources/extensions/gsd/tests/**' --exclude='src/tests/**' --exclude='scripts/**' --exclude='native/**' --exclude='node_modules/**' --check-coverage --statements=40 --lines=40 --branches=20 --functions=20 node --import ./src/resources/extensions/gsd/tests/resolve-ts.mjs --experimental-strip-types --experimental-test-isolation=process --test src/resources/extensions/gsd/tests/*.test.ts src/resources/extensions/gsd/tests/*.test.mjs src/tests/*.test.ts src/resources/extensions/shared/tests/*.test.ts", diff --git a/packages/native/package.json b/packages/native/package.json index 1bb3b009d..42bc47668 100644 --- a/packages/native/package.json +++ b/packages/native/package.json @@ -2,7 +2,7 @@ "name": "@gsd/native", "version": "0.1.0", "description": "Native Rust bindings for GSD \u2014 high-performance native modules via N-API", - "type": "module", + "type": "commonjs", "main": "./dist/index.js", "types": "./dist/index.d.ts", "scripts": { @@ -14,75 +14,75 @@ "exports": { ".": { "types": "./dist/index.d.ts", - "import": "./dist/index.js" + "default": "./dist/index.js" }, "./grep": { "types": "./dist/grep/index.d.ts", - "import": "./dist/grep/index.js" + "default": "./dist/grep/index.js" }, "./ps": { "types": "./dist/ps/index.d.ts", - "import": "./dist/ps/index.js" + "default": "./dist/ps/index.js" }, "./glob": { "types": "./dist/glob/index.d.ts", - "import": "./dist/glob/index.js" + "default": "./dist/glob/index.js" }, "./clipboard": { "types": 
"./dist/clipboard/index.d.ts", - "import": "./dist/clipboard/index.js" + "default": "./dist/clipboard/index.js" }, "./ast": { "types": "./dist/ast/index.d.ts", - "import": "./dist/ast/index.js" + "default": "./dist/ast/index.js" }, "./html": { "types": "./dist/html/index.d.ts", - "import": "./dist/html/index.js" + "default": "./dist/html/index.js" }, "./text": { "types": "./dist/text/index.d.ts", - "import": "./dist/text/index.js" + "default": "./dist/text/index.js" }, "./fd": { "types": "./dist/fd/index.d.ts", - "import": "./dist/fd/index.js" + "default": "./dist/fd/index.js" }, "./image": { "types": "./dist/image/index.d.ts", - "import": "./dist/image/index.js" + "default": "./dist/image/index.js" }, "./xxhash": { "types": "./dist/xxhash/index.d.ts", - "import": "./dist/xxhash/index.js" + "default": "./dist/xxhash/index.js" }, "./diff": { "types": "./dist/diff/index.d.ts", - "import": "./dist/diff/index.js" + "default": "./dist/diff/index.js" }, "./gsd-parser": { "types": "./dist/gsd-parser/index.d.ts", - "import": "./dist/gsd-parser/index.js" + "default": "./dist/gsd-parser/index.js" }, "./highlight": { "types": "./dist/highlight/index.d.ts", - "import": "./dist/highlight/index.js" + "default": "./dist/highlight/index.js" }, "./json-parse": { "types": "./dist/json-parse/index.d.ts", - "import": "./dist/json-parse/index.js" + "default": "./dist/json-parse/index.js" }, "./stream-process": { "types": "./dist/stream-process/index.d.ts", - "import": "./dist/stream-process/index.js" + "default": "./dist/stream-process/index.js" }, "./truncate": { "types": "./dist/truncate/index.d.ts", - "import": "./dist/truncate/index.js" + "default": "./dist/truncate/index.js" }, "./ttsr": { "types": "./dist/ttsr/index.d.ts", - "import": "./dist/ttsr/index.js" + "default": "./dist/ttsr/index.js" } }, "files": [ diff --git a/packages/native/src/__tests__/module-compat.test.mjs b/packages/native/src/__tests__/module-compat.test.mjs new file mode 100644 index 000000000..949fd16d3 --- 
/dev/null +++ b/packages/native/src/__tests__/module-compat.test.mjs @@ -0,0 +1,91 @@ +/** + * Tests that the @gsd/native package.json is correctly configured + * for Node.js module resolution (ESM/CJS compatibility). + * + * Regression test for #2861: "type": "module" + "import"-only export + * conditions caused crashes on Node.js v24 when the parent package also + * declared "type": "module" and strict ESM resolution was enforced. + */ + +import { test, describe } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import * as path from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const pkgPath = path.resolve(__dirname, "..", "..", "package.json"); +const pkg = JSON.parse(readFileSync(pkgPath, "utf8")); + +describe("@gsd/native module compatibility (#2861)", () => { + test("package.json must not declare type: module (compiled output is CJS-compatible)", () => { + // The compiled output uses createRequire() to load .node addons. + // Declaring "type": "module" forces Node.js to treat .js files as ESM, + // but the package needs "type": "commonjs" to override the parent + // package's "type": "module" and ensure correct CJS semantics. + assert.notEqual( + pkg.type, + "module", + 'package.json must not set "type": "module" — this causes crashes on Node.js v24 ' + + "when the parent package also declares ESM (see #2861)", + ); + }); + + test("package.json should explicitly declare type: commonjs", () => { + // When installed as a dependency under a parent with "type": "module" + // (e.g. gsd-pi), an absent "type" field would inherit the parent's + // ESM setting. Explicit "commonjs" overrides this. 
+ assert.equal( + pkg.type, + "commonjs", + 'package.json must explicitly set "type": "commonjs" to override ' + + "the parent package's ESM declaration", + ); + }); + + test("all export conditions must use 'default' (not 'import'-only)", () => { + // The "import" condition key restricts resolution to ESM import + // statements only. Using "default" ensures the export works for both + // require() and import, which is essential for a CJS package that may + // be consumed from ESM code via Node's CJS interop. + const exportsMap = pkg.exports; + assert.ok(exportsMap, "package.json must have an exports map"); + + for (const [subpath, conditions] of Object.entries(exportsMap)) { + assert.ok( + !conditions.import || conditions.default, + `exports["${subpath}"] uses "import" condition without "default" — ` + + `this breaks CJS consumers and Node.js v24 strict resolution`, + ); + } + }); + + test("native.ts source must not use bare import.meta.url (parse-time error in CJS)", () => { + // When compiled to CJS, import.meta is a *parse-time* syntax error -- + // typeof guards don't help because Node rejects the syntax before + // executing any code. The source must wrap import.meta access in + // an indirect eval so the CJS parser never sees the bare syntax. + const nativeSrc = readFileSync( + path.resolve(__dirname, "..", "native.ts"), + "utf8", + ); + + // Bare import.meta.url (NOT wrapped) would crash at parse time in CJS. + // These regexes match direct usage like fileURLToPath(import.meta.url) + // and createRequire(import.meta.url), but NOT indirect patterns that + // hide import.meta from the CJS parser. 
+ const hasBareImportMetaDirname = /path\.dirname\(.*fileURLToPath\(import\.meta\.url\)\)/.test(nativeSrc); + const hasBareImportMetaRequire = /createRequire\(import\.meta\.url\)/.test(nativeSrc); + + assert.ok( + !hasBareImportMetaDirname, + "native.ts must not use bare import.meta.url in fileURLToPath() -- " + + "this is a parse-time syntax error in CJS; use indirect eval", + ); + assert.ok( + !hasBareImportMetaRequire, + "native.ts must not use bare import.meta.url in createRequire() -- " + + "this is a parse-time syntax error in CJS; use indirect eval", + ); + }); +}); diff --git a/packages/native/src/native.ts b/packages/native/src/native.ts index b310cef28..05d4288b1 100644 --- a/packages/native/src/native.ts +++ b/packages/native/src/native.ts @@ -8,14 +8,15 @@ * 3. native/addon/gsd_engine.dev.node (local debug build) */ -import { createRequire } from "node:module"; import * as path from "node:path"; -import { fileURLToPath } from "node:url"; -const __dirname = path.dirname(fileURLToPath(import.meta.url)); -const require = createRequire(import.meta.url); +// __dirname and require are available in both execution contexts: +// - CJS (production build via tsc): provided natively by Node +// - ESM (CI test loader): injected by the dist-redirect.mjs preamble +const _dirname = __dirname; +const _require = require; -const addonDir = path.resolve(__dirname, "..", "..", "..", "native", "addon"); +const addonDir = path.resolve(_dirname, "..", "..", "..", "native", "addon"); const platformTag = `${process.platform}-${process.arch}`; /** Map Node.js platform/arch to the npm package suffix */ @@ -36,7 +37,7 @@ function loadNative(): Record { const packageSuffix = platformPackageMap[platformTag]; if (packageSuffix) { try { - _loadedSuccessfully = true; return require(`@gsd-build/engine-${packageSuffix}`) as Record; + _loadedSuccessfully = true; return _require(`@gsd-build/engine-${packageSuffix}`) as Record; } catch (err) { const message = err instanceof Error ? 
err.message : String(err); errors.push(`@gsd-build/engine-${packageSuffix}: ${message}`); @@ -46,7 +47,7 @@ function loadNative(): Record { // 2. Try local release build (native/addon/gsd_engine.{platform}.node) const releasePath = path.join(addonDir, `gsd_engine.${platformTag}.node`); try { - _loadedSuccessfully = true; return require(releasePath) as Record; + _loadedSuccessfully = true; return _require(releasePath) as Record; } catch (err) { const message = err instanceof Error ? err.message : String(err); errors.push(`${releasePath}: ${message}`); @@ -55,7 +56,7 @@ function loadNative(): Record { // 3. Try local dev build (native/addon/gsd_engine.dev.node) const devPath = path.join(addonDir, "gsd_engine.dev.node"); try { - _loadedSuccessfully = true; return require(devPath) as Record; + _loadedSuccessfully = true; return _require(devPath) as Record; } catch (err) { const message = err instanceof Error ? err.message : String(err); errors.push(`${devPath}: ${message}`); diff --git a/packages/pi-agent-core/src/agent-loop.test.ts b/packages/pi-agent-core/src/agent-loop.test.ts new file mode 100644 index 000000000..0e61d9701 --- /dev/null +++ b/packages/pi-agent-core/src/agent-loop.test.ts @@ -0,0 +1,45 @@ +// agent-loop pauseTurn handling tests +// Verifies that pause_turn / pauseTurn stop reason causes the inner loop +// to continue (re-invoke the LLM) instead of exiting. 
+// Regression test for https://github.com/gsd-build/gsd-2/issues/2869 + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +describe("agent-loop — pauseTurn handling (#2869)", () => { + it("sets hasMoreToolCalls when stopReason is pauseTurn", () => { + const source = readFileSync(join(__dirname, "agent-loop.ts"), "utf-8"); + + // The agent loop must treat pauseTurn as a reason to continue the inner + // loop, just like toolUse. This prevents incomplete server_tool_use blocks + // from being saved to history, which would cause a 400 on the next request. + assert.match( + source, + /pauseTurn/, + "agent-loop.ts must handle the pauseTurn stop reason", + ); + + // Verify it sets hasMoreToolCalls = true for pauseTurn + assert.match( + source, + /stopReason\s*===?\s*["']pauseTurn["']/, + 'agent-loop.ts must check for stopReason === "pauseTurn"', + ); + }); + + it("pauseTurn is in the StopReason union type", () => { + // Read the pi-ai types to ensure pauseTurn is a valid StopReason + const typesPath = join(__dirname, "..", "..", "pi-ai", "src", "types.ts"); + const typesSource = readFileSync(typesPath, "utf-8"); + assert.match( + typesSource, + /["']pauseTurn["']/, + 'StopReason type must include "pauseTurn"', + ); + }); +}); diff --git a/packages/pi-agent-core/src/agent-loop.ts b/packages/pi-agent-core/src/agent-loop.ts index fad23b145..82254d3bf 100644 --- a/packages/pi-agent-core/src/agent-loop.ts +++ b/packages/pi-agent-core/src/agent-loop.ts @@ -231,9 +231,10 @@ async function runLoop( return; } - // Check for tool calls + // Check for tool calls or paused server turn const toolCalls = message.content.filter((c) => c.type === "toolCall"); - hasMoreToolCalls = toolCalls.length > 0; + hasMoreToolCalls = + toolCalls.length > 0 || 
message.stopReason === "pauseTurn"; const toolResults: ToolResultMessage[] = []; if (hasMoreToolCalls && config.externalToolExecution) { diff --git a/packages/pi-agent-core/src/proxy.ts b/packages/pi-agent-core/src/proxy.ts index 619521bda..574ec2bf6 100644 --- a/packages/pi-agent-core/src/proxy.ts +++ b/packages/pi-agent-core/src/proxy.ts @@ -47,7 +47,7 @@ export type ProxyAssistantMessageEvent = | { type: "toolcall_end"; contentIndex: number } | { type: "done"; - reason: Extract<StopReason, "stop" | "length" | "toolUse">; + reason: Extract<StopReason, "stop" | "length" | "toolUse" | "pauseTurn">; usage: AssistantMessage["usage"]; } | { diff --git a/packages/pi-ai/src/env-api-keys.ts b/packages/pi-ai/src/env-api-keys.ts index b6577d99d..1036c4b28 100644 --- a/packages/pi-ai/src/env-api-keys.ts +++ b/packages/pi-ai/src/env-api-keys.ts @@ -137,6 +137,7 @@ export function getEnvApiKey(provider: any): string | undefined { "opencode-go": "OPENCODE_API_KEY", "kimi-coding": "KIMI_API_KEY", "alibaba-coding-plan": "ALIBABA_API_KEY", + ollama: "OLLAMA_API_KEY", "ollama-cloud": "OLLAMA_API_KEY", "custom-openai": "CUSTOM_OPENAI_API_KEY", }; diff --git a/packages/pi-ai/src/index.ts b/packages/pi-ai/src/index.ts index a75aaf7f4..c8d9e1e8c 100644 --- a/packages/pi-ai/src/index.ts +++ b/packages/pi-ai/src/index.ts @@ -27,4 +27,5 @@ export type { } from "./utils/oauth/types.js"; export * from "./utils/overflow.js"; export * from "./utils/typebox-helpers.js"; +export * from "./utils/repair-tool-json.js"; export * from "./utils/validation.js"; diff --git a/packages/pi-ai/src/providers/anthropic-shared.test.ts b/packages/pi-ai/src/providers/anthropic-shared.test.ts new file mode 100644 index 000000000..9b6718570 --- /dev/null +++ b/packages/pi-ai/src/providers/anthropic-shared.test.ts @@ -0,0 +1,29 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { mapStopReason } from "./anthropic-shared.js"; + +describe("mapStopReason", () => { + it("maps end_turn to stop", () => { + assert.equal(mapStopReason("end_turn"), "stop"); + }); + + it("maps
max_tokens to length", () => { + assert.equal(mapStopReason("max_tokens"), "length"); + }); + + it("maps tool_use to toolUse", () => { + assert.equal(mapStopReason("tool_use"), "toolUse"); + }); + + it("maps pause_turn to pauseTurn (not stop)", () => { + // pause_turn means the server paused a long-running turn (e.g. native + // web search hit its iteration limit). Mapping it to "stop" causes the + // agent loop to exit, leaving an incomplete server_tool_use block in + // history which triggers a 400 on the next request. + assert.equal(mapStopReason("pause_turn"), "pauseTurn"); + }); + + it("throws on unknown stop reason", () => { + assert.throws(() => mapStopReason("bogus"), /Unhandled stop reason/); + }); +}); diff --git a/packages/pi-ai/src/providers/anthropic-shared.ts b/packages/pi-ai/src/providers/anthropic-shared.ts index 4425df7dd..b7229bf7e 100644 --- a/packages/pi-ai/src/providers/anthropic-shared.ts +++ b/packages/pi-ai/src/providers/anthropic-shared.ts @@ -31,6 +31,7 @@ import type { export type AnthropicApi = "anthropic-messages" | "anthropic-vertex"; import type { AssistantMessageEventStream } from "../utils/event-stream.js"; import { parseStreamingJson } from "../utils/json-parse.js"; +import { repairToolJson } from "../utils/repair-tool-json.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; import { transformMessages } from "./transform-messages.js"; @@ -502,7 +503,7 @@ export function mapStopReason(reason: string): StopReason { case "refusal": return "error"; case "pause_turn": - return "stop"; + return "pauseTurn"; case "stop_sequence": return "stop"; case "sensitive": @@ -696,7 +697,21 @@ export function processAnthropicStream( partial: output, }); } else if (block.type === "toolCall") { - block.arguments = parseStreamingJson(block.partialJson); + // Try strict parse first; if it fails, attempt YAML bullet + // repair (#2660) before falling back to the lenient streaming + // parser which silently swallows errors. 
+ const raw = block.partialJson ?? ""; + let parsed: Record<string, unknown> | undefined; + try { + parsed = JSON.parse(raw); + } catch { + try { + parsed = JSON.parse(repairToolJson(raw)); + } catch { + // Fall through to streaming parser + } + } + block.arguments = parsed ?? parseStreamingJson(block.partialJson); delete (block as any).partialJson; stream.push({ type: "toolcall_end", diff --git a/packages/pi-ai/src/types.ts b/packages/pi-ai/src/types.ts index f4d63e1de..42a6b3478 100644 --- a/packages/pi-ai/src/types.ts +++ b/packages/pi-ai/src/types.ts @@ -43,6 +43,7 @@ export type KnownProvider = | "opencode-go" | "kimi-coding" | "alibaba-coding-plan" + | "ollama" | "ollama-cloud"; export type Provider = KnownProvider | string; @@ -192,7 +193,7 @@ export interface Usage { }; } -export type StopReason = "stop" | "length" | "toolUse" | "error" | "aborted"; +export type StopReason = "stop" | "length" | "toolUse" | "pauseTurn" | "error" | "aborted"; export interface UserMessage { role: "user"; @@ -253,7 +254,7 @@ export type AssistantMessageEvent = | { type: "toolcall_end"; contentIndex: number; toolCall: ToolCall; partial: AssistantMessage; malformedArguments?: boolean } | { type: "server_tool_use"; contentIndex: number; partial: AssistantMessage } | { type: "web_search_result"; contentIndex: number; partial: AssistantMessage } - | { type: "done"; reason: Extract<StopReason, "stop" | "length" | "toolUse">; message: AssistantMessage } + | { type: "done"; reason: Extract<StopReason, "stop" | "length" | "toolUse" | "pauseTurn">; message: AssistantMessage } | { type: "error"; reason: Extract<StopReason, "error" | "aborted">; error: AssistantMessage }; /** diff --git a/packages/pi-ai/src/utils/json-parse.ts b/packages/pi-ai/src/utils/json-parse.ts index ad907e8d0..727713132 100644 --- a/packages/pi-ai/src/utils/json-parse.ts +++ b/packages/pi-ai/src/utils/json-parse.ts @@ -1,14 +1,41 @@ import { parseStreamingJson as nativeParseStreamingJson } from "@gsd/native"; +import { hasYamlBulletLists, repairToolJson } from "./repair-tool-json.js"; /** * Attempts to parse potentially incomplete JSON during streaming.
* Always returns a valid object, even if the JSON is incomplete. * * Uses the native Rust streaming JSON parser for performance. + * Falls back to YAML bullet-list repair when the native parser + * returns an empty object from input that contains YAML-style + * bullet lists copied from template formatting (#2660). * * @param partialJson The partial JSON string from streaming * @returns Parsed object or empty object if parsing fails */ export function parseStreamingJson(partialJson: string | undefined): T { - return nativeParseStreamingJson(partialJson); + if (!partialJson || partialJson.trim() === "") { + return {} as T; + } + + // Fast path: try native streaming parser first + const result = nativeParseStreamingJson(partialJson); + + // If the native parser returned a non-empty result, use it. + // Only attempt repair when the result is empty AND the input + // contains YAML bullet patterns (avoids unnecessary work). + if ( + result && + typeof result === "object" && + Object.keys(result as object).length === 0 && + hasYamlBulletLists(partialJson) + ) { + try { + return JSON.parse(repairToolJson(partialJson)) as T; + } catch { + // Repair failed — return the empty object from native parser + } + } + + return result; } diff --git a/packages/pi-ai/src/utils/repair-tool-json.ts b/packages/pi-ai/src/utils/repair-tool-json.ts new file mode 100644 index 000000000..166e8ce21 --- /dev/null +++ b/packages/pi-ai/src/utils/repair-tool-json.ts @@ -0,0 +1,88 @@ +/** + * Repair malformed JSON in LLM tool-call arguments. + * + * LLMs sometimes copy YAML template formatting into JSON tool arguments, + * producing patterns like: + * + * "keyDecisions": - Used Web Notification API..., + * "keyFiles": - src-tauri/src/lib.rs — Extended... + * + * instead of valid JSON arrays: + * + * "keyDecisions": ["Used Web Notification API..."], + * "keyFiles": ["src-tauri/src/lib.rs — Extended..."] + * + * This module detects and repairs such patterns before JSON.parse is called. 
+ * + * @see https://github.com/gsd-build/gsd-2/issues/2660 + */ + +/** + * Detect whether a JSON string contains YAML-style bullet-list values + * (i.e. `"key": - item` instead of `"key": ["item"]`). + */ +export function hasYamlBulletLists(json: string): boolean { + // Match: "key": followed by whitespace then a dash-space pattern (YAML bullet) + // The negative lookahead excludes negative numbers (e.g. "key": -1) + return /"\s*:\s*-\s+(?!\d)/.test(json); +} + +/** + * Attempt to repair YAML-style bullet lists embedded in a JSON string. + * + * Converts patterns like: + * "keyDecisions": - Used Web Notification API..., "keyFiles": - file1 + * + * Into: + * "keyDecisions": ["Used Web Notification API..."], "keyFiles": ["file1"] + * + * Returns the original string unchanged if no YAML patterns are detected + * or if the repair itself would produce invalid JSON. + */ +export function repairToolJson(json: string): string { + if (!hasYamlBulletLists(json)) { + return json; + } + + // Strategy: find each `"key": - item1\n - item2\n - item3` region and + // wrap items in a JSON array. + // + // We work on the raw string because the JSON is not parseable yet. + // The pattern we target: + // "someKey":\s*- item text (possibly multiline) + // optionally followed by more `- item` lines + // terminated by the next `"key":` or `}` or end of string. + + let repaired = json; + + // Match a key followed by YAML-style bullet list. + // Capture: (1) the key portion including colon, (2) the bullet-list body, + // (3) the separator (comma or empty) before the next key/bracket. + // The bullet list body ends at the next `"key":` or `}` or `]` or end of string. + const keyBulletPattern = + /("(?:[^"\\]|\\.)*"\s*:\s*)(- .+?)(,?\s*)(?="(?:[^"\\]|\\.)*"\s*:|[}\]]|$)/gs; + + repaired = repaired.replace( + keyBulletPattern, + (_match, keyPart: string, bulletBody: string, separator: string) => { + // Split the bullet body into individual items on `- ` boundaries. 
+ // Items may contain embedded newlines for multi-line values. + const items = bulletBody + .split(/\n?\s*- /) + .filter((s) => s.trim().length > 0) + .map((s) => s.replace(/,\s*$/, "").trim()); + + // JSON-encode each item as a string, then wrap in an array. + const jsonArray = "[" + items.map((item) => JSON.stringify(item)).join(", ") + "]"; + + // Re-emit the captured separator (comma + whitespace) unchanged so the + // next key or closing bracket stays properly delimited + return keyPart + jsonArray + separator; + }, + ); + + // Strip trailing commas before } or ] (common in repaired JSON) + repaired = repaired.replace(/,(\s*[}\]])/g, "$1"); + + return repaired; +} diff --git a/packages/pi-ai/src/utils/tests/repair-tool-json.test.ts b/packages/pi-ai/src/utils/tests/repair-tool-json.test.ts new file mode 100644 index 000000000..35bd03cbb --- /dev/null +++ b/packages/pi-ai/src/utils/tests/repair-tool-json.test.ts @@ -0,0 +1,102 @@ +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { repairToolJson, hasYamlBulletLists } from "../repair-tool-json.js"; + +describe("repairToolJson — YAML bullet list repair (#2660)", () => { + // ── Detection ────────────────────────────────────────────────────────── + + test("hasYamlBulletLists detects YAML-style bullets", () => { + assert.equal( + hasYamlBulletLists('"keyDecisions": - Used Web Notification API'), + true, + ); + }); + + test("hasYamlBulletLists ignores negative numbers", () => { + assert.equal( + hasYamlBulletLists('"offset": -1'), + false, + "negative number should not be detected as YAML bullet", + ); + }); + + test("hasYamlBulletLists returns false for valid JSON", () => { + assert.equal( + hasYamlBulletLists('{"keyDecisions": ["item1", "item2"]}'), + false, + ); + }); + + // ── Single bullet item ──────────────────────────────────────────────── + + test("repairs single YAML bullet to JSON array", () => { + const malformed = '{"keyDecisions": -
Used Web Notification API}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + assert.deepEqual(parsed.keyDecisions, ["Used Web Notification API"]); + }); + + // ── Multiple bullet items (newline-separated) ───────────────────────── + + test("repairs multiple YAML bullets separated by newlines", () => { + const malformed = + '{"keyDecisions": - Used Web Notification API\n - Chose Tauri over Electron\n - Adopted SQLite for storage, "title": "M005"}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + assert.deepEqual(parsed.keyDecisions, [ + "Used Web Notification API", + "Chose Tauri over Electron", + "Adopted SQLite for storage", + ]); + assert.equal(parsed.title, "M005"); + }); + + // ── Multiple fields with YAML bullets ───────────────────────────────── + + test("repairs multiple fields each with YAML bullet lists", () => { + const malformed = + '{"keyDecisions": - decision one\n - decision two, "keyFiles": - src/lib.rs — Extended menu\n - src/main.ts — Entry point, "title": "done"}'; + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + assert.deepEqual(parsed.keyDecisions, ["decision one", "decision two"]); + assert.deepEqual(parsed.keyFiles, [ + "src/lib.rs \u2014 Extended menu", + "src/main.ts \u2014 Entry point", + ]); + assert.equal(parsed.title, "done"); + }); + + // ── Exact reproduction from issue #2660 ─────────────────────────────── + + test("repairs the exact malformed JSON from issue #2660", () => { + const malformed = `{"milestoneId": "M005", "title": "Native Desktop Polish", "oneLiner": "summary", "narrative": "details", "successCriteriaResults": "all pass", "definitionOfDoneResults": "all done", "requirementOutcomes": "met", "keyDecisions": - Used Web Notification API (new window.Notification()) instead of Tauri sendNotification wrapper, "keyFiles": - src-tauri/src/lib.rs \u2014 Extended menu builder with notification toggle, 
"lessonsLearned": - Always test notification permissions before sending, "followUps": "none", "deviations": "none", "verificationPassed": true}`; + + const repaired = repairToolJson(malformed); + const parsed = JSON.parse(repaired); + + assert.equal(parsed.milestoneId, "M005"); + assert.equal(parsed.title, "Native Desktop Polish"); + assert.ok(Array.isArray(parsed.keyDecisions), "keyDecisions should be an array"); + assert.ok(parsed.keyDecisions[0].includes("Web Notification API")); + assert.ok(Array.isArray(parsed.keyFiles), "keyFiles should be an array"); + assert.ok(parsed.keyFiles[0].includes("src-tauri/src/lib.rs")); + assert.ok(Array.isArray(parsed.lessonsLearned), "lessonsLearned should be an array"); + assert.equal(parsed.verificationPassed, true); + }); + + // ── Passthrough for valid JSON ──────────────────────────────────────── + + test("returns valid JSON unchanged", () => { + const valid = '{"keyDecisions": ["item1", "item2"], "count": -5}'; + const result = repairToolJson(valid); + assert.equal(result, valid, "valid JSON should be returned unchanged"); + }); + + // ── Negative numbers are preserved ──────────────────────────────────── + + test("does not mangle negative numbers", () => { + const valid = '{"offset": -1, "limit": -100}'; + const result = repairToolJson(valid); + assert.equal(result, valid); + }); +}); diff --git a/packages/pi-coding-agent/src/core/agent-session.ts b/packages/pi-coding-agent/src/core/agent-session.ts index fb84b9209..fe5e1f853 100644 --- a/packages/pi-coding-agent/src/core/agent-session.ts +++ b/packages/pi-coding-agent/src/core/agent-session.ts @@ -72,6 +72,7 @@ import type { ModelRegistry } from "./model-registry.js"; import { expandPromptTemplate, type PromptTemplate } from "./prompt-templates.js"; import type { ResourceExtensionPaths, ResourceLoader } from "./resource-loader.js"; import { RetryHandler } from "./retry-handler.js"; +import { isImageDimensionError, downsizeConversationImages } from 
"./image-overflow-recovery.js"; import type { BranchSummaryEntry, SessionManager } from "./session-manager.js"; import { getLatestCompactionEntry } from "./session-manager.js"; import type { SettingsManager } from "./settings-manager.js"; @@ -136,7 +137,8 @@ export type AgentSessionEvent = | { type: "auto_retry_end"; success: boolean; attempt: number; finalError?: string } | { type: "fallback_provider_switch"; from: string; to: string; reason: string } | { type: "fallback_provider_restored"; provider: string; reason: string } - | { type: "fallback_chain_exhausted"; reason: string }; + | { type: "fallback_chain_exhausted"; reason: string } + | { type: "image_overflow_recovery"; strippedCount: number; imageCount: number }; /** Listener function for agent session events */ export type AgentSessionEventListener = (event: AgentSessionEvent) => void; @@ -487,6 +489,36 @@ export class AgentSession { if (didRetry) return; // Retry was initiated, don't proceed to compaction } + // Check for image dimension overflow (many-image 400 error). + // When a session accumulates many images, the API rejects requests + // whose images exceed the many-image dimension limit. Strip older + // images from the conversation and auto-retry. 
(#2874) + if ( + msg.stopReason === "error" && + isImageDimensionError(msg.errorMessage) + ) { + const messages = this.agent.state.messages; + const result = downsizeConversationImages(messages as Message[]); + if (result.processed) { + // Remove the trailing error assistant message, then replace + if (messages.length > 0 && messages[messages.length - 1].role === "assistant") { + this.agent.replaceMessages(messages.slice(0, -1)); + } + + this._emit({ + type: "image_overflow_recovery", + strippedCount: result.strippedCount, + imageCount: result.imageCount, + }); + + // Auto-retry after downsizing + setTimeout(() => { + this.agent.continue().catch(() => {}); + }, 0); + return; + } + } + await this._compactionOrchestrator.checkCompaction(msg); } } @@ -1986,6 +2018,11 @@ export class AgentSession { const messages = this.agent.state.messages; const last = messages[messages.length - 1]; if (last?.role === "assistant" && (last as AssistantMessage).stopReason === "error") { + // If the error was an image dimension overflow, downsize images + // before retrying so the retry doesn't hit the same error (#2874) + if (isImageDimensionError((last as AssistantMessage).errorMessage)) { + downsizeConversationImages(messages as Message[]); + } this.agent.replaceMessages(messages.slice(0, -1)); this.agent.continue().catch((err) => { runner.emitError({ diff --git a/packages/pi-coding-agent/src/core/compaction/compaction.test.ts b/packages/pi-coding-agent/src/core/compaction/compaction.test.ts new file mode 100644 index 000000000..1fb5a2db2 --- /dev/null +++ b/packages/pi-coding-agent/src/core/compaction/compaction.test.ts @@ -0,0 +1,236 @@ +/** + * Tests for chunked compaction fallback when messages exceed model context window. + * Regression test for #2932. 
+ */ + +import assert from "node:assert/strict"; +import { describe, it, mock } from "node:test"; + +import type { AgentMessage } from "@gsd/pi-agent-core"; +import type { Model, AssistantMessage } from "@gsd/pi-ai"; + +import { generateSummary, estimateTokens, chunkMessages } from "./compaction.js"; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** Create a user message with approximately `tokenCount` tokens (chars = tokens * 4). */ +function makeUserMessage(tokenCount: number): AgentMessage { + const text = "x".repeat(tokenCount * 4); + return { role: "user", content: text } as unknown as AgentMessage; +} + +/** Create a mock model with a given context window. */ +function makeModel(contextWindow: number): Model { + return { + id: "test-model", + name: "Test Model", + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "https://api.test", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow, + maxTokens: 4096, + } as Model; +} + +function makeFakeResponse(text: string): AssistantMessage { + return { + content: [{ type: "text", text }], + stopReason: "end_turn", + } as unknown as AssistantMessage; +} + +// --------------------------------------------------------------------------- +// chunkMessages tests +// --------------------------------------------------------------------------- + +describe("chunkMessages", () => { + it("returns a single chunk when messages fit in budget", () => { + const messages: AgentMessage[] = [ + makeUserMessage(1_000), + makeUserMessage(1_000), + ]; + const chunks = chunkMessages(messages, 100_000); + assert.equal(chunks.length, 1); + assert.equal(chunks[0].length, 2); + }); + + it("splits messages into multiple chunks when they exceed budget", () => { + const messages: AgentMessage[] = [ + makeUserMessage(50_000), + 
makeUserMessage(50_000), + makeUserMessage(50_000), + ]; + // Budget of 80k tokens means each 50k message gets its own chunk + // (or two fit together if budget allows) + const chunks = chunkMessages(messages, 80_000); + assert.ok(chunks.length > 1, `Expected multiple chunks, got ${chunks.length}`); + // All messages should be present across chunks + const totalMessages = chunks.reduce((sum, c) => sum + c.length, 0); + assert.equal(totalMessages, 3); + }); + + it("puts a single oversized message in its own chunk", () => { + const messages: AgentMessage[] = [ + makeUserMessage(200_000), // Way over any reasonable budget + ]; + const chunks = chunkMessages(messages, 80_000); + assert.equal(chunks.length, 1); + assert.equal(chunks[0].length, 1); + }); + + it("preserves message order across chunks", () => { + // Create messages with identifiable sizes + const messages: AgentMessage[] = [ + makeUserMessage(30_000), // ~30k tokens + makeUserMessage(30_000), + makeUserMessage(30_000), + makeUserMessage(30_000), + ]; + const chunks = chunkMessages(messages, 50_000); + // Reconstruct original order + const flat = chunks.flat(); + assert.equal(flat.length, 4); + for (let i = 0; i < flat.length; i++) { + assert.strictEqual(flat[i], messages[i], `Message ${i} should be in order`); + } + }); +}); + +// --------------------------------------------------------------------------- +// generateSummary chunked fallback tests +// --------------------------------------------------------------------------- + +describe("generateSummary — chunked fallback (#2932)", () => { + it("calls _completeFn multiple times when messages exceed model context window", async () => { + // Arrange: 3 messages of ~80k tokens each = ~240k total, model has 200k window + const messages: AgentMessage[] = [ + makeUserMessage(80_000), + makeUserMessage(80_000), + makeUserMessage(80_000), + ]; + const model = makeModel(200_000); + const reserveTokens = 16_384; + + // Verify our test setup: messages really do 
exceed the model window + let totalTokens = 0; + for (const m of messages) totalTokens += estimateTokens(m); + assert.ok( + totalTokens > model.contextWindow, + `Test setup: ${totalTokens} tokens should exceed ${model.contextWindow} context window`, + ); + + // Track calls + const calls: string[] = []; + const mockComplete = mock.fn(async (_model: any, context: any, _options: any) => { + const userMsg = context.messages?.[0]; + const text = + typeof userMsg?.content === "string" + ? userMsg.content + : userMsg?.content?.[0]?.text ?? ""; + + if (text.includes("")) { + calls.push("update"); + } else { + calls.push("initial"); + } + return makeFakeResponse("Summary of chunk"); + }); + + const summary = await generateSummary( + messages, + model, + reserveTokens, + undefined, // apiKey + undefined, // signal + undefined, // customInstructions + undefined, // previousSummary + mockComplete, // _completeFn override for testing + ); + + // Assert: should have called completeSimple more than once (chunked) + assert.ok( + mockComplete.mock.callCount() > 1, + `Expected multiple calls for chunked summarization, got ${mockComplete.mock.callCount()}`, + ); + + // First call should be an initial summary, subsequent should be updates + assert.equal(calls[0], "initial", "First chunk should use initial summarization prompt"); + for (let i = 1; i < calls.length; i++) { + assert.equal(calls[i], "update", `Chunk ${i + 1} should use update summarization prompt`); + } + + // Should return a non-empty summary + assert.ok(summary.length > 0, "Summary should not be empty"); + }); + + it("uses single-pass when messages fit within model context window", async () => { + const messages: AgentMessage[] = [ + makeUserMessage(10_000), + makeUserMessage(10_000), + ]; + const model = makeModel(200_000); + const reserveTokens = 16_384; + + // Verify test setup + let totalTokens = 0; + for (const m of messages) totalTokens += estimateTokens(m); + assert.ok( + totalTokens < model.contextWindow, + 
`Test setup: ${totalTokens} tokens should fit in ${model.contextWindow} context window`, + ); + + const mockComplete = mock.fn(async () => makeFakeResponse("Single pass summary")); + + await generateSummary(messages, model, reserveTokens, undefined, undefined, undefined, undefined, mockComplete); + + assert.equal( + mockComplete.mock.callCount(), + 1, + "Should use single-pass summarization when messages fit in context window", + ); + }); + + it("passes previousSummary through chunked summarization", async () => { + const messages: AgentMessage[] = [ + makeUserMessage(80_000), + makeUserMessage(80_000), + makeUserMessage(80_000), + ]; + const model = makeModel(200_000); + const reserveTokens = 16_384; + const previousSummary = "Previous session summary content"; + + const prompts: string[] = []; + const mockComplete = mock.fn(async (_model: any, context: any) => { + const userMsg = context.messages?.[0]; + const text = + typeof userMsg?.content === "string" + ? userMsg.content + : userMsg?.content?.[0]?.text ?? ""; + prompts.push(text); + return makeFakeResponse("Chunk summary"); + }); + + await generateSummary( + messages, + model, + reserveTokens, + undefined, + undefined, + undefined, + previousSummary, + mockComplete, + ); + + // First chunk should include the previousSummary + assert.ok( + prompts[0].includes(previousSummary), + "First chunk should incorporate the previousSummary", + ); + }); +}); diff --git a/packages/pi-coding-agent/src/core/compaction/compaction.ts b/packages/pi-coding-agent/src/core/compaction/compaction.ts index 66cdbcfb3..cd3183277 100644 --- a/packages/pi-coding-agent/src/core/compaction/compaction.ts +++ b/packages/pi-coding-agent/src/core/compaction/compaction.ts @@ -489,9 +489,49 @@ Use this EXACT format: Keep each section concise. Preserve exact file paths, function names, and error messages.`; +/** + * Split messages into chunks where each chunk's estimated token count + * stays within `maxTokensPerChunk`. 
A single message that exceeds the + * budget is placed alone in its own chunk (never dropped). + */ +export function chunkMessages(messages: AgentMessage[], maxTokensPerChunk: number): AgentMessage[][] { + const chunks: AgentMessage[][] = []; + let currentChunk: AgentMessage[] = []; + let currentTokens = 0; + + for (const msg of messages) { + const msgTokens = estimateTokens(msg); + + if (currentChunk.length > 0 && currentTokens + msgTokens > maxTokensPerChunk) { + // Current chunk is full — start a new one + chunks.push(currentChunk); + currentChunk = [msg]; + currentTokens = msgTokens; + } else { + currentChunk.push(msg); + currentTokens += msgTokens; + } + } + + if (currentChunk.length > 0) { + chunks.push(currentChunk); + } + + return chunks; +} + +/** Type for the completion function, allowing injection for tests. */ +type CompleteFn = typeof completeSimple; + /** * Generate a summary of the conversation using the LLM. * If previousSummary is provided, uses the update prompt to merge. + * + * When the messages exceed the model's context window, automatically + * falls back to chunked summarization: summarize the first chunk, + * then iteratively merge subsequent chunks using the update prompt. + * + * @param _completeFn - Internal override for testing; defaults to completeSimple. */ export async function generateSummary( currentMessages: AgentMessage[], @@ -501,6 +541,59 @@ signal?: AbortSignal, customInstructions?: string, previousSummary?: string, + _completeFn?: CompleteFn, +): Promise<string> { + const complete = _completeFn ??
completeSimple; + + // Estimate total tokens for the messages to summarize + let totalTokens = 0; + for (const msg of currentMessages) { + totalTokens += estimateTokens(msg); + } + + // Overhead for the prompt framing, system prompt, and response budget + const promptOverhead = 4_000; + const maxInputTokens = (model.contextWindow || 200_000) - reserveTokens - promptOverhead; + + // If messages fit in the context window, use single-pass summarization + if (totalTokens <= maxInputTokens) { + return singlePassSummary(currentMessages, model, reserveTokens, apiKey, signal, customInstructions, previousSummary, complete); + } + + // Chunked fallback: split messages and iteratively summarize + const chunks = chunkMessages(currentMessages, maxInputTokens); + let runningSummary = previousSummary; + + for (let i = 0; i < chunks.length; i++) { + runningSummary = await singlePassSummary( + chunks[i], + model, + reserveTokens, + apiKey, + signal, + customInstructions, + runningSummary, + complete, + ); + } + + return runningSummary!; +} + +/** + * Single-pass summarization of messages using the LLM. + * If previousSummary is provided, uses the update prompt to merge. + */ +async function singlePassSummary( + currentMessages: AgentMessage[], + model: Model, + reserveTokens: number, + apiKey: string | undefined, + signal?: AbortSignal, + customInstructions?: string, + previousSummary?: string, + complete: CompleteFn = completeSimple, ): Promise<string> { const maxTokens = Math.floor(0.8 * reserveTokens); @@ -526,7 +619,7 @@ ?
{ maxTokens, signal, apiKey, reasoning: "high" as const } : { maxTokens, signal, apiKey }; - const response = await completeSimple( + const response = await complete( model, { systemPrompt: SUMMARIZATION_SYSTEM_PROMPT, messages: createSummarizationMessage(promptText) }, completionOptions, diff --git a/packages/pi-coding-agent/src/core/exec.ts b/packages/pi-coding-agent/src/core/exec.ts index b7dd046c4..9d12e8c23 100644 --- a/packages/pi-coding-agent/src/core/exec.ts +++ b/packages/pi-coding-agent/src/core/exec.ts @@ -39,7 +39,9 @@ export async function execCommand( return new Promise((resolve) => { const proc = spawn(command, args, { cwd, - shell: false, + // On Windows, npm/npx/tsc etc. are .cmd scripts that require shell + // resolution. Without this, spawn fails with ENOENT or EINVAL (#2854). + shell: process.platform === "win32", stdio: ["ignore", "pipe", "pipe"], }); diff --git a/packages/pi-coding-agent/src/core/extensions/extension-manifest.test.ts b/packages/pi-coding-agent/src/core/extensions/extension-manifest.test.ts new file mode 100644 index 000000000..3796ab071 --- /dev/null +++ b/packages/pi-coding-agent/src/core/extensions/extension-manifest.test.ts @@ -0,0 +1,77 @@ +// GSD-2 — Extension Manifest Tests +// Copyright (c) 2026 Jeremy McSpadden + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { readManifest, readManifestFromEntryPath } from "./extension-manifest.js"; + +describe("readManifest", () => { + it("returns null for missing directory", () => { + assert.equal(readManifest("/nonexistent/path"), null); + }); + + it("returns null for directory without manifest", () => { + const dir = mkdtempSync(join(tmpdir(), "ext-manifest-")); + assert.equal(readManifest(dir), null); + }); + + it("returns null for invalid JSON", () => { + const dir = mkdtempSync(join(tmpdir(), 
"ext-manifest-")); + writeFileSync(join(dir, "extension-manifest.json"), "not json{{{", "utf-8"); + assert.equal(readManifest(dir), null); + }); + + it("returns null for manifest missing required fields", () => { + const dir = mkdtempSync(join(tmpdir(), "ext-manifest-")); + writeFileSync( + join(dir, "extension-manifest.json"), + JSON.stringify({ id: "test", name: "test" }), + ); + assert.equal(readManifest(dir), null); + }); + + it("returns valid manifest", () => { + const dir = mkdtempSync(join(tmpdir(), "ext-manifest-")); + const manifest = { + id: "test-ext", + name: "Test Extension", + version: "1.0.0", + tier: "bundled", + requires: { platform: ">=2.29.0" }, + }; + writeFileSync(join(dir, "extension-manifest.json"), JSON.stringify(manifest)); + const result = readManifest(dir); + assert.equal(result?.id, "test-ext"); + assert.equal(result?.tier, "bundled"); + }); +}); + +describe("readManifestFromEntryPath", () => { + it("reads manifest from parent of entry path", () => { + const dir = mkdtempSync(join(tmpdir(), "ext-manifest-")); + const extDir = join(dir, "my-ext"); + mkdirSync(extDir); + writeFileSync( + join(extDir, "extension-manifest.json"), + JSON.stringify({ + id: "my-ext", + name: "My Extension", + version: "1.0.0", + tier: "community", + }), + ); + writeFileSync(join(extDir, "index.ts"), ""); + + const result = readManifestFromEntryPath(join(extDir, "index.ts")); + assert.equal(result?.id, "my-ext"); + assert.equal(result?.tier, "community"); + }); + + it("returns null when entry path parent has no manifest", () => { + const dir = mkdtempSync(join(tmpdir(), "ext-manifest-")); + assert.equal(readManifestFromEntryPath(join(dir, "index.ts")), null); + }); +}); diff --git a/packages/pi-coding-agent/src/core/extensions/extension-manifest.ts b/packages/pi-coding-agent/src/core/extensions/extension-manifest.ts new file mode 100644 index 000000000..673f5a410 --- /dev/null +++ b/packages/pi-coding-agent/src/core/extensions/extension-manifest.ts @@ -0,0 +1,62 
@@
+// GSD-2 — Extension Manifest: Types and reading for extension-manifest.json
+// Copyright (c) 2026 Jeremy McSpadden
+
+import { existsSync, readFileSync } from "node:fs";
+import { dirname, join } from "node:path";
+
+// ─── Types ──────────────────────────────────────────────────────────────────
+
+export interface ExtensionManifest {
+  id: string;
+  name: string;
+  version: string;
+  description: string;
+  tier: "core" | "bundled" | "community";
+  requires: { platform: string };
+  provides?: {
+    tools?: string[];
+    commands?: string[];
+    hooks?: string[];
+    shortcuts?: string[];
+  };
+  dependencies?: {
+    extensions?: string[];
+    runtime?: string[];
+  };
+}
+
+// ─── Validation ─────────────────────────────────────────────────────────────
+
+function isManifest(data: unknown): data is ExtensionManifest {
+  if (typeof data !== "object" || data === null) return false;
+  const obj = data as Record<string, unknown>;
+  return (
+    typeof obj.id === "string" &&
+    typeof obj.name === "string" &&
+    typeof obj.version === "string" &&
+    typeof obj.tier === "string"
+  );
+}
+
+// ─── Reading ────────────────────────────────────────────────────────────────
+
+/** Read extension-manifest.json from a directory. Returns null if missing or invalid. */
+export function readManifest(extensionDir: string): ExtensionManifest | null {
+  const manifestPath = join(extensionDir, "extension-manifest.json");
+  if (!existsSync(manifestPath)) return null;
+  try {
+    const raw = JSON.parse(readFileSync(manifestPath, "utf-8"));
+    return isManifest(raw) ? raw : null;
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Given an entry path (e.g. `.../extensions/browser-tools/index.ts`),
+ * resolve the parent directory and read its manifest.
+ */ +export function readManifestFromEntryPath(entryPath: string): ExtensionManifest | null { + const dir = dirname(entryPath); + return readManifest(dir); +} diff --git a/packages/pi-coding-agent/src/core/extensions/extension-sort.test.ts b/packages/pi-coding-agent/src/core/extensions/extension-sort.test.ts new file mode 100644 index 000000000..30a4b667e --- /dev/null +++ b/packages/pi-coding-agent/src/core/extensions/extension-sort.test.ts @@ -0,0 +1,134 @@ +// GSD-2 — Extension Sort Tests +// Copyright (c) 2026 Jeremy McSpadden + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { sortExtensionPaths } from "./extension-sort.js"; + +function createExtDir(base: string, id: string, deps?: string[]): string { + const dir = join(base, id); + mkdirSync(dir, { recursive: true }); + writeFileSync( + join(dir, "extension-manifest.json"), + JSON.stringify({ + id, + name: id, + version: "1.0.0", + tier: "bundled", + requires: { platform: ">=2.29.0" }, + ...(deps ? 
{ dependencies: { extensions: deps } } : {}), + }), + ); + writeFileSync(join(dir, "index.ts"), `export default function() {}`); + return join(dir, "index.ts"); +} + +describe("sortExtensionPaths", () => { + it("returns empty for empty input", () => { + const result = sortExtensionPaths([]); + assert.deepEqual(result.sortedPaths, []); + assert.deepEqual(result.warnings, []); + }); + + it("sorts independent extensions alphabetically", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const pathC = createExtDir(base, "charlie"); + const pathA = createExtDir(base, "alpha"); + const pathB = createExtDir(base, "bravo"); + + const result = sortExtensionPaths([pathC, pathA, pathB]); + assert.deepEqual(result.sortedPaths, [pathA, pathB, pathC]); + assert.equal(result.warnings.length, 0); + }); + + it("sorts dependencies before dependents", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const pathBase = createExtDir(base, "base-ext"); + const pathDependent = createExtDir(base, "dependent-ext", ["base-ext"]); + + // Pass dependent first — sort should reorder + const result = sortExtensionPaths([pathDependent, pathBase]); + assert.deepEqual(result.sortedPaths, [pathBase, pathDependent]); + assert.equal(result.warnings.length, 0); + }); + + it("handles deep dependency chains", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const pathA = createExtDir(base, "a"); + const pathB = createExtDir(base, "b", ["a"]); + const pathC = createExtDir(base, "c", ["b"]); + + const result = sortExtensionPaths([pathC, pathB, pathA]); + assert.deepEqual(result.sortedPaths, [pathA, pathB, pathC]); + assert.equal(result.warnings.length, 0); + }); + + it("warns about missing dependencies but still loads", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const pathExt = createExtDir(base, "my-ext", ["nonexistent"]); + + const result = sortExtensionPaths([pathExt]); + assert.equal(result.sortedPaths.length, 1); + 
assert.equal(result.sortedPaths[0], pathExt); + assert.equal(result.warnings.length, 1); + assert.match(result.warnings[0].message, /nonexistent.*not installed/); + }); + + it("warns about cycles but still loads both", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const pathA = createExtDir(base, "cycle-a", ["cycle-b"]); + const pathB = createExtDir(base, "cycle-b", ["cycle-a"]); + + const result = sortExtensionPaths([pathA, pathB]); + assert.equal(result.sortedPaths.length, 2); + assert.ok(result.warnings.length > 0); + assert.ok(result.warnings.some((w) => w.message.includes("cycle"))); + }); + + it("silently ignores self-dependencies", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const pathExt = createExtDir(base, "self-dep", ["self-dep"]); + + const result = sortExtensionPaths([pathExt]); + assert.deepEqual(result.sortedPaths, [pathExt]); + assert.equal(result.warnings.length, 0); + }); + + it("prepends extensions without manifests", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const noManifestDir = join(base, "no-manifest"); + mkdirSync(noManifestDir, { recursive: true }); + writeFileSync(join(noManifestDir, "index.ts"), `export default function() {}`); + const noManifestPath = join(noManifestDir, "index.ts"); + + const pathWithManifest = createExtDir(base, "with-manifest"); + + const result = sortExtensionPaths([pathWithManifest, noManifestPath]); + assert.equal(result.sortedPaths[0], noManifestPath); + assert.equal(result.sortedPaths[1], pathWithManifest); + }); + + it("handles non-array dependencies gracefully", () => { + const base = mkdtempSync(join(tmpdir(), "ext-sort-")); + const dir = join(base, "bad-deps"); + mkdirSync(dir, { recursive: true }); + writeFileSync( + join(dir, "extension-manifest.json"), + JSON.stringify({ + id: "bad-deps", + name: "bad-deps", + version: "1.0.0", + tier: "bundled", + dependencies: { extensions: "not-an-array" }, + }), + ); + writeFileSync(join(dir, 
"index.ts"), `export default function() {}`); + + const result = sortExtensionPaths([join(dir, "index.ts")]); + assert.equal(result.sortedPaths.length, 1); + assert.equal(result.warnings.length, 0); + }); +}); diff --git a/packages/pi-coding-agent/src/core/extensions/extension-sort.ts b/packages/pi-coding-agent/src/core/extensions/extension-sort.ts new file mode 100644 index 000000000..07a3e67d6 --- /dev/null +++ b/packages/pi-coding-agent/src/core/extensions/extension-sort.ts @@ -0,0 +1,137 @@ +// GSD-2 — Extension Sort: Topological dependency ordering +// Copyright (c) 2026 Jeremy McSpadden + +import { readManifestFromEntryPath } from "./extension-manifest.js"; + +export interface SortWarning { + declaringId: string; + missingId: string; + message: string; +} + +export interface SortResult { + sortedPaths: string[]; + warnings: SortWarning[]; +} + +/** + * Sort extension entry paths in topological dependency-first order using Kahn's BFS algorithm. + * + * - Extensions without manifests are prepended in input order. + * - Missing dependencies produce a structured warning but do not block loading. + * - Cycles produce warnings; cycle participants are appended alphabetically. + * - Self-dependencies are silently ignored. 
+ */
+export function sortExtensionPaths(paths: string[]): SortResult {
+  const warnings: SortWarning[] = [];
+  const pathsWithoutId: string[] = [];
+  const idToPath = new Map<string, string>();
+
+  // Step 1: Build ID map
+  for (const p of paths) {
+    const manifest = readManifestFromEntryPath(p);
+    if (!manifest) {
+      pathsWithoutId.push(p);
+    } else {
+      idToPath.set(manifest.id, p);
+    }
+  }
+
+  // Step 2: Build graph — inDegree and dependents adjacency
+  const inDegree = new Map<string, number>();
+  const dependents = new Map<string, string[]>(); // dep → [ids that depend on dep]
+
+  for (const id of idToPath.keys()) {
+    if (!inDegree.has(id)) inDegree.set(id, 0);
+    if (!dependents.has(id)) dependents.set(id, []);
+  }
+
+  for (const [id, entryPath] of idToPath) {
+    const manifest = readManifestFromEntryPath(entryPath);
+    const rawDeps = manifest?.dependencies?.extensions ?? [];
+    const deps = Array.isArray(rawDeps) ? rawDeps : [];
+
+    for (const depId of deps) {
+      // Silently ignore self-deps
+      if (depId === id) continue;
+
+      if (!idToPath.has(depId)) {
+        // Missing dependency — warn and skip edge
+        warnings.push({
+          declaringId: id,
+          missingId: depId,
+          message: `Extension '${id}' declares dependency '${depId}' which is not installed — loading anyway`,
+        });
+        continue;
+      }
+
+      // Valid edge: id depends on depId → increment inDegree[id], add id to dependents[depId]
+      inDegree.set(id, (inDegree.get(id) ?? 0) + 1);
+      const depDependents = dependents.get(depId) ?? [];
+      depDependents.push(id);
+      dependents.set(depId, depDependents);
+    }
+  }
+
+  // Step 3: Kahn's algorithm — start with nodes that have inDegree 0
+  const sorted: string[] = [];
+  // Ready queue: IDs with inDegree 0, maintained in alphabetical order
+  const ready: string[] = [...idToPath.keys()]
+    .filter((id) => inDegree.get(id) === 0)
+    .sort();
+
+  while (ready.length > 0) {
+    const id = ready.shift()!;
+    sorted.push(idToPath.get(id)!);
+
+    const deps = dependents.get(id) ??
[]; + for (const depId of deps) { + const newDegree = (inDegree.get(depId) ?? 0) - 1; + inDegree.set(depId, newDegree); + if (newDegree === 0) { + // Insert into ready queue maintaining alphabetical order + const insertIdx = ready.findIndex((r) => r > depId); + if (insertIdx === -1) { + ready.push(depId); + } else { + ready.splice(insertIdx, 0, depId); + } + } + } + } + + // Step 4: Cycle handling — any remaining IDs with inDegree > 0 + const cycleIds = [...idToPath.keys()] + .filter((id) => (inDegree.get(id) ?? 0) > 0) + .sort(); + + if (cycleIds.length > 0) { + const cycleSet = new Set(cycleIds); + + for (const id of cycleIds) { + const entryPath = idToPath.get(id)!; + const manifest = readManifestFromEntryPath(entryPath); + const rawDeps = manifest?.dependencies?.extensions ?? []; + const deps = Array.isArray(rawDeps) ? rawDeps : []; + + for (const depId of deps) { + if (depId === id) continue; + if (!cycleSet.has(depId)) continue; + + // Both id and depId are in cycle — emit warning + warnings.push({ + declaringId: id, + missingId: depId, + message: `Extension '${id}' and '${depId}' form a dependency cycle — loading both anyway (alphabetical order)`, + }); + } + + sorted.push(entryPath); + } + } + + return { + sortedPaths: [...pathsWithoutId, ...sorted], + warnings, + }; +} diff --git a/packages/pi-coding-agent/src/core/extensions/index.ts b/packages/pi-coding-agent/src/core/extensions/index.ts index 1ef9b82a7..70525095a 100644 --- a/packages/pi-coding-agent/src/core/extensions/index.ts +++ b/packages/pi-coding-agent/src/core/extensions/index.ts @@ -2,6 +2,10 @@ * Extension system for lifecycle events and custom tools. 
*/ +export type { ExtensionManifest } from "./extension-manifest.js"; +export { readManifest, readManifestFromEntryPath } from "./extension-manifest.js"; +export type { SortResult, SortWarning } from "./extension-sort.js"; +export { sortExtensionPaths } from "./extension-sort.js"; export type { SlashCommandInfo, SlashCommandLocation, SlashCommandSource } from "../slash-commands.js"; export { createExtensionRuntime, diff --git a/packages/pi-coding-agent/src/core/extensions/loader.ts b/packages/pi-coding-agent/src/core/extensions/loader.ts index 24a4385b5..96d689e67 100644 --- a/packages/pi-coding-agent/src/core/extensions/loader.ts +++ b/packages/pi-coding-agent/src/core/extensions/loader.ts @@ -941,6 +941,11 @@ function discoverExtensionsInDir(dir: string): string[] { /** * Discover and load extensions from standard locations. + * + * @deprecated Use DefaultResourceLoader.reload() instead — this function is + * not called in the GSD loading flow. Extension discovery happens through + * DefaultPackageManager.resolve() → addAutoDiscoveredResources(). Kept for + * backwards compatibility with direct pi-coding-agent consumers. 
*/ export async function discoverAndLoadExtensions( configuredPaths: string[], diff --git a/packages/pi-coding-agent/src/core/image-overflow-recovery.test.ts b/packages/pi-coding-agent/src/core/image-overflow-recovery.test.ts new file mode 100644 index 000000000..de075c280 --- /dev/null +++ b/packages/pi-coding-agent/src/core/image-overflow-recovery.test.ts @@ -0,0 +1,228 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; +import { + isImageDimensionError, + MANY_IMAGE_MAX_DIMENSION, + downsizeConversationImages, +} from "./image-overflow-recovery.js"; +import type { Message } from "@gsd/pi-ai"; + +// ─── isImageDimensionError ──────────────────────────────────────────────────── + +describe("isImageDimensionError", () => { + it("returns true for Anthropic many-image dimension error", () => { + const errorMessage = + 'Error: 400 {"type":"error","error":{"type":"invalid_request_error","message":"messages.125.content.38.image.source.base64.data: At least one of the image dimensions exceed max allowed size for many-image requests: 2000 pixels"}}'; + assert.equal(isImageDimensionError(errorMessage), true); + }); + + it("returns true for bare dimension exceed message", () => { + const errorMessage = + "image dimensions exceed max allowed size for many-image requests: 2000 pixels"; + assert.equal(isImageDimensionError(errorMessage), true); + }); + + it("returns false for unrelated 400 error", () => { + const errorMessage = + 'Error: 400 {"type":"error","error":{"type":"invalid_request_error","message":"max_tokens: 4096 > 2048"}}'; + assert.equal(isImageDimensionError(errorMessage), false); + }); + + it("returns false for rate limit error", () => { + assert.equal(isImageDimensionError("429 rate limit exceeded"), false); + }); + + it("returns false for empty string", () => { + assert.equal(isImageDimensionError(""), false); + }); + + it("returns false for undefined", () => { + assert.equal(isImageDimensionError(undefined), false); + }); 
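+
+  // Editorial sketch (assumption: detection follows the /i-flagged regex
+  // defined in image-overflow-recovery.ts, so matching is case-insensitive):
+  // a provider error string with different casing should still be detected.
+  it("returns true regardless of error message casing", () => {
+    assert.equal(
+      isImageDimensionError(
+        "Image Dimensions Exceed Max Allowed Size For Many-Image Requests: 2000 pixels",
+      ),
+      true,
+    );
+  });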
+}); + +// ─── MANY_IMAGE_MAX_DIMENSION ───────────────────────────────────────────────── + +describe("MANY_IMAGE_MAX_DIMENSION", () => { + it("is less than 2000 (the API-enforced limit)", () => { + assert.ok(MANY_IMAGE_MAX_DIMENSION < 2000); + }); + + it("is a positive integer", () => { + assert.ok(MANY_IMAGE_MAX_DIMENSION > 0); + assert.equal(MANY_IMAGE_MAX_DIMENSION, Math.floor(MANY_IMAGE_MAX_DIMENSION)); + }); +}); + +// ─── helpers ────────────────────────────────────────────────────────────────── + +function makeUserMsg(content: Message["content"] & any): Message { + return { role: "user", content, timestamp: Date.now() } as Message; +} + +function makeAssistantMsg(text: string): Message { + return { + role: "assistant", + content: [{ type: "text", text }], + api: "anthropic-messages", + provider: "anthropic", + model: "claude-opus-4-6", + usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } }, + stopReason: "stop", + timestamp: Date.now(), + } as Message; +} + +function makeToolResultMsg(images: number): Message { + const content: any[] = []; + for (let i = 0; i < images; i++) { + content.push({ type: "image", data: `img${i}`, mimeType: "image/png" }); + } + return { + role: "toolResult", + toolCallId: `tc${Math.random()}`, + toolName: "screenshot", + content, + isError: false, + timestamp: Date.now(), + } as Message; +} + +// ─── downsizeConversationImages ─────────────────────────────────────────────── + +describe("downsizeConversationImages", () => { + it("counts images in user and toolResult messages", () => { + const messages: Message[] = [ + makeUserMsg([ + { type: "image", data: "img1", mimeType: "image/png" }, + { type: "image", data: "img2", mimeType: "image/png" }, + ]), + makeAssistantMsg("I see them"), + makeToolResultMsg(1), + ]; + + const result = downsizeConversationImages(messages); + assert.equal(result.imageCount, 3); + }); + + it("returns 
processed=false when no images present", () => { + const messages: Message[] = [ + makeUserMsg("just text"), + makeAssistantMsg("reply"), + ]; + + const result = downsizeConversationImages(messages); + assert.equal(result.imageCount, 0); + assert.equal(result.processed, false); + }); + + it("returns processed=false when image count <= RECENT_IMAGES_TO_KEEP", () => { + const messages: Message[] = [ + makeUserMsg([ + { type: "image", data: "img1", mimeType: "image/png" }, + ]), + makeAssistantMsg("got it"), + ]; + + const result = downsizeConversationImages(messages); + assert.equal(result.imageCount, 1); + assert.equal(result.processed, false); + }); + + it("strips older images when many images present, preserves recent ones", () => { + const messages: Message[] = []; + for (let i = 0; i < 25; i++) { + messages.push( + makeUserMsg([ + { type: "text", text: `message ${i}` }, + { type: "image", data: `img${i}`, mimeType: "image/png" }, + ]), + ); + messages.push(makeAssistantMsg(`reply ${i}`)); + } + + const result = downsizeConversationImages(messages); + assert.ok(result.processed); + assert.equal(result.imageCount, 25); + assert.equal(result.strippedCount, 20); // 25 - 5 recent + + // Count remaining images + let remainingImages = 0; + for (const msg of messages) { + if (msg.role === "assistant") continue; + if (typeof msg.content === "string") continue; + const arr = msg.content as any[]; + for (const block of arr) { + if (block.type === "image") remainingImages++; + } + } + assert.equal(remainingImages, 5, "Should keep exactly 5 most recent images"); + + // The 5 most recent user messages (indices 40,42,44,46,48) should have images + for (let i = 20; i < 25; i++) { + const userMsg = messages[i * 2]; // user messages at even indices + const arr = userMsg.content as any[]; + const hasImage = arr.some((c: any) => c.type === "image"); + assert.ok(hasImage, `Recent message ${i} should retain its image`); + } + }); + + it("adds text placeholder when stripping an 
image", () => { + const messages: Message[] = []; + for (let i = 0; i < 10; i++) { + messages.push( + makeUserMsg([ + { type: "image", data: `img${i}`, mimeType: "image/jpeg" }, + ]), + ); + messages.push(makeAssistantMsg(`reply ${i}`)); + } + + downsizeConversationImages(messages); + + // First message's image should have been replaced with text + const firstMsg = messages[0]; + const arr = firstMsg.content as any[]; + const placeholder = arr.find( + (c: any) => c.type === "text" && c.text.includes("[image removed"), + ); + assert.ok(placeholder, "Stripped image should be replaced with text placeholder"); + assert.ok( + placeholder.text.includes("image/jpeg"), + "Placeholder should mention original mime type", + ); + }); + + it("handles toolResult messages with images", () => { + const messages: Message[] = []; + for (let i = 0; i < 10; i++) { + messages.push(makeToolResultMsg(1)); + messages.push(makeAssistantMsg(`reply ${i}`)); + } + + const result = downsizeConversationImages(messages); + assert.equal(result.imageCount, 10); + assert.equal(result.strippedCount, 5); + assert.ok(result.processed); + }); + + it("handles mixed user and toolResult images", () => { + const messages: Message[] = []; + for (let i = 0; i < 8; i++) { + messages.push( + makeUserMsg([ + { type: "text", text: `check ${i}` }, + { type: "image", data: `uimg${i}`, mimeType: "image/png" }, + ]), + ); + messages.push(makeAssistantMsg(`processing ${i}`)); + messages.push(makeToolResultMsg(1)); + messages.push(makeAssistantMsg(`done ${i}`)); + } + + const result = downsizeConversationImages(messages); + // 8 user images + 8 tool result images = 16 total + assert.equal(result.imageCount, 16); + assert.equal(result.strippedCount, 11); // 16 - 5 recent + }); +}); diff --git a/packages/pi-coding-agent/src/core/image-overflow-recovery.ts b/packages/pi-coding-agent/src/core/image-overflow-recovery.ts new file mode 100644 index 000000000..3573514e4 --- /dev/null +++ 
b/packages/pi-coding-agent/src/core/image-overflow-recovery.ts @@ -0,0 +1,118 @@ +/** + * Image overflow recovery for many-image sessions. + * + * When a conversation accumulates many images (screenshots, file reads, etc.), + * the Anthropic API enforces a stricter per-image dimension limit (2000px) for + * "many-image requests." This module detects the resulting 400 error and + * recovers by stripping older images from the conversation history, preserving + * the most recent ones to maintain session continuity. + * + * @see https://github.com/gsd-build/gsd-2/issues/2874 + */ + +import type { Message, ImageContent, TextContent } from "@gsd/pi-ai"; + +/** + * Maximum image dimension (px) that the Anthropic API allows in many-image + * requests. Images at or above this size in a large conversation will be + * rejected with a 400 error. We use 1568 as the safe ceiling (Anthropic's + * recommended max for multi-image requests). + */ +export const MANY_IMAGE_MAX_DIMENSION = 1568; + +/** + * Number of recent images to preserve when stripping old images. + * Keeps the most recent screenshots/images so the model retains visual context + * for the current task. + */ +const RECENT_IMAGES_TO_KEEP = 5; + +/** + * Regex matching the Anthropic API error for oversized images in many-image requests. + */ +const IMAGE_DIMENSION_ERROR_RE = + /image.dimensions?.exceed.*max.*allowed.*size.*many.image/i; + +/** + * Detect whether an error message is the Anthropic "image dimensions exceed max + * allowed size for many-image requests" 400 error. 
+ */ +export function isImageDimensionError(errorMessage: string | undefined | null): boolean { + if (!errorMessage) return false; + return IMAGE_DIMENSION_ERROR_RE.test(errorMessage); +} + +export interface DownsizeResult { + /** Total number of images found in the conversation */ + imageCount: number; + /** Whether any images were stripped */ + processed: boolean; + /** Number of images that were stripped */ + strippedCount: number; +} + +/** + * Strip older images from conversation messages to recover from many-image + * dimension errors. Preserves the N most recent images and replaces older ones + * with a text placeholder. + * + * Mutates messages in place (same pattern as replaceMessages/compaction). + * + * Accepts Message[] (the LLM message union) so it works with both + * agent.state.messages and session entries. + */ +export function downsizeConversationImages(messages: Message[]): DownsizeResult { + // First pass: collect all image locations (message index + content index) + const imageLocations: Array<{ msgIdx: number; contentIdx: number }> = []; + + for (let msgIdx = 0; msgIdx < messages.length; msgIdx++) { + const msg = messages[msgIdx]; + if (msg.role === "assistant") continue; + + // UserMessage can have string content; ToolResultMessage always has array + if (msg.role === "user" && typeof msg.content === "string") continue; + + const contentArr = msg.content as (TextContent | ImageContent)[]; + if (!Array.isArray(contentArr)) continue; + + for (let contentIdx = 0; contentIdx < contentArr.length; contentIdx++) { + if (contentArr[contentIdx].type === "image") { + imageLocations.push({ msgIdx, contentIdx }); + } + } + } + + const imageCount = imageLocations.length; + if (imageCount === 0) { + return { imageCount: 0, processed: false, strippedCount: 0 }; + } + + // Determine which images to strip (all except the N most recent) + const stripCount = Math.max(0, imageCount - RECENT_IMAGES_TO_KEEP); + if (stripCount === 0) { + return { imageCount, 
processed: false, strippedCount: 0 }; + } + + const toStrip = imageLocations.slice(0, stripCount); + + // Second pass: replace stripped images with text placeholder. + // Process in reverse order to maintain content indices. + for (let i = toStrip.length - 1; i >= 0; i--) { + const { msgIdx, contentIdx } = toStrip[i]; + const msg = messages[msgIdx]; + if (msg.role === "assistant") continue; + if (msg.role === "user" && typeof msg.content === "string") continue; + + const contentArr = msg.content as (TextContent | ImageContent)[]; + const imageBlock = contentArr[contentIdx] as ImageContent; + const mimeType = imageBlock.mimeType || "image/unknown"; + + // Replace the image block with a text placeholder + (contentArr as any[])[contentIdx] = { + type: "text", + text: `[image removed to reduce context size — was ${mimeType}]`, + } as TextContent; + } + + return { imageCount, processed: true, strippedCount: stripCount }; +} diff --git a/packages/pi-coding-agent/src/core/index.ts b/packages/pi-coding-agent/src/core/index.ts index 10c6f1753..5dd346548 100644 --- a/packages/pi-coding-agent/src/core/index.ts +++ b/packages/pi-coding-agent/src/core/index.ts @@ -29,6 +29,7 @@ export { type ExecResult, type Extension, type ExtensionAPI, + type ExtensionManifest, type ExtensionCommandContext, type ExtensionContext, type ExtensionError, @@ -53,6 +54,11 @@ export { type SessionSwitchEvent, type SessionTreeEvent, type ToolCallEvent, + readManifest, + readManifestFromEntryPath, + type SortResult, + type SortWarning, + sortExtensionPaths, type ToolDefinition, type ToolRenderResultOptions, type ToolResultEvent, diff --git a/packages/pi-coding-agent/src/core/lsp/index.ts b/packages/pi-coding-agent/src/core/lsp/index.ts index 61237e7eb..bd2718634 100644 --- a/packages/pi-coding-agent/src/core/lsp/index.ts +++ b/packages/pi-coding-agent/src/core/lsp/index.ts @@ -340,6 +340,9 @@ async function runWorkspaceDiagnostics( const proc = spawn(cmd, cmdArgs, { cwd, stdio: ["ignore", "pipe", 
"pipe"],
+      // On Windows, project-type commands (tsc, cargo, etc.) may be .cmd
+      // wrappers that need shell resolution to avoid ENOENT/EINVAL (#2854).
+      shell: process.platform === "win32",
     });
     const abortHandler = () => {
       proc.kill();
diff --git a/packages/pi-coding-agent/src/core/lsp/lspmux.ts b/packages/pi-coding-agent/src/core/lsp/lspmux.ts
index 05ef13b38..6e01d7807 100644
--- a/packages/pi-coding-agent/src/core/lsp/lspmux.ts
+++ b/packages/pi-coding-agent/src/core/lsp/lspmux.ts
@@ -90,6 +90,9 @@ async function checkServerRunning(binaryPath: string): Promise<boolean> {
   try {
     const proc = spawn(binaryPath, ["status"], {
       stdio: ["ignore", "pipe", "pipe"],
+      // On Windows, the binary may be a .cmd wrapper requiring shell
+      // resolution to avoid ENOENT/EINVAL (#2854).
+      shell: process.platform === "win32",
     });
     const exited = await Promise.race([
diff --git a/packages/pi-coding-agent/src/core/messages.test.ts b/packages/pi-coding-agent/src/core/messages.test.ts
new file mode 100644
index 000000000..6741da93c
--- /dev/null
+++ b/packages/pi-coding-agent/src/core/messages.test.ts
@@ -0,0 +1,114 @@
+/**
+ * messages.test.ts — Tests for convertToLlm custom message handling.
+ *
+ * Reproduction test for #3026: background job completion notifications
+ * delivered as custom messages must be clearly distinguishable from
+ * user-typed input when converted to LLM messages.
+ */
+
+import test from "node:test";
+import assert from "node:assert/strict";
+import { convertToLlm, type CustomMessage } from "./messages.js";
+
+/** Extract the first content block from a message, asserting array content.
*/
+function firstTextBlock(msg: ReturnType<typeof convertToLlm>[number]) {
+  const { content } = msg;
+  assert.ok(Array.isArray(content), "Expected content to be an array");
+  const block = content[0];
+  assert.ok(typeof block === "object" && block !== null, "Expected first block to be an object");
+  return block;
+}
+
+test("convertToLlm wraps custom messages with system notification prefix", () => {
+  const customMsg: CustomMessage = {
+    role: "custom",
+    customType: "async_job_result",
+    content: "**Background job done: bg_abc123** (sleep 2, 2.1s)\n\ndone",
+    display: true,
+    timestamp: Date.now(),
+  };
+
+  const result = convertToLlm([customMsg]);
+  assert.equal(result.length, 1);
+  assert.equal(result[0].role, "user");
+
+  // The content must include a system notification wrapper so the LLM
+  // does not confuse it with user input (#3026).
+  const text = firstTextBlock(result[0]);
+  assert.equal(text.type, "text");
+  assert.ok(
+    "text" in text && text.text.includes("[system notification"),
+    "Custom message should be wrapped with system notification marker",
+  );
+});
+
+test("convertToLlm wraps custom messages with array content", () => {
+  const customMsg: CustomMessage = {
+    role: "custom",
+    customType: "bg-shell-status",
+    content: [{ type: "text", text: "Background processes:\n  ✓ bg1  dev-server  :3000" }],
+    display: false,
+    timestamp: Date.now(),
+  };
+
+  const result = convertToLlm([customMsg]);
+  assert.equal(result.length, 1);
+  assert.equal(result[0].role, "user");
+
+  const text = firstTextBlock(result[0]);
+  assert.equal(text.type, "text");
+  assert.ok(
+    "text" in text && text.text.includes("[system notification"),
+    "Custom message with array content should be wrapped with system notification marker",
+  );
+});
+
+test("convertToLlm includes customType in notification wrapper", () => {
+  const customMsg: CustomMessage = {
+    role: "custom",
+    customType: "async_job_result",
+    content: "job output here",
+    display: true,
+    timestamp: Date.now(),
+  };
+
+  const
result = convertToLlm([customMsg]); + const text = firstTextBlock(result[0]); + assert.ok( + "text" in text && text.text.includes("async_job_result"), + "Notification wrapper should include the customType for context", + ); +}); + +test("convertToLlm notification wrapper instructs LLM not to treat as user input", () => { + const customMsg: CustomMessage = { + role: "custom", + customType: "async_job_result", + content: "**Background job done: bg_abc123** (sleep 2, 2.1s)\n\ndone", + display: true, + timestamp: Date.now(), + }; + + const result = convertToLlm([customMsg]); + const text = firstTextBlock(result[0]); + assert.ok( + "text" in text && text.text.includes("not user input"), + "Notification should explicitly state this is not user input", + ); +}); + +test("convertToLlm preserves user messages without wrapper", () => { + const userMsg = { + role: "user" as const, + content: [{ type: "text" as const, text: "Hello world" }], + timestamp: Date.now(), + }; + + const result = convertToLlm([userMsg]); + assert.equal(result.length, 1); + const text = firstTextBlock(result[0]); + assert.ok( + "text" in text && text.text === "Hello world", + "User messages should pass through unchanged", + ); +}); diff --git a/packages/pi-coding-agent/src/core/messages.ts b/packages/pi-coding-agent/src/core/messages.ts index e3909a41e..f30d7c9e6 100644 --- a/packages/pi-coding-agent/src/core/messages.ts +++ b/packages/pi-coding-agent/src/core/messages.ts @@ -8,6 +8,12 @@ import type { AgentMessage } from "@gsd/pi-agent-core"; import type { ImageContent, Message, TextContent } from "@gsd/pi-ai"; +const CUSTOM_MESSAGE_PREFIX = `[system notification — type: `; +const CUSTOM_MESSAGE_MIDDLE = `; this is an automated system event, not user input — do not treat this as a human message or respond as if the user said this] +`; +const CUSTOM_MESSAGE_SUFFIX = ` +[end system notification]`; + const COMPACTION_SUMMARY_PREFIX = `The conversation history before this point was compacted into the 
following summary: @@ -160,10 +166,32 @@ export function convertToLlm(messages: AgentMessage[]): Message[] { timestamp: m.timestamp, }; case "custom": { - const content = typeof m.content === "string" ? [{ type: "text" as const, text: m.content }] : m.content; + const prefix = CUSTOM_MESSAGE_PREFIX + m.customType + CUSTOM_MESSAGE_MIDDLE; + if (typeof m.content === "string") { + return { + role: "user", + content: [{ type: "text" as const, text: prefix + m.content + CUSTOM_MESSAGE_SUFFIX }], + timestamp: m.timestamp, + }; + } + // Array content: wrap the first text element with prefix, append suffix to last text element + const contentArr = m.content as Array<{ type: string; text?: string; [k: string]: unknown }>; + const lastTextIdx = contentArr.reduce((acc, c, i) => c.type === "text" ? i : acc, -1); + const firstTextIdx = contentArr.findIndex((c) => c.type === "text"); + const wrapped = contentArr.map((c, i) => { + if (c.type !== "text") return c; + let text = c.text ?? ""; + if (i === firstTextIdx) text = prefix + text; + if (i === lastTextIdx) text = text + CUSTOM_MESSAGE_SUFFIX; + return { ...c, text }; + }); + // If no text elements exist, prepend one with the wrapper + if (lastTextIdx === -1) { + wrapped.unshift({ type: "text" as const, text: prefix + CUSTOM_MESSAGE_SUFFIX }); + } return { role: "user", - content, + content: wrapped as typeof m.content, timestamp: m.timestamp, }; } diff --git a/packages/pi-coding-agent/src/core/model-resolver.ts b/packages/pi-coding-agent/src/core/model-resolver.ts index bfe6ee86f..6d07b940b 100644 --- a/packages/pi-coding-agent/src/core/model-resolver.ts +++ b/packages/pi-coding-agent/src/core/model-resolver.ts @@ -37,6 +37,7 @@ const defaultModelPerProvider: Record<string, string> = { "opencode-go": "kimi-k2.5", "kimi-coding": "kimi-k2-thinking", "alibaba-coding-plan": "qwen3.5-plus", + ollama: "llama3.1:8b", "ollama-cloud": "qwen3:32b", }; diff --git a/packages/pi-coding-agent/src/core/resource-loader.ts b/packages/pi-coding-agent/src/core/resource-loader.ts index 6eb040829..eed291f46 100644 --- 
a/packages/pi-coding-agent/src/core/resource-loader.ts +++ b/packages/pi-coding-agent/src/core/resource-loader.ts @@ -129,6 +129,12 @@ export interface DefaultResourceLoaderOptions { appendSystemPrompt?: string; /** Names of bundled extensions (used to identify built-in extensions in conflict detection). */ bundledExtensionNames?: Set<string>; + /** + * Transform extension paths before loading. Receives the merged list of all + * discovered extension paths and returns a (possibly reordered/filtered) list. + * Use this to apply dependency sorting or registry-based filtering. + */ + extensionPathsTransform?: (paths: string[]) => { paths: string[]; diagnostics?: string[] }; extensionsOverride?: (base: LoadExtensionsResult) => LoadExtensionsResult; skillsOverride?: (base: { skills: Skill[]; diagnostics: ResourceDiagnostic[] }) => { skills: Skill[]; @@ -167,6 +173,7 @@ export class DefaultResourceLoader implements ResourceLoader { private systemPromptSource?: string; private appendSystemPromptSource?: string; private bundledExtensionNames: Set<string>; + private extensionPathsTransform?: (paths: string[]) => { paths: string[]; diagnostics?: string[] }; private extensionsOverride?: (base: LoadExtensionsResult) => LoadExtensionsResult; private skillsOverride?: (base: { skills: Skill[]; diagnostics: ResourceDiagnostic[] }) => { skills: Skill[]; @@ -223,6 +230,7 @@ export class DefaultResourceLoader implements ResourceLoader { this.systemPromptSource = options.systemPrompt; this.appendSystemPromptSource = options.appendSystemPrompt; this.bundledExtensionNames = options.bundledExtensionNames ?? 
new Set(); + this.extensionPathsTransform = options.extensionPathsTransform; this.extensionsOverride = options.extensionsOverride; this.skillsOverride = options.skillsOverride; this.promptsOverride = options.promptsOverride; @@ -378,10 +386,21 @@ export class DefaultResourceLoader implements ResourceLoader { const cliEnabledPrompts = getEnabledPaths(cliExtensionPaths.prompts); const cliEnabledThemes = getEnabledPaths(cliExtensionPaths.themes); - const extensionPaths = this.noExtensions + let extensionPaths = this.noExtensions ? cliEnabledExtensions : this.mergePaths(cliEnabledExtensions, enabledExtensions); + // Apply path transform (dependency sorting, registry filtering) if provided + if (this.extensionPathsTransform) { + const transformed = this.extensionPathsTransform(extensionPaths); + extensionPaths = transformed.paths; + if (transformed.diagnostics?.length) { + for (const msg of transformed.diagnostics) { + process.stderr.write(`[extensions] ${msg}\n`); + } + } + } + const extensionsResult = await loadExtensions(extensionPaths, this.cwd, this.eventBus); const inlineExtensions = await this.loadExtensionFactories(extensionsResult.runtime); extensionsResult.extensions.push(...inlineExtensions.extensions); diff --git a/packages/pi-coding-agent/src/core/retry-handler.test.ts b/packages/pi-coding-agent/src/core/retry-handler.test.ts new file mode 100644 index 000000000..04a0aba09 --- /dev/null +++ b/packages/pi-coding-agent/src/core/retry-handler.test.ts @@ -0,0 +1,255 @@ +/** + * RetryHandler tests — long-context entitlement 429 error handling (#2803) + * + * Verifies that "Extra usage is required for long context requests" errors + * are classified as quota_exhausted (not rate_limit) and trigger a model + * downgrade from [1m] to base when no cross-provider fallback exists. 
+ */ + +import { describe, it, beforeEach, mock, type Mock } from "node:test"; +import assert from "node:assert/strict"; +import { RetryHandler, type RetryHandlerDeps } from "./retry-handler.js"; +import type { Api, AssistantMessage, Model } from "@gsd/pi-ai"; +import type { FallbackResolver } from "./fallback-resolver.js"; +import type { ModelRegistry } from "./model-registry.js"; +import type { SettingsManager } from "./settings-manager.js"; + +// ─── Helpers ──────────────────────────────────────────────────────────────── + +function createMockModel(provider: string, id: string): Model { + return { + id, + name: id, + api: "anthropic" as Api, + provider, + baseUrl: "https://api.anthropic.com", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1_000_000, + maxTokens: 16384, + } as Model; +} + +function errorMessage(msg: string): AssistantMessage { + return { + role: "assistant", + content: [], + api: "anthropic-messages", + provider: "anthropic", + model: "claude-opus-4-6[1m]", + usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } }, + stopReason: "error", + errorMessage: msg, + timestamp: Date.now(), + } as AssistantMessage; +} + +interface MockDeps { + deps: RetryHandlerDeps; + emittedEvents: Array<Record<string, any>>; + continueFn: Mock<() => Promise<void>>; + onModelChangeFn: Mock<(model: Model) => void>; + markUsageLimitReached: Mock<(...args: any[]) => boolean>; + findFallback: Mock<(...args: any[]) => Promise<any>>; + findModel: Mock<(provider: string, modelId: string) => Model | undefined>; +} + +function createMockDeps(overrides?: { + model?: Model; + retryEnabled?: boolean; + markUsageLimitReachedResult?: boolean; + fallbackResult?: any; + findModelResult?: (provider: string, modelId: string) => Model | undefined; +}): MockDeps { + const model = overrides?.model ?? 
createMockModel("anthropic", "claude-opus-4-6[1m]"); + const emittedEvents: Array<Record<string, any>> = []; + const continueFn = mock.fn(async () => {}); + const onModelChangeFn = mock.fn((_model: Model) => {}); + const markUsageLimitReached = mock.fn( + () => overrides?.markUsageLimitReachedResult ?? false, + ); + const findFallback = mock.fn(async () => overrides?.fallbackResult ?? null); + const findModel = mock.fn( + overrides?.findModelResult ?? ((_provider: string, _modelId: string) => undefined), + ); + + const messages: Array<{ role: string } & Record<string, unknown>> = []; + + const deps: RetryHandlerDeps = { + agent: { + continue: continueFn, + state: { messages }, + setModel: mock.fn(), + replaceMessages: mock.fn((newMessages: any[]) => { + messages.length = 0; + messages.push(...newMessages); + }), + } as any, + settingsManager: { + getRetryEnabled: () => overrides?.retryEnabled ?? true, + getRetrySettings: () => ({ + enabled: overrides?.retryEnabled ?? true, + maxRetries: 5, + baseDelayMs: 1000, + maxDelayMs: 30000, + }), + } as unknown as SettingsManager, + modelRegistry: { + authStorage: { + markUsageLimitReached, + }, + find: findModel, + } as unknown as ModelRegistry, + fallbackResolver: { + findFallback, + } as unknown as FallbackResolver, + getModel: () => model, + getSessionId: () => "test-session", + emit: (event: any) => emittedEvents.push(event), + onModelChange: onModelChangeFn, + }; + + return { deps, emittedEvents, continueFn, onModelChangeFn, markUsageLimitReached, findFallback, findModel }; +} + +// ─── _classifyErrorType (tested via handleRetryableError behavior) ────────── + +describe("RetryHandler — long-context entitlement 429 (#2803)", () => { + + describe("error classification", () => { + it("classifies 'Extra usage is required for long context requests' as quota_exhausted, not rate_limit", async () => { + // When the error is classified as quota_exhausted AND no alternate credentials + // AND no fallback, the handler should emit fallback_chain_exhausted and stop. 
+ // If misclassified as rate_limit, it would enter the backoff loop instead. + const { deps, emittedEvents, findModel } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6[1m]"), + markUsageLimitReachedResult: false, // no alternate credentials + fallbackResult: null, // no cross-provider fallback + findModelResult: () => undefined, // no base model either + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage( + '429 {"type":"error","error":{"type":"rate_limit_error","message":"Extra usage is required for long context requests."}}' + ); + + const result = await handler.handleRetryableError(msg); + + // Should NOT retry (would be true if misclassified as rate_limit entering backoff) + assert.equal(result, false); + + // Should emit fallback_chain_exhausted (quota_exhausted path), NOT auto_retry_start (backoff path) + const chainExhausted = emittedEvents.find((e) => e.type === "fallback_chain_exhausted"); + assert.ok(chainExhausted, "Expected fallback_chain_exhausted event for entitlement error"); + + const retryStart = emittedEvents.find((e) => e.type === "auto_retry_start"); + assert.equal(retryStart, undefined, "Should NOT emit auto_retry_start for entitlement error"); + }); + + it("still classifies regular 429 rate limits as rate_limit", async () => { + // A normal "rate limit" 429 should still be classified as rate_limit + const { deps, emittedEvents } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6"), + markUsageLimitReachedResult: false, + fallbackResult: null, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("429 Too Many Requests"); + + const result = await handler.handleRetryableError(msg); + + // Should enter the backoff loop (rate_limit path, not quota_exhausted) + assert.equal(result, true); + + const retryStart = emittedEvents.find((e) => e.type === "auto_retry_start"); + assert.ok(retryStart, "Regular 429 should enter backoff retry"); + }); + }); + + 
describe("long-context model downgrade", () => { + it("downgrades from [1m] to base model when entitlement error and no fallback", async () => { + const baseModel = createMockModel("anthropic", "claude-opus-4-6"); + const { deps, emittedEvents, onModelChangeFn, continueFn } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6[1m]"), + markUsageLimitReachedResult: false, + fallbackResult: null, + findModelResult: (provider: string, modelId: string) => { + if (provider === "anthropic" && modelId === "claude-opus-4-6") return baseModel; + return undefined; + }, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("Extra usage is required for long context requests."); + + const result = await handler.handleRetryableError(msg); + + assert.equal(result, true, "Should retry after downgrade"); + + // Should have called setModel with the base model + const setModelCalls = (deps.agent.setModel as any).mock.calls; + assert.equal(setModelCalls.length, 1); + assert.equal(setModelCalls[0].arguments[0].id, "claude-opus-4-6"); + + // Should have notified about model change + assert.equal(onModelChangeFn.mock.calls.length, 1); + + // Should emit a fallback_provider_switch event indicating downgrade + const switchEvent = emittedEvents.find((e) => e.type === "fallback_provider_switch"); + assert.ok(switchEvent, "Expected fallback_provider_switch event for downgrade"); + assert.ok(switchEvent!.reason.includes("long context downgrade"), `reason should mention downgrade: ${switchEvent!.reason}`); + }); + + it("emits fallback_chain_exhausted when base model is also unavailable", async () => { + const { deps, emittedEvents } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6[1m]"), + markUsageLimitReachedResult: false, + fallbackResult: null, + findModelResult: () => undefined, // base model not found + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("Extra usage is required for long context 
requests."); + + const result = await handler.handleRetryableError(msg); + + assert.equal(result, false); + const chainExhausted = emittedEvents.find((e) => e.type === "fallback_chain_exhausted"); + assert.ok(chainExhausted, "Expected fallback_chain_exhausted when base model unavailable"); + }); + + it("does not attempt downgrade for non-[1m] models", async () => { + // When a regular model (no [1m] suffix) gets a quota_exhausted error + // with no fallback, it should just stop — no downgrade attempt. + const { deps, emittedEvents } = createMockDeps({ + model: createMockModel("anthropic", "claude-opus-4-6"), + markUsageLimitReachedResult: false, + fallbackResult: null, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage("Extra usage is required for long context requests."); + + const result = await handler.handleRetryableError(msg); + + assert.equal(result, false); + const chainExhausted = emittedEvents.find((e) => e.type === "fallback_chain_exhausted"); + assert.ok(chainExhausted); + + // No downgrade switch should occur + const switchEvent = emittedEvents.find((e) => e.type === "fallback_provider_switch"); + assert.equal(switchEvent, undefined, "Should not switch for non-[1m] models"); + }); + }); + + describe("isRetryableError", () => { + it("considers long-context entitlement error as retryable", () => { + const { deps } = createMockDeps(); + const handler = new RetryHandler(deps); + const msg = errorMessage("Extra usage is required for long context requests."); + assert.equal(handler.isRetryableError(msg), true); + }); + }); +}); diff --git a/packages/pi-coding-agent/src/core/retry-handler.ts b/packages/pi-coding-agent/src/core/retry-handler.ts index 9bdeac8f6..3e1f50daf 100644 --- a/packages/pi-coding-agent/src/core/retry-handler.ts +++ b/packages/pi-coding-agent/src/core/retry-handler.ts @@ -107,7 +107,7 @@ export class RetryHandler { if (isContextOverflow(message, contextWindow)) return false; const err = message.errorMessage; - 
return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|temporarily backed off/i.test( + return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay|network.?(?:is\s+)?unavailable|credentials.*expired|temporarily backed off|extra usage is required/i.test( err, ); } @@ -202,6 +202,10 @@ export class RetryHandler { // No fallback available either if (errorType === "quota_exhausted") { + // Try long-context model downgrade ([1m] → base) before giving up + const downgraded = this._tryLongContextDowngrade(message); + if (downgraded) return true; + this._deps.emit({ type: "fallback_chain_exhausted", reason: `All providers exhausted for ${this._deps.getModel()!.provider}/${this._deps.getModel()!.id}`, @@ -343,12 +347,59 @@ export class RetryHandler { */ private _classifyErrorType(errorMessage: string): UsageLimitErrorType { const err = errorMessage.toLowerCase(); + // Long-context entitlement errors are billing gates, not transient rate limits. + // Must be checked before the generic 429/rate_limit regex. + if (/extra usage is required|long context required/i.test(err)) return "quota_exhausted"; if (/quota|billing|exceeded.*limit|usage.*limit/i.test(err)) return "quota_exhausted"; if (/rate.?limit|too many requests|429/i.test(err)) return "rate_limit"; if (/500|502|503|504|server.?error|internal.?error|service.?unavailable/i.test(err)) return "server_error"; return "unknown"; } + /** + * Attempt to downgrade a long-context model (e.g. 
claude-opus-4-6[1m]) to its + * base model (claude-opus-4-6) when the account lacks the long-context billing + * entitlement. Returns true if the downgrade was initiated. + */ + private _tryLongContextDowngrade(message: AssistantMessage): boolean { + const currentModel = this._deps.getModel(); + if (!currentModel) return false; + + // Only attempt downgrade for [1m] (or similar long-context) model IDs + const match = currentModel.id.match(/^(.+)\[\d+m\]$/); + if (!match) return false; + + const baseModelId = match[1]; + const baseModel = this._deps.modelRegistry.find(currentModel.provider, baseModelId); + if (!baseModel) return false; + + const previousId = currentModel.id; + this._deps.agent.setModel(baseModel); + this._deps.onModelChange(baseModel); + this._removeLastAssistantError(); + + this._deps.emit({ + type: "fallback_provider_switch", + from: `${currentModel.provider}/${previousId}`, + to: `${baseModel.provider}/${baseModel.id}`, + reason: `long context downgrade: ${previousId} → ${baseModel.id}`, + }); + + this._deps.emit({ + type: "auto_retry_start", + attempt: this._retryAttempt + 1, + maxAttempts: this._deps.settingsManager.getRetrySettings().maxRetries, + delayMs: 0, + errorMessage: `${message.errorMessage} (long context downgrade)`, + }); + + setTimeout(() => { + this._deps.agent.continue().catch(() => {}); + }, 0); + + return true; + } + /** Remove the last assistant error message from agent state */ private _removeLastAssistantError(): void { const messages = this._deps.agent.state.messages; diff --git a/packages/pi-coding-agent/src/core/tools/hashline-read.ts b/packages/pi-coding-agent/src/core/tools/hashline-read.ts index fc2da81eb..f7d944d14 100644 --- a/packages/pi-coding-agent/src/core/tools/hashline-read.ts +++ b/packages/pi-coding-agent/src/core/tools/hashline-read.ts @@ -123,12 +123,15 @@ export function createHashlineReadTool(cwd: string, options?: HashlineReadToolOp const allLines = textContent.split("\n"); const totalFileLines = 
allLines.length; - const startLine = offset ? Math.max(0, offset - 1) : 0; - const startLineDisplay = startLine + 1; + let startLine = offset ? Math.max(0, offset - 1) : 0; + // Clamp offset to file bounds instead of throwing (#3007) + let offsetClamped = false; if (startLine >= allLines.length) { - throw new Error(`Offset ${offset} is beyond end of file (${allLines.length} lines total)`); + startLine = Math.max(0, allLines.length - 1); + offsetClamped = true; } + const startLineDisplay = startLine + 1; let selectedContent: string; let userLimitedLines: number | undefined; @@ -172,6 +175,11 @@ export function createHashlineReadTool(cwd: string, options?: HashlineReadToolOp outputText = formatHashLines(truncation.content, startLineDisplay); } + // Prepend clamp notice so the agent knows offset was adjusted + if (offsetClamped) { + outputText = `[Offset ${offset} beyond end of file (${totalFileLines} lines). Clamped to line ${startLineDisplay}.]\n\n${outputText}`; + } + content = [{ type: "text", text: outputText }]; } diff --git a/packages/pi-coding-agent/src/core/tools/read.ts b/packages/pi-coding-agent/src/core/tools/read.ts index c2f23e60a..309e43b57 100644 --- a/packages/pi-coding-agent/src/core/tools/read.ts +++ b/packages/pi-coding-agent/src/core/tools/read.ts @@ -133,13 +133,18 @@ export function createReadTool(cwd: string, options?: ReadToolOptions): AgentToo const totalFileLines = allLines.length; // Apply offset if specified (1-indexed to 0-indexed) - const startLine = offset ? Math.max(0, offset - 1) : 0; - const startLineDisplay = startLine + 1; // For display (1-indexed) + let startLine = offset ? Math.max(0, offset - 1) : 0; - // Check if offset is out of bounds + // Clamp offset to file bounds instead of throwing (#3007). + // When an agent requests offset:30 on a 13-line file, return + // the last line with a notice rather than an error that + // propagates as invalid JSON downstream. 
+ let offsetClamped = false; if (startLine >= allLines.length) { - throw new Error(`Offset ${offset} is beyond end of file (${allLines.length} lines total)`); + startLine = Math.max(0, allLines.length - 1); + offsetClamped = true; } + const startLineDisplay = startLine + 1; // For display (1-indexed) // If limit is specified by user, use it; otherwise we'll let truncateHead decide let selectedContent: string; @@ -187,6 +192,11 @@ export function createReadTool(cwd: string, options?: ReadToolOptions): AgentToo outputText = truncation.content; } + // Prepend clamp notice so the agent knows offset was adjusted + if (offsetClamped) { + outputText = `[Offset ${offset} beyond end of file (${totalFileLines} lines). Clamped to line ${startLineDisplay}.]\n\n${outputText}`; + } + content = [{ type: "text", text: outputText }]; } diff --git a/packages/pi-coding-agent/src/core/tools/spawn-shell-windows.test.ts b/packages/pi-coding-agent/src/core/tools/spawn-shell-windows.test.ts new file mode 100644 index 000000000..a7929a1dd --- /dev/null +++ b/packages/pi-coding-agent/src/core/tools/spawn-shell-windows.test.ts @@ -0,0 +1,92 @@ +/** + * spawn-shell-windows.test.ts — Regression test for Windows spawn ENOENT/EINVAL. + * + * On Windows, npm/npx/tsc and other tools are installed as .cmd batch scripts. + * Node's `spawn()` without `shell: true` cannot execute .cmd files, resulting + * in ENOENT or EINVAL errors. Every spawn site that may invoke a user-installed + * binary (not `node` or a shell like `sh`/`bash`/`cmd`) must include + * `shell: process.platform === "win32"` so the call is resolved through cmd.exe + * on Windows while remaining a direct exec on POSIX. + * + * This test structurally scans all spawn sites and verifies the guard is present. 
+ * + * Fixes: gsd-build/gsd-2#2854 + */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, dirname, relative } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const coreDir = join(__dirname, ".."); + +/** + * Files that call `spawn()` with a user-facing binary (not `node`, `sh`, `bash`, + * or `cmd`) and therefore need the Windows shell guard. + * + * If a file spawns only hardcoded system binaries (like `node` in rpc-client.ts), + * it does not need the guard and should NOT appear here. + */ +const SPAWN_FILES_NEEDING_SHELL_GUARD = [ + // Extension's GSD client — spawns the `gsd` binary which is a .cmd on Windows + join(coreDir, "..", "..", "..", "vscode-extension", "src", "gsd-client.ts"), + // exec.ts — used by extensions to run arbitrary commands + join(coreDir, "exec.ts"), + // LSP index — spawns project-type commands (tsc, cargo, etc.) + join(coreDir, "lsp", "index.ts"), + // LSP client — spawns LSP server binaries (npx, etc.) + join(coreDir, "lsp", "client.ts"), + // LSP mux — spawns lspmux binary + join(coreDir, "lsp", "lspmux.ts"), + // Package manager — spawns npm/yarn/pnpm + join(coreDir, "package-manager.ts"), +]; + +test("all spawn sites that invoke user-facing binaries include shell: process.platform === 'win32'", () => { + const failures: string[] = []; + + for (const file of SPAWN_FILES_NEEDING_SHELL_GUARD) { + let content: string; + try { + content = readFileSync(file, "utf-8"); + } catch { + // File may not exist in this checkout — skip + continue; + } + + const lines = content.split("\n"); + + // Find all spawn(..., { ... }) call sites and check each one + // for the presence of `shell: process.platform === "win32"` within + // 8 lines after the spawn call. 
+ for (let i = 0; i < lines.length; i++) { + const line = lines[i]!; + // Skip comments + if (line.trim().startsWith("//") || line.trim().startsWith("*")) continue; + + // Detect a spawn() call + if (/\bspawn\(/.test(line)) { + // Look ahead up to 8 lines for the shell guard + const lookahead = lines.slice(i, i + 8).join("\n"); + const hasShellGuard = + /shell:\s*process\.platform\s*===\s*["']win32["']/.test(lookahead); + + if (!hasShellGuard) { + const relPath = relative(join(coreDir, "..", ".."), file); + failures.push(`${relPath}:${i + 1}`); + } + } + } + } + + assert.deepEqual( + failures, + [], + `The following spawn sites are missing 'shell: process.platform === "win32"':\n` + + failures.map(f => ` - ${f}`).join("\n") + + `\nOn Windows, .cmd wrapper scripts (npm, npx, tsc, gsd) require shell ` + + `resolution. Without this guard, spawn fails with ENOENT or EINVAL.`, + ); +}); diff --git a/packages/pi-coding-agent/src/index.ts b/packages/pi-coding-agent/src/index.ts index 12327173b..9b0a50fc7 100644 --- a/packages/pi-coding-agent/src/index.ts +++ b/packages/pi-coding-agent/src/index.ts @@ -68,6 +68,7 @@ export type { Extension, ExtensionActions, ExtensionAPI, + ExtensionManifest, ExtensionCommandContext, ExtensionCommandContextActions, ExtensionContext, @@ -119,6 +120,8 @@ export type { ToolCallEvent, ToolDefinition, ToolInfo, + SortResult, + SortWarning, ToolRenderResultOptions, ToolResultEvent, TurnEndEvent, @@ -137,6 +140,9 @@ export { importExtensionModule, isToolCallEventType, isToolResultEventType, + readManifest, + readManifestFromEntryPath, + sortExtensionPaths, wrapRegisteredTool, wrapRegisteredTools, wrapToolsWithExtensions, diff --git a/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts b/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts index ebe9231ed..aeb2be064 100644 --- a/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts +++ 
b/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts @@ -337,5 +337,12 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & { host.showError(event.reason); host.ui.requestRender(); break; + + case "image_overflow_recovery": + host.showStatus( + `Removed ${event.strippedCount} older image(s) to comply with API limits. Retrying...`, + ); + host.ui.requestRender(); + break; } } diff --git a/packages/pi-coding-agent/src/modes/rpc/remote-terminal.ts b/packages/pi-coding-agent/src/modes/rpc/remote-terminal.ts index 84f78f950..4dda9b0c9 100644 --- a/packages/pi-coding-agent/src/modes/rpc/remote-terminal.ts +++ b/packages/pi-coding-agent/src/modes/rpc/remote-terminal.ts @@ -49,6 +49,12 @@ export class RemoteTerminal implements Terminal { return this._rows; } + get isTTY(): boolean { + // RemoteTerminal renders to a browser-based terminal emulator via + // the RPC bridge — it behaves like a real TTY for rendering purposes. + return true; + } + get kittyProtocolActive(): boolean { return false; } diff --git a/packages/pi-tui/src/terminal.ts b/packages/pi-tui/src/terminal.ts index 52bb27ad3..ff84a6283 100644 --- a/packages/pi-tui/src/terminal.ts +++ b/packages/pi-tui/src/terminal.ts @@ -9,6 +9,9 @@ const cjsRequire = createRequire(import.meta.url); * Minimal terminal interface for TUI */ export interface Terminal { + // Whether stdout is a real TTY (false for pipes, e.g. 
RPC bridge processes) + readonly isTTY: boolean; + // Start the terminal with input and resize handlers start(onInput: (data: string) => void, onResize: () => void): void; @@ -63,11 +66,22 @@ export class ProcessTerminal implements Terminal { private stdinDataHandler?: (data: string) => void; private writeLogPath = process.env.PI_TUI_WRITE_LOG || ""; + get isTTY(): boolean { + return !!process.stdout.isTTY; + } + get kittyProtocolActive(): boolean { return this._kittyProtocolActive; } start(onInput: (data: string) => void, onResize: () => void): void { + // Non-TTY stdout (pipe) — skip TUI initialization entirely. + // RPC bridge processes communicate via JSON, not terminal escape codes. + // Without this guard, the render loop burns 500%+ CPU. (issue #3095) + if (!this.isTTY) { + return; + } + this.inputHandler = onInput; this.resizeHandler = onResize; diff --git a/packages/pi-tui/src/tui.ts b/packages/pi-tui/src/tui.ts index d0154b0ce..8e3db6f05 100644 --- a/packages/pi-tui/src/tui.ts +++ b/packages/pi-tui/src/tui.ts @@ -399,6 +399,12 @@ export class TUI extends Container { start(): void { this.stopped = false; + // Non-TTY stdout (pipe) — skip TUI entirely to avoid burning CPU. + // RPC bridge processes have piped stdio; rendering ANSI escape codes + // to a pipe is pure waste and causes a runaway render loop. 
(issue #3095) + if (!this.terminal.isTTY) { + return; + } this.terminal.start( (data) => this.handleInput(data), () => this.requestRender(), @@ -458,6 +464,8 @@ export class TUI extends Container { } requestRender(force = false): void { + // Skip rendering on non-TTY stdout to prevent CPU burn (issue #3095) + if (!this.terminal.isTTY) return; if (force) { this.previousLines = []; this.previousWidth = -1; // -1 triggers widthChanged, forcing a full clear diff --git a/scripts/ensure-workspace-builds.cjs b/scripts/ensure-workspace-builds.cjs index 44f7ea2c4..10a6638e4 100644 --- a/scripts/ensure-workspace-builds.cjs +++ b/scripts/ensure-workspace-builds.cjs @@ -37,6 +37,48 @@ function newestSrcMtime(dir) { return newest } +/** + * Detects workspace packages whose dist/ is missing or stale. + * + * Missing dist/index.js is always reported (the package won't work at all). + * + * Staleness (src/ newer than dist/) is ONLY checked when a .git directory + * exists at root — indicating a development clone. In npm tarball installs, + * file timestamps are unreliable (npm sets all files to a canonical date, + * but extraction ordering can cause src/ to appear 1-2 seconds newer than + * dist/). Attempting to rebuild in that scenario is dangerous: devDependencies + * (including TypeScript) are not installed, and any globally-installed tsc + * may produce broken output that overwrites the known-good dist/. + * + * @param {string} root Project root directory + * @param {string[]} packages Package directory names to check + * @returns {string[]} Package names that need rebuilding + */ +function detectStalePackages(root, packages) { + const packagesDir = join(root, 'packages') + const isDevClone = existsSync(join(root, '.git')) + + const stale = [] + for (const pkg of packages) { + const distIndex = join(packagesDir, pkg, 'dist', 'index.js') + if (!existsSync(distIndex)) { + stale.push(pkg) + continue + } + // Only check src vs dist timestamps in development clones. 
+    // In npm tarball installs, timestamps are unreliable and rebuilding
+    // without devDependencies can corrupt the pre-built dist/ (#2877).
+    if (isDevClone) {
+      const distMtime = statSync(distIndex).mtimeMs
+      const srcMtime = newestSrcMtime(join(packagesDir, pkg, 'src'))
+      if (srcMtime > distMtime) {
+        stale.push(pkg)
+      }
+    }
+  }
+  return stale
+}
+
 if (require.main === module) {
   const root = resolve(__dirname, '..')
   const packagesDir = join(root, 'packages')
@@ -57,19 +99,7 @@ if (require.main === module) {
     'pi-coding-agent',
   ]

-  const stale = []
-  for (const pkg of WORKSPACE_PACKAGES) {
-    const distIndex = join(packagesDir, pkg, 'dist', 'index.js')
-    if (!existsSync(distIndex)) {
-      stale.push(pkg)
-      continue
-    }
-    const distMtime = statSync(distIndex).mtimeMs
-    const srcMtime = newestSrcMtime(join(packagesDir, pkg, 'src'))
-    if (srcMtime > distMtime) {
-      stale.push(pkg)
-    }
-  }
+  const stale = detectStalePackages(root, WORKSPACE_PACKAGES)

   if (stale.length === 0) process.exit(0)

@@ -78,6 +108,7 @@ if (require.main === module) {
   for (const pkg of stale) {
     const pkgDir = join(packagesDir, pkg)
     try {
+      // execSync is safe here: the command is a hardcoded string, not user input
       execSync('npm run build', { cwd: pkgDir, stdio: 'pipe' })
       process.stderr.write(`  ✓ ${pkg}\n`)
     } catch (err) {
@@ -87,4 +118,4 @@
   }
 }

-module.exports = { newestSrcMtime }
+module.exports = { newestSrcMtime, detectStalePackages }
diff --git a/src/cli.ts b/src/cli.ts
index a5b255fa9..9df7baf4c 100644
--- a/src/cli.ts
+++ b/src/cli.ts
@@ -16,7 +16,8 @@ import { agentDir, sessionsDir, authFilePath } from './app-paths.js'
 import { initResources, buildResourceLoader, getNewerManagedResourceVersion } from './resource-loader.js'
 import { ensureManagedTools } from './tool-bootstrap.js'
 import { loadStoredEnvKeys } from './wizard.js'
-import { getPiDefaultModelAndProvider, migratePiCredentials } from './pi-migration.js'
+import { migratePiCredentials } from './pi-migration.js'
+import { validateConfiguredModel } from './startup-model-validation.js'
 import { shouldRunOnboarding, runOnboarding } from './onboarding.js'
 import chalk from 'chalk'
 import { checkForUpdates } from './update-check.js'
@@ -170,6 +171,7 @@ const hasSubcommand = cliFlags.messages.length > 0
 if (!process.stdin.isTTY && !isPrintMode && !hasSubcommand && !cliFlags.listModels && !cliFlags.web) {
   process.stderr.write('[gsd] Error: Interactive mode requires a terminal (TTY).\n')
   process.stderr.write('[gsd] Non-interactive alternatives:\n')
+  process.stderr.write('[gsd]   gsd auto                     Auto-mode (pipeable, no TUI)\n')
   process.stderr.write('[gsd]   gsd --print "your message"   Single-shot prompt\n')
   process.stderr.write('[gsd]   gsd --mode rpc               JSON-RPC over stdin/stdout\n')
   process.stderr.write('[gsd]   gsd --mode mcp               MCP server over stdin/stdout\n')
@@ -300,6 +302,23 @@ if (cliFlags.messages[0] === 'headless') {
   process.exit(0)
 }

+// `gsd auto [args...]` — shorthand for `gsd headless auto [args...]` (#2732)
+// Without this, `gsd auto` falls through to the interactive TUI which hangs
+// when stdin/stdout are piped (non-TTY environments).
+if (cliFlags.messages[0] === 'auto') {
+  await ensureRtkBootstrap()
+  const { runHeadless, parseHeadlessArgs } = await import('./headless.js')
+  // Rewrite argv so parseHeadlessArgs sees: [node, gsd, headless, auto, ...rest]
+  const rewrittenArgv = [
+    process.argv[0],
+    process.argv[1],
+    'headless',
+    ...cliFlags.messages, // ['auto', ...extra args]
+  ]
+  await runHeadless(parseHeadlessArgs(rewrittenArgv))
+  process.exit(0)
+}
+
 // Pi's tool bootstrap can mis-detect already-installed fd/rg on some systems
 // because spawnSync(..., ["--version"]) returns EPERM despite a zero exit code.
 // Provision local managed binaries first so Pi sees them without probing PATH.
@@ -391,42 +410,6 @@ if (cliFlags.listModels !== undefined) {
   process.exit(0)
 }

-// Validate configured model on startup — catches stale settings from prior installs
-// (e.g. grok-2 which no longer exists) and fresh installs with no settings.
-// Only resets the default when the configured model no longer exists in the registry;
-// never overwrites a valid user choice.
-const configuredProvider = settingsManager.getDefaultProvider()
-const configuredModel = settingsManager.getDefaultModel()
-const allModels = modelRegistry.getAll()
-const availableModels = modelRegistry.getAvailable()
-const configuredExists = configuredProvider && configuredModel &&
-  allModels.some((m) => m.provider === configuredProvider && m.id === configuredModel)
-const configuredAvailable = configuredProvider && configuredModel &&
-  availableModels.some((m) => m.provider === configuredProvider && m.id === configuredModel)
-
-if (!configuredModel || !configuredExists) {
-  // Model not configured at all, or removed from registry — pick a fallback.
-  // Only fires when the model is genuinely unknown (not just temporarily unavailable).
-  const piDefault = getPiDefaultModelAndProvider()
-  const preferred =
-    (piDefault
-      ? availableModels.find((m) => m.provider === piDefault.provider && m.id === piDefault.model)
-      : undefined) ||
-    availableModels.find((m) => m.provider === 'openai' && m.id === 'gpt-5.4') ||
-    availableModels.find((m) => m.provider === 'openai') ||
-    availableModels.find((m) => m.provider === 'anthropic' && m.id === 'claude-opus-4-6') ||
-    availableModels.find((m) => m.provider === 'anthropic' && m.id.includes('opus')) ||
-    availableModels.find((m) => m.provider === 'anthropic') ||
-    availableModels[0]
-  if (preferred) {
-    settingsManager.setDefaultModelAndProvider(preferred.provider, preferred.id)
-  }
-}
-
-if (settingsManager.getDefaultThinkingLevel() !== 'off' && !configuredExists) {
-  settingsManager.setDefaultThinkingLevel('off')
-}
-
 // GSD always uses quiet startup — the gsd extension renders its own branded header
 if (!settingsManager.getQuietStartup()) {
   settingsManager.setQuietStartup(true)
@@ -477,6 +460,11 @@ if (isPrintMode) {
   })
   markStartup('createAgentSession')

+  // Validate configured model AFTER extensions have registered their models (#2626).
+  // Before this, extension-provided models (e.g. claude-code/*) were not yet in the
+  // registry, causing the user's valid choice to be silently overwritten.
+  validateConfiguredModel(modelRegistry, settingsManager)
+
   if (extensionsResult.errors.length > 0) {
     for (const err of extensionsResult.errors) {
       // Downgrade conflicts with built-in tools to warnings (#1347)
@@ -565,6 +553,20 @@ if (!cliFlags.worktree && !isPrintMode) {
   } catch { /* non-fatal */ }
 }

+// ---------------------------------------------------------------------------
+// Auto-redirect: `gsd auto` with piped stdout → headless mode (#2732)
+// When stdout is not a TTY (e.g. `gsd auto | cat`, `gsd auto > file`),
+// the TUI cannot render and the process hangs. Redirect to headless mode
+// which handles non-interactive output gracefully.
+// ---------------------------------------------------------------------------
+if (cliFlags.messages[0] === 'auto' && !process.stdout.isTTY) {
+  await ensureRtkBootstrap()
+  const { runHeadless, parseHeadlessArgs } = await import('./headless.js')
+  process.stderr.write('[gsd] stdout is not a terminal — running auto-mode in headless mode.\n')
+  await runHeadless(parseHeadlessArgs(['node', 'gsd', 'headless', ...cliFlags.messages.slice(1)]))
+  process.exit(0)
+}
+
 // ---------------------------------------------------------------------------
 // Interactive mode — normal TTY session
 // ---------------------------------------------------------------------------
@@ -611,6 +613,11 @@ const { session, extensionsResult } = await createAgentSession({
 })
 markStartup('createAgentSession')

+// Validate configured model AFTER extensions have registered their models (#2626).
+// Before this, extension-provided models (e.g. claude-code/*) were not yet in the
+// registry, causing the user's valid choice to be silently overwritten.
+validateConfiguredModel(modelRegistry, settingsManager)
+
 if (extensionsResult.errors.length > 0) {
   for (const err of extensionsResult.errors) {
     const isSuperseded = err.error.includes("supersedes");
@@ -662,14 +669,21 @@ if (enabledModelPatterns && enabledModelPatterns.length > 0) {
   }
 }

-if (!process.stdin.isTTY) {
-  process.stderr.write('[gsd] Error: Interactive mode requires a terminal (TTY).\n')
+if (!process.stdin.isTTY || !process.stdout.isTTY) {
+  const missing = !process.stdin.isTTY && !process.stdout.isTTY
+    ? 'stdin and stdout are'
+    : !process.stdin.isTTY
+      ? 'stdin is'
+      : 'stdout is'
+  process.stderr.write(`[gsd] Error: Interactive mode requires a terminal (TTY) but ${missing} not a TTY.\n`)
   process.stderr.write('[gsd] Non-interactive alternatives:\n')
+  process.stderr.write('[gsd]   gsd auto                     Auto-mode (pipeable, no TUI)\n')
   process.stderr.write('[gsd]   gsd --print "your message"   Single-shot prompt\n')
   process.stderr.write('[gsd]   gsd --web [path]             Browser-only web mode\n')
   process.stderr.write('[gsd]   gsd --mode rpc               JSON-RPC over stdin/stdout\n')
   process.stderr.write('[gsd]   gsd --mode mcp               MCP server over stdin/stdout\n')
   process.stderr.write('[gsd]   gsd --mode text "message"    Text output mode\n')
+  process.stderr.write('[gsd]   gsd headless                 Auto-mode without TUI\n')
   process.exit(1)
 }
diff --git a/src/help-text.ts b/src/help-text.ts
index 4976c0591..82f262268 100644
--- a/src/help-text.ts
+++ b/src/help-text.ts
@@ -169,6 +169,7 @@ export function printHelp(version: string): void {
   process.stdout.write('  update                 Update GSD to the latest version\n')
   process.stdout.write('  sessions               List and resume a past session\n')
   process.stdout.write('  worktree               Manage worktrees (list, merge, clean, remove)\n')
+  process.stdout.write('  auto [args]            Run auto-mode without TUI (pipeable)\n')
   process.stdout.write('  headless [cmd] [args]  Run /gsd commands without TUI (default: auto)\n')
   process.stdout.write('\nRun gsd <subcommand> --help for subcommand-specific help.\n')
 }
diff --git a/src/onboarding.ts b/src/onboarding.ts
index 93e39d0f5..6b21d94d6 100644
--- a/src/onboarding.ts
+++ b/src/onboarding.ts
@@ -74,6 +74,7 @@ const LLM_PROVIDER_IDS = [
   'xai',
   'openrouter',
   'mistral',
+  'ollama',
   'ollama-cloud',
   'custom-openai',
 ]
@@ -90,6 +91,7 @@ const OTHER_PROVIDERS = [
   { value: 'xai', label: 'xAI (Grok)' },
   { value: 'openrouter', label: 'OpenRouter' },
   { value: 'mistral', label: 'Mistral' },
+  { value: 'ollama', label: 'Ollama (Local)' },
   { value: 'ollama-cloud', label: 'Ollama Cloud' },
   { value: 'custom-openai', label: 'Custom (OpenAI-compatible)' },
 ]
@@ -335,6 +337,9 @@ async function runLlmStep(p: ClackModule, pc: PicoModule, authStorage: AuthStora
   if (provider === 'custom-openai') {
     return await runCustomOpenAIFlow(p, pc, authStorage)
   }
+  if (provider === 'ollama') {
+    return await runOllamaLocalFlow(p, pc, authStorage)
+  }
   const label = provider === 'anthropic' ? 'Anthropic'
     : provider === 'openai' ? 'OpenAI'
     : OTHER_PROVIDERS.find(op => op.value === provider)?.label ?? String(provider)
@@ -444,6 +449,54 @@ async function runApiKeyFlow(
   return true
 }

+// ─── Ollama Local Flow ───────────────────────────────────────────────────────
+
+async function runOllamaLocalFlow(
+  p: ClackModule,
+  pc: PicoModule,
+  authStorage: AuthStorage,
+): Promise<boolean> {
+  const host = process.env.OLLAMA_HOST || 'http://localhost:11434'
+
+  const s = p.spinner()
+  s.start(`Checking Ollama at ${host}...`)
+
+  try {
+    const controller = new AbortController()
+    const timeout = setTimeout(() => controller.abort(), 3000)
+    const response = await fetch(host, { signal: controller.signal })
+    clearTimeout(timeout)
+
+    if (response.ok) {
+      s.stop(`Ollama is running at ${pc.green(host)}`)
+      // Store a placeholder so the provider is recognized as authenticated
+      authStorage.set('ollama', { type: 'api_key', key: 'ollama' })
+      p.log.success(`${pc.green('Ollama (Local)')} configured — no API key needed`)
+      p.log.info(pc.dim('Models are discovered automatically from your local Ollama instance.'))
+      return true
+    } else {
+      s.stop('Ollama check failed')
+      p.log.warn(`Ollama responded with status ${response.status} at ${host}`)
+    }
+  } catch {
+    s.stop('Ollama not detected')
+    p.log.warn(`Could not reach Ollama at ${host}`)
+    p.log.info(pc.dim('Install Ollama from https://ollama.com and run "ollama serve"'))
+    p.log.info(pc.dim('Set OLLAMA_HOST if using a non-default address.'))
+  }
+
+  // Even if not reachable now, save the config — the extension will detect it at runtime
+  const proceed = await p.confirm({
+    message: 'Save Ollama as your provider anyway? (it will auto-detect when running)',
+  })
+
+  if (p.isCancel(proceed) || !proceed) return false
+
+  authStorage.set('ollama', { type: 'api_key', key: 'ollama' })
+  p.log.success(`${pc.green('Ollama (Local)')} saved — models will appear when Ollama is running`)
+  return true
+}
+
 // ─── Custom OpenAI-compatible Flow ────────────────────────────────────────────

 async function runCustomOpenAIFlow(
diff --git a/src/resource-loader.ts b/src/resource-loader.ts
index 690a2e788..ad60e1c03 100644
--- a/src/resource-loader.ts
+++ b/src/resource-loader.ts
@@ -1,4 +1,4 @@
-import { DefaultResourceLoader } from '@gsd/pi-coding-agent'
+import { DefaultResourceLoader, sortExtensionPaths } from '@gsd/pi-coding-agent'
 import { createHash } from 'node:crypto'
 import { homedir } from 'node:os'
 import { chmodSync, copyFileSync, cpSync, existsSync, lstatSync, mkdirSync, openSync, closeSync, readFileSync, readlinkSync, readdirSync, rmSync, statSync, symlinkSync, unlinkSync, writeFileSync } from 'node:fs'
@@ -603,5 +603,21 @@ export function buildResourceLoader(agentDir: string): DefaultResourceLoader {
     agentDir,
     additionalExtensionPaths: piExtensionPaths,
     bundledExtensionNames: bundledKeys,
+    extensionPathsTransform: (paths: string[]) => {
+      // 1. Filter community extensions through the GSD registry
+      const filteredPaths = paths.filter((entryPath) => {
+        const manifest = readManifestFromEntryPath(entryPath)
+        if (!manifest) return true // no manifest = always load
+        return isExtensionEnabled(registry, manifest.id)
+      })
+
+      // 2. Sort in topological dependency order
+      const { sortedPaths, warnings } = sortExtensionPaths(filteredPaths)
+
+      return {
+        paths: sortedPaths,
+        diagnostics: warnings.map((w) => w.message),
+      }
+    },
   } as ConstructorParameters<typeof DefaultResourceLoader>[0])
 }
diff --git a/src/resources/agents/researcher.md b/src/resources/agents/researcher.md
index 3c34ea0e3..ae8fba5da 100644
--- a/src/resources/agents/researcher.md
+++ b/src/resources/agents/researcher.md
@@ -1,7 +1,7 @@
 ---
 name: researcher
 description: Web researcher that finds and synthesizes current information using Brave Search
-tools: web_search, bash
+tools: search-the-web, bash
 ---

 You are a web researcher. You find current, accurate information using web search and synthesize it into a clear, well-structured report.
diff --git a/src/resources/extensions/ask-user-questions.ts b/src/resources/extensions/ask-user-questions.ts
index c227c1ad4..215ad635d 100644
--- a/src/resources/extensions/ask-user-questions.ts
+++ b/src/resources/extensions/ask-user-questions.ts
@@ -162,9 +162,27 @@ export default function AskUserQuestions(pi: ExtensionAPI) {
       if (selected === undefined) {
         return errorResult("ask_user_questions was cancelled", params.questions);
       }
-      answers[q.id] = {
-        answers: Array.isArray(selected) ? selected : [selected],
-      };
+
+      // When the user picks "None of the above" on a single-select
+      // question, prompt for a free-text explanation so they are not
+      // trapped in a re-asking loop (bug #2715).
+      let freeTextNote = "";
+      const selectedStr = Array.isArray(selected) ? selected[0] : selected;
+      if (!q.allowMultiple && selectedStr === OTHER_OPTION_LABEL) {
+        const note = await ctx.ui.input(
+          `${q.header}: Please explain in your own words`,
+          "Type your answer here…",
+        );
+        if (note) {
+          freeTextNote = note;
+        }
+      }
+
+      const answerList = Array.isArray(selected) ? selected : [selected];
+      if (freeTextNote) {
+        answerList.push(`user_note: ${freeTextNote}`);
+      }
+      answers[q.id] = { answers: answerList };
     }

     const roundResult: RoundResult = {
       endInterview: false,
diff --git a/src/resources/extensions/async-jobs/extension-manifest.json b/src/resources/extensions/async-jobs/extension-manifest.json
index d849a5cab..edb516dd7 100644
--- a/src/resources/extensions/async-jobs/extension-manifest.json
+++ b/src/resources/extensions/async-jobs/extension-manifest.json
@@ -8,6 +8,6 @@
   "provides": {
     "tools": ["async_bash", "await_job", "cancel_job"],
     "commands": ["jobs"],
-    "hooks": ["session_start"]
+    "hooks": ["session_start", "session_before_switch", "session_shutdown"]
   }
 }
diff --git a/src/resources/extensions/bg-shell/extension-manifest.json b/src/resources/extensions/bg-shell/extension-manifest.json
index 952ed8ace..ba2700935 100644
--- a/src/resources/extensions/bg-shell/extension-manifest.json
+++ b/src/resources/extensions/bg-shell/extension-manifest.json
@@ -8,7 +8,7 @@
   "provides": {
     "tools": ["bg_shell"],
     "commands": ["bg"],
-    "hooks": ["session_shutdown"],
+    "hooks": ["session_shutdown", "session_compact", "session_tree", "session_switch", "before_agent_start", "session_start", "turn_end", "agent_end", "tool_execution_end"],
     "shortcuts": ["Ctrl+Alt+B"]
   }
 }
diff --git a/src/resources/extensions/browser-tools/extension-manifest.json b/src/resources/extensions/browser-tools/extension-manifest.json
index f6156ebbd..40218a31b 100644
--- a/src/resources/extensions/browser-tools/extension-manifest.json
+++ b/src/resources/extensions/browser-tools/extension-manifest.json
@@ -29,7 +29,7 @@
     "browser_visual_diff", "browser_zoom_region", "browser_generate_test",
     "browser_action_cache", "browser_check_injection"
   ],
-  "hooks": ["session_shutdown"]
+  "hooks": ["session_start", "session_shutdown"]
 },
 "dependencies": {
   "runtime": ["playwright"]
diff --git a/src/resources/extensions/claude-code-cli/partial-builder.ts b/src/resources/extensions/claude-code-cli/partial-builder.ts
index 99bd7ca0f..533835505 100644
--- a/src/resources/extensions/claude-code-cli/partial-builder.ts
+++ b/src/resources/extensions/claude-code-cli/partial-builder.ts
@@ -16,6 +16,7 @@ import type {
   Usage,
   WebSearchResultContent,
 } from "@gsd/pi-ai";
+import { repairToolJson } from "@gsd/pi-ai";
 import type { BetaContentBlock, BetaRawMessageStreamEvent, NonNullableUsage } from "./sdk-types.js";

 // ---------------------------------------------------------------------------
@@ -244,12 +245,18 @@ export class PartialMessageBuilder {
       try {
         block.arguments = JSON.parse(jsonStr);
       } catch {
-        // Stream was truncated mid-tool-call — JSON is garbage.
-        // Preserve the raw string for diagnostics but signal the
-        // malformation explicitly so downstream consumers can
-        // distinguish this from a healthy tool completion (#2574).
-        block.arguments = { _raw: jsonStr };
-        return { type: "toolcall_end", contentIndex, toolCall: block, partial: this.partial, malformedArguments: true };
+        // JSON.parse failed — attempt repair for YAML-style bullet
+        // lists that LLMs copy from template formatting (#2660).
+        try {
+          block.arguments = JSON.parse(repairToolJson(jsonStr));
+        } catch {
+          // Repair also failed — stream was truncated or garbage.
+          // Preserve the raw string for diagnostics but signal the
+          // malformation explicitly so downstream consumers can
+          // distinguish this from a healthy tool completion (#2574).
+ block.arguments = { _raw: jsonStr }; + return { type: "toolcall_end", contentIndex, toolCall: block, partial: this.partial, malformedArguments: true }; + } } return { type: "toolcall_end", contentIndex, toolCall: block, partial: this.partial }; } diff --git a/src/resources/extensions/claude-code-cli/stream-adapter.ts b/src/resources/extensions/claude-code-cli/stream-adapter.ts index 0be1512b6..70af68108 100644 --- a/src/resources/extensions/claude-code-cli/stream-adapter.ts +++ b/src/resources/extensions/claude-code-cli/stream-adapter.ts @@ -23,9 +23,6 @@ import type { SDKMessage, SDKPartialAssistantMessage, SDKResultMessage, - SDKSystemMessage, - SDKStatusMessage, - SDKUserMessage, } from "./sdk-types.js"; // --------------------------------------------------------------------------- @@ -71,30 +68,49 @@ function getClaudePath(): string { } // --------------------------------------------------------------------------- -// Prompt extraction +// Prompt construction // --------------------------------------------------------------------------- /** - * Extract the last user prompt text from GSD's context messages. - * The SDK manages its own conversation history — we only send - * the latest user message as the prompt. + * Extract text content from a single message regardless of content shape. 
*/ -function extractLastUserPrompt(context: Context): string { - for (let i = context.messages.length - 1; i >= 0; i--) { - const msg = context.messages[i]; - if (msg.role === "user") { - if (typeof msg.content === "string") return msg.content; - if (Array.isArray(msg.content)) { - const textParts = msg.content - .filter((part: any) => part.type === "text") - .map((part: any) => part.text); - if (textParts.length > 0) return textParts.join("\n"); - } - } +function extractMessageText(msg: { role: string; content: unknown }): string { + if (typeof msg.content === "string") return msg.content; + if (Array.isArray(msg.content)) { + const textParts = msg.content + .filter((part: any) => part.type === "text") + .map((part: any) => part.text ?? part.thinking ?? ""); + if (textParts.length > 0) return textParts.join("\n"); } return ""; } +/** + * Build a full conversational prompt from GSD's context messages. + * + * Previous behaviour sent only the last user message, making every SDK + * call effectively stateless. This version serialises the complete + * conversation history (system prompt + all user/assistant turns) so + * Claude Code has full context for multi-turn continuity. + */ +export function buildPromptFromContext(context: Context): string { + const parts: string[] = []; + + if (context.systemPrompt) { + parts.push(`[System]\n${context.systemPrompt}`); + } + + for (const msg of context.messages) { + const text = extractMessageText(msg); + if (!text) continue; + + const label = msg.role === "user" ? "User" : msg.role === "assistant" ? 
"Assistant" : "System"; + parts.push(`[${label}]\n${text}`); + } + + return parts.join("\n\n"); +} + // --------------------------------------------------------------------------- // Error helper // --------------------------------------------------------------------------- @@ -127,6 +143,31 @@ export function makeStreamExhaustedErrorMessage(model: string, lastTextContent: return message; } +// --------------------------------------------------------------------------- +// SDK options builder +// --------------------------------------------------------------------------- + +/** + * Build the options object passed to the Claude Agent SDK's `query()` call. + * + * Extracted for testability — callers can verify session persistence, + * beta flags, and other configuration without mocking the full SDK. + */ +export function buildSdkOptions(modelId: string, prompt: string): Record { + return { + pathToClaudeCodeExecutable: getClaudePath(), + model: modelId, + includePartialMessages: true, + persistSession: true, + cwd: process.cwd(), + permissionMode: "bypassPermissions", + allowDangerouslySkipPermissions: true, + settingSources: ["project"], + systemPrompt: { type: "preset", preset: "claude_code" }, + betas: modelId.includes("sonnet") ? 
["context-1m-2025-08-07"] : [], + }; +} + // --------------------------------------------------------------------------- // streamSimple implementation // --------------------------------------------------------------------------- @@ -180,22 +221,14 @@ async function pumpSdkMessages( options.signal.addEventListener("abort", () => controller.abort(), { once: true }); } - const prompt = extractLastUserPrompt(context); + const prompt = buildPromptFromContext(context); + const sdkOpts = buildSdkOptions(modelId, prompt); const queryResult = sdk.query({ prompt, options: { - pathToClaudeCodeExecutable: getClaudePath(), - model: modelId, - includePartialMessages: true, - persistSession: false, + ...sdkOpts, abortController: controller, - cwd: process.cwd(), - permissionMode: "bypassPermissions", - allowDangerouslySkipPermissions: true, - settingSources: ["project"], - systemPrompt: { type: "preset", preset: "claude_code" }, - betas: modelId.includes("sonnet") ? ["context-1m-2025-08-07"] : [], }, }); @@ -225,7 +258,6 @@ async function pumpSdkMessages( // -- Streaming partial messages -- case "stream_event": { const partial = msg as SDKPartialAssistantMessage; - if (partial.parent_tool_use_id !== null) break; // skip subagent const event = partial.event; @@ -256,7 +288,6 @@ async function pumpSdkMessages( // -- Complete assistant message (non-streaming fallback) -- case "assistant": { const sdkAssistant = msg as SDKAssistantMessage; - if (sdkAssistant.parent_tool_use_id !== null) break; // Capture text content from complete messages for (const block of sdkAssistant.message.content) { @@ -271,9 +302,6 @@ async function pumpSdkMessages( // -- User message (synthetic tool result — signals turn boundary) -- case "user": { - const userMsg = msg as SDKUserMessage; - if (userMsg.parent_tool_use_id !== null) break; - // Capture content from the completed turn before resetting if (builder) { for (const block of builder.message.content) { diff --git 
a/src/resources/extensions/claude-code-cli/tests/partial-builder.test.ts b/src/resources/extensions/claude-code-cli/tests/partial-builder.test.ts index 2a9612986..2ad1e6b0a 100644 --- a/src/resources/extensions/claude-code-cli/tests/partial-builder.test.ts +++ b/src/resources/extensions/claude-code-cli/tests/partial-builder.test.ts @@ -102,4 +102,32 @@ describe("PartialMessageBuilder — malformed tool arguments (#2574)", () => { "non-JSON content should set malformedArguments: true", ); }); + + test("YAML bullet lists repaired to JSON arrays (#2660)", () => { + const builder = new PartialMessageBuilder("claude-sonnet-4-20250514"); + const malformedJson = + '{"milestoneId": "M005", "keyDecisions": - Used Web Notification API, "keyFiles": - src/lib.rs, "title": "done"}'; + const event = feedToolCall(builder, [malformedJson]); + + assert.ok(event, "event should not be null"); + assert.equal(event!.type, "toolcall_end"); + // Repaired YAML bullets should NOT set malformedArguments + assert.equal( + (event as any).malformedArguments, + undefined, + "repaired YAML bullets should not set malformedArguments", + ); + if (event!.type === "toolcall_end") { + assert.equal(event!.toolCall.arguments.milestoneId, "M005"); + assert.ok( + Array.isArray(event!.toolCall.arguments.keyDecisions), + "keyDecisions should be repaired to an array", + ); + assert.ok( + Array.isArray(event!.toolCall.arguments.keyFiles), + "keyFiles should be repaired to an array", + ); + assert.equal(event!.toolCall.arguments.title, "done"); + } + }); }); diff --git a/src/resources/extensions/claude-code-cli/tests/stream-adapter.test.ts b/src/resources/extensions/claude-code-cli/tests/stream-adapter.test.ts index 052823590..983c7a369 100644 --- a/src/resources/extensions/claude-code-cli/tests/stream-adapter.test.ts +++ b/src/resources/extensions/claude-code-cli/tests/stream-adapter.test.ts @@ -1,6 +1,15 @@ import { describe, test } from "node:test"; import assert from "node:assert/strict"; -import { 
makeStreamExhaustedErrorMessage } from "../stream-adapter.ts"; +import { + makeStreamExhaustedErrorMessage, + buildPromptFromContext, + buildSdkOptions, +} from "../stream-adapter.ts"; +import type { Context, Message } from "@gsd/pi-ai"; + +// --------------------------------------------------------------------------- +// Existing tests — exhausted stream fallback (#2575) +// --------------------------------------------------------------------------- describe("stream-adapter — exhausted stream fallback (#2575)", () => { test("generator exhaustion becomes an error message instead of clean completion", () => { @@ -19,3 +28,101 @@ describe("stream-adapter — exhausted stream fallback (#2575)", () => { assert.match(String((message.content[0] as any)?.text ?? ""), /Claude Code error: stream_exhausted_without_result/); }); }); + +// --------------------------------------------------------------------------- +// Bug #2859 — stateless provider regression tests +// --------------------------------------------------------------------------- + +describe("stream-adapter — full context prompt (#2859)", () => { + test("buildPromptFromContext includes all user and assistant messages, not just the last user message", () => { + const context: Context = { + systemPrompt: "You are a helpful assistant.", + messages: [ + { role: "user", content: "What is 2+2?" 
} as Message, + { + role: "assistant", + content: [{ type: "text", text: "4" }], + api: "anthropic-messages", + provider: "claude-code", + model: "claude-sonnet-4-20250514", + usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } }, + stopReason: "stop", + timestamp: Date.now(), + } as Message, + { role: "user", content: "Now multiply that by 3" } as Message, + ], + }; + + const prompt = buildPromptFromContext(context); + + // Must contain content from BOTH user messages, not just the last + assert.ok(prompt.includes("2+2"), "prompt must include first user message"); + assert.ok(prompt.includes("multiply"), "prompt must include second user message"); + // Must contain assistant response for continuity + assert.ok(prompt.includes("4"), "prompt must include assistant reply for context"); + }); + + test("buildPromptFromContext includes system prompt when present", () => { + const context: Context = { + systemPrompt: "You are a coding assistant.", + messages: [ + { role: "user", content: "Write a function" } as Message, + ], + }; + + const prompt = buildPromptFromContext(context); + assert.ok(prompt.includes("coding assistant"), "prompt must include system prompt"); + }); + + test("buildPromptFromContext handles array content parts in user messages", () => { + const context: Context = { + messages: [ + { + role: "user", + content: [ + { type: "text", text: "First part" }, + { type: "text", text: "Second part" }, + ], + } as Message, + { role: "user", content: "Follow-up" } as Message, + ], + }; + + const prompt = buildPromptFromContext(context); + assert.ok(prompt.includes("First part"), "prompt must include array content parts"); + assert.ok(prompt.includes("Second part"), "prompt must include all text parts"); + assert.ok(prompt.includes("Follow-up"), "prompt must include follow-up message"); + }); + + test("buildPromptFromContext returns empty string for empty messages", () => 
{ + const context: Context = { messages: [] }; + const prompt = buildPromptFromContext(context); + assert.equal(prompt, ""); + }); +}); + +describe("stream-adapter — session persistence (#2859)", () => { + test("buildSdkOptions enables persistSession by default", () => { + const options = buildSdkOptions("claude-sonnet-4-20250514", "test prompt"); + assert.equal(options.persistSession, true, "persistSession must default to true"); + }); + + test("buildSdkOptions sets model and prompt correctly", () => { + const options = buildSdkOptions("claude-sonnet-4-20250514", "hello world"); + assert.equal(options.model, "claude-sonnet-4-20250514"); + }); + + test("buildSdkOptions enables betas for sonnet models", () => { + const sonnetOpts = buildSdkOptions("claude-sonnet-4-20250514", "test"); + assert.ok( + Array.isArray(sonnetOpts.betas) && sonnetOpts.betas.length > 0, + "sonnet models should have betas enabled", + ); + + const opusOpts = buildSdkOptions("claude-opus-4-20250514", "test"); + assert.ok( + Array.isArray(opusOpts.betas) && opusOpts.betas.length === 0, + "non-sonnet models should have empty betas", + ); + }); +}); diff --git a/src/resources/extensions/context7/extension-manifest.json b/src/resources/extensions/context7/extension-manifest.json index e95788267..d5bf3098e 100644 --- a/src/resources/extensions/context7/extension-manifest.json +++ b/src/resources/extensions/context7/extension-manifest.json @@ -7,6 +7,6 @@ "requires": { "platform": ">=2.29.0" }, "provides": { "tools": ["resolve_library", "get_library_docs"], - "hooks": ["session_start"] + "hooks": ["session_start", "session_shutdown"] } } diff --git a/src/resources/extensions/get-secrets-from-user.ts b/src/resources/extensions/get-secrets-from-user.ts index 300852305..7fe418f59 100644 --- a/src/resources/extensions/get-secrets-from-user.ts +++ b/src/resources/extensions/get-secrets-from-user.ts @@ -54,6 +54,9 @@ function hydrateProcessEnv(key: string, value: string): void { } async function 
writeEnvKey(filePath: string, key: string, value: string): Promise<void> { + if (typeof value !== "string") { + throw new TypeError(`writeEnvKey expects a string value for key "${key}", got ${typeof value}`); + } let content = ""; try { content = await readFile(filePath, "utf8"); } catch { @@ -419,7 +422,7 @@ export async function collectSecretsFromManifest( for (const { key, value } of collected) { const entry = manifest.entries.find((e) => e.key === key); if (entry) { - entry.status = value !== null ? "collected" : "skipped"; + entry.status = value != null ? "collected" : "skipped"; } } @@ -427,14 +430,14 @@ export async function collectSecretsFromManifest( await writeFile(manifestPath, formatSecretsManifest(manifest), "utf8"); // (j) Apply collected values to destination - const provided = collected.filter((c) => c.value !== null) as Array<{ key: string; value: string }>; + const provided = collected.filter((c) => c.value != null) as Array<{ key: string; value: string }>; const { applied } = await applySecrets(provided, destination, { envFilePath: resolve(ctx.cwd, ".env"), }); const skipped = [ ...alreadySkipped, - ...collected.filter((c) => c.value === null).map((c) => c.key), + ...collected.filter((c) => c.value == null).map((c) => c.key), ]; return { applied, skipped, existingSkipped }; @@ -505,8 +508,8 @@ export default function secureEnv(pi: ExtensionAPI) { collected.push({ key: item.key, value }); } - const provided = collected.filter((c) => c.value !== null) as Array<{ key: string; value: string }>; - const skipped = collected.filter((c) => c.value === null).map((c) => c.key); + const provided = collected.filter((c) => c.value != null) as Array<{ key: string; value: string }>; + const skipped = collected.filter((c) => c.value == null).map((c) => c.key); // Apply to destination via shared helper const { applied, errors } = await applySecrets(provided, destination, { diff --git a/src/resources/extensions/google-search/extension-manifest.json
b/src/resources/extensions/google-search/extension-manifest.json index b2938627d..c4a775a4d 100644 --- a/src/resources/extensions/google-search/extension-manifest.json +++ b/src/resources/extensions/google-search/extension-manifest.json @@ -7,6 +7,6 @@ "requires": { "platform": ">=2.29.0" }, "provides": { "tools": ["google_search"], - "hooks": ["session_start"] + "hooks": ["session_start", "session_shutdown"] } } diff --git a/src/resources/extensions/google-search/index.ts b/src/resources/extensions/google-search/index.ts index 4f4f0fff6..a4f9818f4 100644 --- a/src/resources/extensions/google-search/index.ts +++ b/src/resources/extensions/google-search/index.ts @@ -79,7 +79,7 @@ async function searchWithOAuth( signal?: AbortSignal, ): Promise { const model = process.env.GEMINI_SEARCH_MODEL || "gemini-2.5-flash"; - const url = `https://cloudcode-pa.googleapis.com/v1internal:streamGenerateContent`; + const url = `https://cloudcode-pa.googleapis.com/v1internal:streamGenerateContent?alt=sse`; const GEMINI_CLI_HEADERS = { ideType: "IDE_UNSPECIFIED", @@ -104,6 +104,7 @@ async function searchWithOAuth( contents: [{ parts: [{ text: query }] }], tools: [{ googleSearch: {} }], }, + userAgent: "pi-coding-agent", }), signal, }); diff --git a/src/resources/extensions/gsd/auto-artifact-paths.ts b/src/resources/extensions/gsd/auto-artifact-paths.ts index df8b52ad2..6e54f5b07 100644 --- a/src/resources/extensions/gsd/auto-artifact-paths.ts +++ b/src/resources/extensions/gsd/auto-artifact-paths.ts @@ -56,7 +56,7 @@ export function resolveExpectedArtifactPath( } case "run-uat": { const dir = resolveSlicePath(base, mid, sid!); - return dir ? join(dir, buildSliceFileName(sid!, "UAT")) : null; + return dir ? 
join(dir, buildSliceFileName(sid!, "ASSESSMENT")) : null; } case "execute-task": { const dir = resolveSlicePath(base, mid, sid!); @@ -124,7 +124,7 @@ export function diagnoseExpectedArtifact( case "reassess-roadmap": return `${relSliceFile(base, mid, sid!, "ASSESSMENT")} (roadmap reassessment)`; case "run-uat": - return `${relSliceFile(base, mid, sid!, "UAT")} (UAT result)`; + return `${relSliceFile(base, mid, sid!, "ASSESSMENT")} (UAT assessment result)`; case "validate-milestone": return `${relMilestoneFile(base, mid, "VALIDATION")} (milestone validation report)`; case "complete-milestone": diff --git a/src/resources/extensions/gsd/auto-dashboard.ts b/src/resources/extensions/gsd/auto-dashboard.ts index 98a6ff052..b385fa051 100644 --- a/src/resources/extensions/gsd/auto-dashboard.ts +++ b/src/resources/extensions/gsd/auto-dashboard.ts @@ -569,6 +569,13 @@ export function updateProgressWidget( : ""; lines.push(rightAlign(headerLeft, headerRight, width)); + // Worktree/branch right-aligned below header + if (worktreeName && cachedBranch) { + lines.push(rightAlign("", theme.fg("dim", `${worktreeName} (${cachedBranch})`), width)); + } else if (cachedBranch) { + lines.push(rightAlign("", theme.fg("dim", cachedBranch), width)); + } + // Show health signal details when degraded (yellow/red) if (score.level !== "green" && score.signals.length > 0 && widgetMode !== "min") { // Show up to 3 most relevant signals in compact form @@ -682,12 +689,12 @@ export function updateProgressWidget( const hasContext = !!(mid || (slice && unitType !== "research-milestone" && unitType !== "plan-milestone")); if (mid) { const modelTag = modelDisplay ? 
theme.fg("muted", ` ${modelDisplay}`) : ""; - lines.push(truncateToWidth(`${pad}${theme.fg("dim", mid.title)}${modelTag}`, width)); + lines.push(truncateToWidth(`${pad}${theme.fg("dim", mid.title)}${modelTag}`, width, "…")); } if (slice && unitType !== "research-milestone" && unitType !== "plan-milestone") { lines.push(truncateToWidth( `${pad}${theme.fg("text", theme.bold(`${slice.id}: ${slice.title}`))}`, - width, + width, "…", )); } if (hasContext) lines.push(""); @@ -733,6 +740,12 @@ export function updateProgressWidget( const rightLines: string[] = []; const maxVisibleTasks = 8; + // Max visible chars for task title text (before ANSI theming) + const maxTaskTitleLen = 45; + function truncTitle(s: string): string { + return s.length > maxTaskTitleLen ? s.slice(0, maxTaskTitleLen - 1) + "…" : s; + } + function formatTaskLine(t: { id: string; title: string; done: boolean }, isCurrent: boolean): string { const glyph = t.done ? theme.fg("success", "*") @@ -744,11 +757,12 @@ export function updateProgressWidget( : t.done ? theme.fg("muted", t.id) : theme.fg("dim", t.id); + const short = truncTitle(t.title); const title = isCurrent - ? theme.fg("text", t.title) + ? theme.fg("text", short) : t.done - ? theme.fg("muted", t.title) - : theme.fg("text", t.title); + ? theme.fg("muted", short) + : theme.fg("text", short); return `${glyph} ${id}: ${title}`; } @@ -771,7 +785,7 @@ export function updateProgressWidget( if (maxRows > 0) { lines.push(""); for (let i = 0; i < maxRows; i++) { - const left = padToWidth(truncateToWidth(leftLines[i] ?? "", leftColWidth), leftColWidth); + const left = padToWidth(truncateToWidth(leftLines[i] ?? "", leftColWidth, "…"), leftColWidth); const right = rightLines[i] ?? 
""; lines.push(`${left}${right}`); } @@ -779,7 +793,7 @@ export function updateProgressWidget( } else { if (leftLines.length > 0) { lines.push(""); - for (const l of leftLines) lines.push(truncateToWidth(l, width)); + for (const l of leftLines) lines.push(truncateToWidth(l, width, "…")); } } @@ -808,23 +822,27 @@ export function updateProgressWidget( lines.push(rightAlign("", theme.fg("dim", cachedRtkLabel), width)); } } - // PWD line with last commit info right-aligned + // Last commit info const lastCommit = getLastCommit(accessors.getBasePath()); - const commitStr = lastCommit - ? theme.fg("dim", `${lastCommit.timeAgo} ago: ${lastCommit.message}`) + const maxCommitLen = 65; + const commitMsg = lastCommit + ? lastCommit.message.length > maxCommitLen + ? lastCommit.message.slice(0, maxCommitLen - 1) + "…" + : lastCommit.message : ""; - const pwdStr = theme.fg("dim", widgetPwd); - if (commitStr) { - lines.push(rightAlign(`${pad}${pwdStr}`, truncateToWidth(commitStr, Math.floor(width * 0.45)), width)); - } else { - lines.push(`${pad}${pwdStr}`); - } // Hints line const hintParts: string[] = []; hintParts.push("esc pause"); hintParts.push(process.platform === "darwin" ? "⌃⌥G dashboard" : "Ctrl+Alt+G dashboard"); const hintStr = theme.fg("dim", hintParts.join(" | ")); - lines.push(rightAlign("", hintStr, width)); + const commitStr = lastCommit + ? 
theme.fg("dim", `${lastCommit.timeAgo} ago: ${commitMsg}`) + : ""; + if (commitStr) { + lines.push(rightAlign(`${pad}${commitStr}`, hintStr, width)); + } else { + lines.push(rightAlign("", hintStr, width)); + } lines.push(...ui.bar()); @@ -851,12 +869,12 @@ function rightAlign(left: string, right: string, width: number): string { const leftVis = visibleWidth(left); const rightVis = visibleWidth(right); const gap = Math.max(1, width - leftVis - rightVis); - return truncateToWidth(left + " ".repeat(gap) + right, width); + return truncateToWidth(left + " ".repeat(gap) + right, width, "…"); } /** Pad a string with trailing spaces to fill exactly `colWidth` (ANSI-aware). */ function padToWidth(s: string, colWidth: number): string { const vis = visibleWidth(s); - if (vis >= colWidth) return truncateToWidth(s, colWidth); + if (vis >= colWidth) return truncateToWidth(s, colWidth, "…"); return s + " ".repeat(colWidth - vis); } diff --git a/src/resources/extensions/gsd/auto-dispatch.ts b/src/resources/extensions/gsd/auto-dispatch.ts index 91918938f..839ba5fb1 100644 --- a/src/resources/extensions/gsd/auto-dispatch.ts +++ b/src/resources/extensions/gsd/auto-dispatch.ts @@ -28,6 +28,7 @@ import { buildSliceFileName, } from "./paths.js"; import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; +import { logError } from "./workflow-logger.js"; import { join } from "node:path"; import { hasImplementationArtifacts } from "./auto-recovery.js"; import { @@ -129,6 +130,21 @@ export function setRewriteCount(basePath: string, count: number): void { writeFileSync(filePath, JSON.stringify({ count, updatedAt: new Date().toISOString() }) + "\n"); } +// ─── Helpers ───────────────────────────────────────────────────────────── + +/** + * Returns true when the verification_operational value indicates that no + * operational verification is needed. Covers common phrasings the planning + * agent may use: "None", "None required", "N/A", "Not applicable", etc. 
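The `isVerificationNotApplicable` helper normalizes free-form planner phrasings into a single yes/no answer. A minimal standalone sketch of the same idea (the name `isNotApplicable` and the exact set of accepted phrasings are illustrative, not the extension's actual regex):

```typescript
// Sketch of a tolerant "no operational verification needed" matcher.
// Accepted phrasings here are assumptions; the real helper may differ.
function isNotApplicable(value: string | null | undefined): boolean {
  const v = (value ?? "").trim().toLowerCase();
  if (v === "" || v === "none") return true;
  // Collapse separator variants so "none-required", "none_required",
  // and "none required" all normalize to the same token stream.
  const normalized = v.replace(/[\s._-]+/g, " ").trim();
  if (/^none( (required|needed|planned))?$/.test(normalized)) return true;
  if (normalized === "n/a" || normalized === "na") return true;
  return /^not (applicable|required|needed)$/.test(normalized);
}
```

Anchoring the patterns with `^...$` is the key design choice: a real verification instruction that merely contains the word "none" should not be swallowed by the matcher.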
+ * + * @see https://github.com/gsd-build/gsd-2/issues/2931 + */ +export function isVerificationNotApplicable(value: string): boolean { + const v = (value ?? "").toLowerCase().trim(); + if (!v || v === "none") return true; + return /^(?:none[\s._-]*(?:required|needed|planned)?|n\/?a|not[\s._-]+(?:applicable|required|needed)|no[\s._-]+operational[\s\S]*)$/i.test(v); +} + // ─── Rules ──────────────────────────────────────────────────────────────── export const DISPATCH_RULES: DispatchRule[] = [ @@ -511,7 +527,7 @@ export const DISPATCH_RULES: DispatchRule[] = [ }; } catch (err) { // Non-fatal — fall through to sequential execution - process.stderr.write(`gsd-reactive: graph derivation failed: ${(err as Error).message}\n`); + logError("dispatch", "reactive graph derivation failed", { error: (err as Error).message }); return null; } }, @@ -672,7 +688,7 @@ export const DISPATCH_RULES: DispatchRule[] = [ if (isDbAvailable()) { const milestone = getMilestone(mid); if (milestone?.verification_operational && - milestone.verification_operational.toLowerCase() !== "none") { + !isVerificationNotApplicable(milestone.verification_operational)) { const validationPath = resolveMilestoneFile(basePath, mid, "VALIDATION"); if (validationPath) { const validationContent = await loadFile(validationPath); diff --git a/src/resources/extensions/gsd/auto-model-selection.ts b/src/resources/extensions/gsd/auto-model-selection.ts index c79ab55b2..60cca2663 100644 --- a/src/resources/extensions/gsd/auto-model-selection.ts +++ b/src/resources/extensions/gsd/auto-model-selection.ts @@ -222,9 +222,30 @@ export function resolveModelId( ); } - // Bare ID — prefer current provider, then first available - const exactProviderMatch = availableModels.find( - m => m.id === modelId && m.provider === currentProvider, - ); - return exactProviderMatch ?? availableModels.find(m => m.id === modelId); + // Bare ID — resolve with provider precedence to avoid silent misrouting. + // Extension providers (e.g. 
claude-code) expose the same model IDs as their + // upstream API providers but route through a subprocess with different + // context, tool visibility, and cost characteristics (#2905). Bare IDs in + // PREFERENCES.md must resolve to the canonical API provider, not to an + // extension wrapper that happens to be the current session provider. + const candidates = availableModels.filter(m => m.id === modelId); + if (candidates.length === 0) return undefined; + if (candidates.length === 1) return candidates[0]; + + // Extension / CLI-wrapper providers that should never win bare-ID resolution + // when a first-class API provider also offers the same model. + const EXTENSION_PROVIDERS = new Set(["claude-code"]); + + // Prefer currentProvider only when it is a first-class API provider + if (currentProvider && !EXTENSION_PROVIDERS.has(currentProvider)) { + const providerMatch = candidates.find(m => m.provider === currentProvider); + if (providerMatch) return providerMatch; + } + + // Prefer "anthropic" as the canonical provider for Anthropic models + const anthropicMatch = candidates.find(m => m.provider === "anthropic"); + if (anthropicMatch) return anthropicMatch; + + // Fall back to first non-extension candidate, or any candidate + return candidates.find(m => !EXTENSION_PROVIDERS.has(m.provider)) ?? 
candidates[0]; } diff --git a/src/resources/extensions/gsd/auto-post-unit.ts b/src/resources/extensions/gsd/auto-post-unit.ts index 3083a20fa..860e71bd1 100644 --- a/src/resources/extensions/gsd/auto-post-unit.ts +++ b/src/resources/extensions/gsd/auto-post-unit.ts @@ -13,6 +13,7 @@ import type { ExtensionContext, ExtensionAPI } from "@gsd/pi-coding-agent"; import { deriveState } from "./state.js"; +import { logWarning, logError } from "./workflow-logger.js"; import { loadFile, parseSummary, resolveAllOverrides } from "./files.js"; import { loadPrompt } from "./prompt-loader.js"; import { @@ -412,10 +413,10 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV ); } for (const action of triageResult.actions) { - process.stderr.write(`gsd-triage: ${action}\n`); + logWarning("engine", `triage resolution: ${action}`); } } catch (err) { - process.stderr.write(`gsd-triage: resolution execution failed: ${(err as Error).message}\n`); + logError("engine", "triage resolution failed", { error: (err as Error).message }); } } @@ -423,7 +424,7 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV try { const rogueFiles = detectRogueFileWrites(s.currentUnit.type, s.currentUnit.id, s.basePath); for (const rogue of rogueFiles) { - process.stderr.write(`gsd-rogue: detected rogue file write: ${rogue.path} (unit: ${rogue.unitId})\n`); + logWarning("engine", "rogue file write detected", { path: rogue.path, unitId: rogue.unitId }); ctx.ui.notify(`Rogue file write detected: ${rogue.path}`, "warning"); } } catch (e) { @@ -465,7 +466,20 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV // When artifact verification fails for a unit type that has a known expected // artifact, return "retry" so the caller re-dispatches with failure context // instead of blindly re-dispatching the same unit (#1571). 
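The bare-ID precedence rules above can be exercised in isolation. This sketch re-implements the selection order with a hypothetical `ModelRecord` shape; it is a simplified stand-in for `resolveModelId`, not the agent's actual registry code:

```typescript
// Hypothetical record shape standing in for the agent's model registry type.
interface ModelRecord { id: string; provider: string; }

// Extension/CLI-wrapper providers that should never win bare-ID resolution.
const EXTENSION_PROVIDERS = new Set(["claude-code"]);

function pickModel(
  models: ModelRecord[],
  modelId: string,
  currentProvider?: string,
): ModelRecord | undefined {
  const candidates = models.filter((m) => m.id === modelId);
  if (candidates.length <= 1) return candidates[0];
  // 1. Current provider wins only when it is a first-class API provider.
  if (currentProvider && !EXTENSION_PROVIDERS.has(currentProvider)) {
    const match = candidates.find((m) => m.provider === currentProvider);
    if (match) return match;
  }
  // 2. Canonical API provider beats extension wrappers.
  const anthropic = candidates.find((m) => m.provider === "anthropic");
  if (anthropic) return anthropic;
  // 3. Any non-extension provider, then anything at all.
  return candidates.find((m) => !EXTENSION_PROVIDERS.has(m.provider)) ?? candidates[0];
}

const models: ModelRecord[] = [
  { id: "claude-sonnet-4-20250514", provider: "claude-code" },
  { id: "claude-sonnet-4-20250514", provider: "anthropic" },
];
// Even when the session is currently routed through the claude-code
// extension, a bare ID resolves to the canonical API provider.
const resolved = pickModel(models, "claude-sonnet-4-20250514", "claude-code");
```

The deliberate asymmetry is step 1: the current provider only short-circuits resolution when it is not an extension wrapper, which is exactly the misrouting described in #2905.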
- if (!triggerArtifactVerified) { + // + // HOWEVER, if the DB is unavailable (db_unavailable), the artifact was never + // written because the completion tool failed at the infra level. Retrying + // can never succeed and produces a costly re-dispatch loop (#2517). + if (!triggerArtifactVerified && !isDbAvailable()) { + // DB infra failure — do NOT retry; the completion tool returned + // db_unavailable so the artifact was never written. Retrying would + // produce an infinite re-dispatch loop (#2517). + debugLog("postUnit", { phase: "artifact-verify-skip-db-unavailable", unitType: s.currentUnit.type, unitId: s.currentUnit.id }); + ctx.ui.notify( + `Artifact missing for ${s.currentUnit.type} ${s.currentUnit.id} but DB is unavailable — skipping retry to avoid loop (#2517)`, + "error", + ); + } else if (!triggerArtifactVerified) { const hasExpectedArtifact = resolveExpectedArtifactPath(s.currentUnit.type, s.currentUnit.id, s.basePath) !== null; if (hasExpectedArtifact) { const retryKey = `${s.currentUnit.type}:${s.currentUnit.id}`; diff --git a/src/resources/extensions/gsd/auto-prompts.ts b/src/resources/extensions/gsd/auto-prompts.ts index 1ea0e3366..5b6e9de5b 100644 --- a/src/resources/extensions/gsd/auto-prompts.ts +++ b/src/resources/extensions/gsd/auto-prompts.ts @@ -1568,7 +1568,7 @@ export async function buildRunUatPrompt( const inlinedContext = capPreamble(`## Inlined Context (preloaded — do not re-read these files)\n\n${inlined.join("\n\n---\n\n")}`); - const uatResultPath = join(base, relSliceFile(base, mid, sliceId, "UAT")); + const uatResultPath = join(base, relSliceFile(base, mid, sliceId, "ASSESSMENT")); const uatType = getUatType(uatContent); return loadPrompt("run-uat", { diff --git a/src/resources/extensions/gsd/auto-recovery.ts b/src/resources/extensions/gsd/auto-recovery.ts index 9181d7fe8..691deef1d 100644 --- a/src/resources/extensions/gsd/auto-recovery.ts +++ b/src/resources/extensions/gsd/auto-recovery.ts @@ -14,6 +14,7 @@ import { 
clearParseCache } from "./files.js"; import { parseRoadmap as parseLegacyRoadmap, parsePlan as parseLegacyPlan } from "./parsers-legacy.js"; import { isDbAvailable, getTask, getSlice, getSliceTasks, updateTaskStatus } from "./gsd-db.js"; import { isValidationTerminal } from "./state.js"; +import { getErrorMessage } from "./error-utils.js"; import { nativeConflictFiles, nativeCommit, @@ -476,11 +477,17 @@ export function reconcileMergeState( if (conflictedFiles.length === 0) { // All conflicts resolved — finalize the merge/squash commit try { - nativeCommit(basePath, ""); // --no-edit equivalent: use empty message placeholder - const mode = hasMergeHead ? "merge" : "squash commit"; - ctx.ui.notify(`Finalized leftover ${mode} from prior session.`, "info"); - } catch { - // Commit may already exist; non-fatal + const commitSha = nativeCommit(basePath, ""); // --no-edit equivalent: use empty message placeholder + if (commitSha) { + const mode = hasMergeHead ? "merge" : "squash commit"; + ctx.ui.notify(`Finalized leftover ${mode} from prior session.`, "info"); + } else { + ctx.ui.notify("No new commit needed for leftover merge/squash state — already committed.", "info"); + } + } catch (err) { + const errorMessage = getErrorMessage(err); + ctx.ui.notify(`Failed to finalize leftover merge/squash commit: ${errorMessage}`, "error"); + return false; } } else { // Still conflicted — try auto-resolving .gsd/ state file conflicts (#530) diff --git a/src/resources/extensions/gsd/auto-start.ts b/src/resources/extensions/gsd/auto-start.ts index 85bdbe370..926313891 100644 --- a/src/resources/extensions/gsd/auto-start.ts +++ b/src/resources/extensions/gsd/auto-start.ts @@ -58,9 +58,8 @@ import { initRoutingHistory } from "./routing-history.js"; import { restoreHookState, resetHookState } from "./post-unit-hooks.js"; import { resetProactiveHealing, setLevelChangeCallback } from "./doctor-proactive.js"; import { snapshotSkills } from "./skill-discovery.js"; -import { isDbAvailable, 
getMilestone, openDatabase } from "./gsd-db.js"; +import { isDbAvailable, getMilestone } from "./gsd-db.js"; import { hideFooter } from "./auto-dashboard.js"; -import { resolveProjectRootDbPath } from "./bootstrap/dynamic-tools.js"; import { debugLog, enableDebug, @@ -68,7 +67,6 @@ import { getDebugLogPath, } from "./debug-logger.js"; import { parseUnitId } from "./unit-id.js"; -import { setLogBasePath } from "./workflow-logger.js"; import type { AutoSession } from "./auto/session.js"; import { existsSync, @@ -80,6 +78,7 @@ import { import { join } from "node:path"; import { sep as pathSep } from "node:path"; +import { resolveProjectRootDbPath } from "./bootstrap/dynamic-tools.js"; import type { WorktreeResolver } from "./worktree-resolver.js"; export interface BootstrapDeps { @@ -98,26 +97,32 @@ export interface BootstrapDeps { * concurrent session detected). Returns true when ready to dispatch. */ +/** + * Open the project-root DB before the first deriveState call (#2841). + * When auto-mode starts cold (no prior DB handle), state derivation that + * touches DB-backed helpers (queue-order, task status) silently falls back + * to markdown-only data, producing stale or incomplete state. Opening the + * DB first ensures deriveState sees the full picture on its very first run. + */ +async function openProjectDbIfPresent(basePath: string): Promise<void> { + const gsdDbPath = resolveProjectRootDbPath(basePath); + if (!existsSync(gsdDbPath)) return; + if (isDbAvailable()) return; + + try { + const { openDatabase } = await import("./gsd-db.js"); + openDatabase(gsdDbPath); + } catch { + /* non-fatal — DB lifecycle block below will retry */ + } +} + /** Guard: tracks consecutive bootstrap attempts that found phase === "complete". * Prevents the recursive dialog loop described in #1348 where * bootstrapAutoSession → showSmartEntry → checkAutoStartAfterDiscuss → startAuto * cycles indefinitely when the discuss workflow doesn't produce a milestone.
*/ let _consecutiveCompleteBootstraps = 0; const MAX_CONSECUTIVE_COMPLETE_BOOTSTRAPS = 2; - -async function openProjectDbIfPresent(basePath: string): Promise<void> { - const gsdDbPath = resolveProjectRootDbPath(basePath); - if (!existsSync(gsdDbPath) || isDbAvailable()) return; - - try { - openDatabase(gsdDbPath); - } catch (err) { - process.stderr.write( - `gsd-db: failed to open existing database: ${(err as Error).message}\n`, - ); - } -} - export async function bootstrapAutoSession( s: AutoSession, ctx: ExtensionCommandContext, @@ -198,10 +203,13 @@ export async function bootstrapAutoSession( ensureGitignore(base, { manageGitignore }); if (manageGitignore !== false) untrackRuntimeFiles(base); - // Bootstrap .gsd/ if it doesn't exist + // Bootstrap milestones/ if it doesn't exist. + // Check milestones/ directly — ensureGsdSymlink above already created .gsd/, + // so checking .gsd/ existence would be dead code (#2942). const gsdDir = join(base, ".gsd"); - if (!existsSync(gsdDir)) { - mkdirSync(join(gsdDir, "milestones"), { recursive: true }); + const milestonesPath = join(gsdDir, "milestones"); + if (!existsSync(milestonesPath)) { + mkdirSync(milestonesPath, { recursive: true }); try { nativeAddAll(base); nativeCommit(base, "chore: init gsd"); @@ -280,10 +288,6 @@ export async function bootstrapAutoSession( ctx.ui.notify(`Debug logging enabled → ${getDebugLogPath()}`, "info"); } - // Open the project DB before the first derive so resume uses DB truth - // immediately on cold starts instead of falling back to markdown (#2841). - await openProjectDbIfPresent(base); - // Invalidate caches before initial state derivation invalidateAllCaches(); @@ -293,6 +297,10 @@ export async function bootstrapAutoSession( (mid) => !!resolveMilestoneFile(base, mid, "SUMMARY"), ); + // Open the project-root DB before deriveState so DB-backed state + // derivation (queue-order, task status) works on a cold start (#2841).
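The open-if-present pattern behind `openProjectDbIfPresent` is small enough to sketch in isolation. Here `exists` and `open` are injected stand-ins for `existsSync` and `openDatabase`; the real function also resolves the DB path and dynamically imports `gsd-db.js`:

```typescript
// Module-level handle standing in for the real DB connection state.
let handle: string | null = null;

function isDbAvailable(): boolean {
  return handle !== null;
}

// Idempotent, failure-tolerant opener: no-op when the file is missing or a
// handle already exists; swallows open failures so a later lifecycle block
// can retry while state derivation falls back to markdown.
function openProjectDbIfPresent(
  dbPath: string,
  exists: (p: string) => boolean,
  open: (p: string) => string,
): void {
  if (!exists(dbPath)) return; // cold project with no DB yet
  if (isDbAvailable()) return; // already open: idempotent
  try {
    handle = open(dbPath);
  } catch {
    // non-fatal: the caller's DB lifecycle code retries later
  }
}
```

The ordering in the diff matters more than the helper itself: the open must happen before the first `deriveState` call so DB-backed derivation works on a cold start.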
+ await openProjectDbIfPresent(base); + let state = await deriveState(base); // Stale worktree state recovery (#654) @@ -490,7 +498,6 @@ export async function bootstrapAutoSession( s.verbose = verboseMode; s.cmdCtx = ctx; s.basePath = base; - setLogBasePath(base); s.unitDispatchCount.clear(); s.unitRecoveryCount.clear(); s.lastBudgetAlertLevel = 0; @@ -554,14 +561,15 @@ export async function bootstrapAutoSession( } // ── DB lifecycle ── - const gsdDbPath = resolveProjectRootDbPath(s.basePath); + const gsdDbPath = join(s.basePath, ".gsd", "gsd.db"); const gsdDirPath = join(s.basePath, ".gsd"); if (existsSync(gsdDirPath) && !existsSync(gsdDbPath)) { const hasDecisions = existsSync(join(gsdDirPath, "DECISIONS.md")); const hasRequirements = existsSync(join(gsdDirPath, "REQUIREMENTS.md")); const hasMilestones = existsSync(join(gsdDirPath, "milestones")); try { - openDatabase(gsdDbPath); + const { openDatabase: openDb } = await import("./gsd-db.js"); + openDb(gsdDbPath); if (hasDecisions || hasRequirements || hasMilestones) { const { migrateFromMarkdown } = await import("./md-importer.js"); migrateFromMarkdown(s.basePath); @@ -574,7 +582,8 @@ export async function bootstrapAutoSession( } if (existsSync(gsdDbPath) && !isDbAvailable()) { try { - openDatabase(gsdDbPath); + const { openDatabase: openDb } = await import("./gsd-db.js"); + openDb(gsdDbPath); } catch (err) { process.stderr.write( `gsd-db: failed to open existing database: ${(err as Error).message}\n`, diff --git a/src/resources/extensions/gsd/auto-worktree.ts b/src/resources/extensions/gsd/auto-worktree.ts index c07c7d4e5..92cb389c8 100644 --- a/src/resources/extensions/gsd/auto-worktree.ts +++ b/src/resources/extensions/gsd/auto-worktree.ts @@ -15,6 +15,7 @@ import { realpathSync, rmSync, unlinkSync, + statSync, lstatSync as lstatSyncFn, } from "node:fs"; import { isAbsolute, join, sep as pathSep } from "node:path"; @@ -62,6 +63,7 @@ import { nativeDiffNumstat, nativeUpdateRef, nativeIsAncestor, + 
nativeMergeAbort, } from "./native-git-bridge.js"; const gsdHome = process.env.GSD_HOME || join(homedir(), ".gsd"); @@ -84,6 +86,7 @@ const ROOT_STATE_FILES = [ "QUEUE.md", "completed-units.json", "metrics.json", + "mcp.json", // NOTE: project preferences are intentionally NOT in ROOT_STATE_FILES. // Forward-sync (main → worktree) is handled explicitly in syncGsdStateToWorktree(). // Back-sync (worktree → main) must NEVER overwrite the project root's copy @@ -102,6 +105,67 @@ function isSamePath(a: string, b: string): boolean { } } +// ─── ASSESSMENT Force-Sync Helper (#2821) ───────────────────────────────── + +/** Regex matching YAML frontmatter `verdict:` field. */ +const VERDICT_RE = /verdict:\s*[\w-]+/i; + +/** + * Walk a milestone directory and force-overwrite ASSESSMENT files in the + * destination when the source copy contains a `verdict:` field. + * + * This is the targeted fix for the UAT stuck-loop (#2821): the main + * safeCopyRecursive uses force:false to protect worktree-authoritative + * files (#1886), but ASSESSMENT files written by run-uat must be + * forward-synced when the project root has a verdict. Without this, + * the worktree retains a stale FAIL or missing ASSESSMENT and + * checkNeedsRunUat re-dispatches run-uat indefinitely. + * + * Only overwrites when the source has a verdict — never clobbers a + * worktree ASSESSMENT with a verdictless project-root copy. 
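The force-sync gate described above reduces to a single predicate over the source file's contents. A sketch, assuming the same `verdict:` frontmatter convention the comment describes:

```typescript
// Matches a YAML frontmatter `verdict:` field, e.g. "verdict: pass".
const VERDICT_RE = /verdict:\s*[\w-]+/i;

// Only a source ASSESSMENT that actually carries a verdict may overwrite
// the worktree copy; a verdictless project-root file never clobbers it.
function shouldForceSync(srcContent: string): boolean {
  return VERDICT_RE.test(srcContent);
}
```

This one-way condition is what keeps the fix targeted: stale worktree FAIL verdicts get replaced, but the protection from #1886 (worktree-authoritative files survive additive sync) is preserved for everything without a verdict.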
+ */ +function forceOverwriteAssessmentsWithVerdict( + srcMilestoneDir: string, + dstMilestoneDir: string, +): void { + if (!existsSync(srcMilestoneDir)) return; + + // Walk slices// looking for *-ASSESSMENT.md files + const slicesDir = join(srcMilestoneDir, "slices"); + if (!existsSync(slicesDir)) return; + + try { + for (const sliceEntry of readdirSync(slicesDir, { withFileTypes: true })) { + if (!sliceEntry.isDirectory()) continue; + const srcSliceDir = join(slicesDir, sliceEntry.name); + const dstSliceDir = join(dstMilestoneDir, "slices", sliceEntry.name); + + try { + for (const fileEntry of readdirSync(srcSliceDir, { withFileTypes: true })) { + if (!fileEntry.isFile()) continue; + if (!fileEntry.name.endsWith("-ASSESSMENT.md")) continue; + + const srcFile = join(srcSliceDir, fileEntry.name); + try { + const srcContent = readFileSync(srcFile, "utf-8"); + if (!VERDICT_RE.test(srcContent)) continue; // no verdict in source — skip + + // Source has a verdict — force-copy into worktree + mkdirSync(dstSliceDir, { recursive: true }); + safeCopy(srcFile, join(dstSliceDir, fileEntry.name), { force: true }); + } catch { + /* non-fatal per file */ + } + } + } catch { + /* non-fatal per slice */ + } + } + } catch { + /* non-fatal */ + } +} + // ─── Module State ────────────────────────────────────────────────────────── /** Original project root before chdir into auto-worktree. */ @@ -214,6 +278,19 @@ export function syncProjectRootToWorktree( { force: false }, ); + // Force-sync ASSESSMENT files that have a verdict from project root (#2821). + // The additive-only copy above preserves worktree-authoritative files, but + // ASSESSMENT files are special: after run-uat writes a verdict and post-unit + // syncs it to the project root, the worktree may retain a stale copy (e.g. + // verdict:fail while the project root has verdict:pass from a retry). 
On + // session resume the DB is rebuilt from disk, and if the stale ASSESSMENT + // persists, checkNeedsRunUat finds no passing verdict → re-dispatches + // run-uat indefinitely (stuck-loop ×9). + forceOverwriteAssessmentsWithVerdict( + join(prGsd, "milestones", milestoneId), + join(wtGsd, "milestones", milestoneId), + ); + // Forward-sync completed-units.json from project root to worktree. // Project root is authoritative for completion state after crash recovery; // without this, the worktree re-dispatches already-completed units (#1886). @@ -223,12 +300,18 @@ export function syncProjectRootToWorktree( { force: true }, ); - // Delete worktree gsd.db so it rebuilds from the freshly synced files. - // Stale DB rows are the root cause of the infinite skip loop (#853). + // Delete worktree gsd.db ONLY if it is empty (0 bytes). + // An empty DB is stale/corrupt and should be rebuilt (#853). + // A non-empty DB was populated by gsd-migrate on respawn and must be + // preserved — deleting it truncates the file to 0 bytes when + // openDatabase re-creates it, causing "no such table" failures (#2815). try { const wtDb = join(wtGsd, "gsd.db"); if (existsSync(wtDb)) { - unlinkSync(wtDb); + const size = statSync(wtDb).size; + if (size === 0) { + unlinkSync(wtDb); + } } } catch { /* non-fatal */ @@ -1004,6 +1087,7 @@ function copyPlanningArtifacts(srcBase: string, wtPath: string): void { "STATE.md", "KNOWLEDGE.md", "OVERRIDES.md", + "mcp.json", ]) { safeCopy(join(srcGsd, file), join(dstGsd, file), { force: true }); } @@ -1414,9 +1498,19 @@ export function mergeMilestoneToMain( encoding: "utf-8", }).trim(); if (status) { + // Use --include-untracked to stash untracked files that would block + // the squash merge, but EXCLUDE .gsd/milestones/ (#2505). + // --include-untracked without exclusion sweeps queued milestone + // CONTEXT files into the stash. 
If stash pop later fails, those files + // are permanently trapped in the stash entry and lost on the next + // stash push or drop. execFileSync( "git", - ["stash", "push", "--include-untracked", "-m", `gsd: pre-merge stash for ${milestoneId}`], + [ + "stash", "push", "--include-untracked", + "-m", `gsd: pre-merge stash for ${milestoneId}`, + "--", ":(exclude).gsd/milestones", + ], { cwd: originalBasePath_, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" }, ); stashed = true; @@ -1426,6 +1520,65 @@ export function mergeMilestoneToMain( // report the dirty tree if it fails. } + // 7a. Shelter queued milestone directories before the squash merge (#2505). + // The milestone branch may contain copies of queued milestone dirs (via + // copyPlanningArtifacts), so `git merge --squash` rejects when those same + // files exist as untracked in the working tree. Temporarily move them to + // a backup location, then restore after the merge+commit. + const milestonesDir = join(gsdRoot(originalBasePath_), "milestones"); + const shelterDir = join(gsdRoot(originalBasePath_), ".milestone-shelter"); + const shelteredDirs: string[] = []; + + // Helper: restore sheltered milestone directories (#2505). + // Called on both success and error paths to ensure queued CONTEXT files + // are never permanently lost. 
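The shelter/restore round trip can be sketched with plain `node:fs` primitives. Directory names here are hypothetical; the real code shelters under `.gsd/.milestone-shelter`, skips the milestone being merged, and treats every step as best-effort rather than throwing:

```typescript
import { mkdirSync, readdirSync, cpSync, rmSync } from "node:fs";
import { join } from "node:path";

// Move every queued milestone directory (except the one being merged)
// out of milestones/ into a shelter, returning the names moved.
function shelter(milestonesDir: string, shelterDir: string, mergingId: string): string[] {
  const sheltered: string[] = [];
  for (const entry of readdirSync(milestonesDir, { withFileTypes: true })) {
    if (!entry.isDirectory() || entry.name === mergingId) continue;
    mkdirSync(shelterDir, { recursive: true });
    cpSync(join(milestonesDir, entry.name), join(shelterDir, entry.name), { recursive: true });
    rmSync(join(milestonesDir, entry.name), { recursive: true, force: true });
    sheltered.push(entry.name);
  }
  return sheltered;
}

// Copy sheltered directories back and remove the shelter. Called on both
// the success and error paths so queued CONTEXT files are never lost.
function restore(milestonesDir: string, shelterDir: string, sheltered: string[]): void {
  for (const name of sheltered) {
    cpSync(join(shelterDir, name), join(milestonesDir, name), { recursive: true, force: true });
  }
  rmSync(shelterDir, { recursive: true, force: true });
}
```

Copy-then-delete (rather than `renameSync`) is the safer ordering here: if the delete fails midway, the shelter copy still exists and `restore` can recover it.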
+ const restoreShelter = (): void => { + if (shelteredDirs.length === 0) return; + for (const dirName of shelteredDirs) { + try { + mkdirSync(milestonesDir, { recursive: true }); + cpSync(join(shelterDir, dirName), join(milestonesDir, dirName), { recursive: true, force: true }); + } catch { /* best-effort */ } + } + try { rmSync(shelterDir, { recursive: true, force: true }); } catch { /* best-effort */ } + }; + + try { + if (existsSync(milestonesDir)) { + const entries = readdirSync(milestonesDir, { withFileTypes: true }); + for (const entry of entries) { + if (!entry.isDirectory()) continue; + // Only shelter directories that do NOT belong to the milestone being merged + if (entry.name === milestoneId) continue; + const srcDir = join(milestonesDir, entry.name); + const dstDir = join(shelterDir, entry.name); + try { + mkdirSync(shelterDir, { recursive: true }); + cpSync(srcDir, dstDir, { recursive: true, force: true }); + rmSync(srcDir, { recursive: true, force: true }); + shelteredDirs.push(entry.name); + } catch { + // Non-fatal — if shelter fails, the merge may still succeed + } + } + } + } catch { + // Non-fatal — proceed with merge; untracked files may block it + } + + // 7b. Clean up stale merge state before attempting squash merge (#2912). + // A leftover MERGE_HEAD (from a previous failed merge, libgit2 native path, + // or interrupted operation) causes `git merge --squash` to refuse with + // "fatal: You have not concluded your merge (MERGE_HEAD exists)". + // Defensively remove merge artifacts before starting. + try { + const gitDir_ = resolveGitDir(originalBasePath_); + for (const f of ["SQUASH_MSG", "MERGE_MSG", "MERGE_HEAD"]) { + const p = join(gitDir_, f); + if (existsSync(p)) unlinkSync(p); + } + } catch { /* best-effort */ } + // 8. 
Squash merge — auto-resolve .gsd/ state file conflicts (#530) const mergeResult = nativeMergeSquash(originalBasePath_, milestoneBranch); @@ -1434,6 +1587,16 @@ export function mergeMilestoneToMain( // untracked .gsd/ files left by syncStateToProjectRoot). Preserve the // milestone branch so commits are not lost. if (mergeResult.conflicts.includes("__dirty_working_tree__")) { + // Defensively clean merge state — the native path may leave MERGE_HEAD + // even when the merge is rejected (#2912). + try { + const gitDir_ = resolveGitDir(originalBasePath_); + for (const f of ["SQUASH_MSG", "MERGE_MSG", "MERGE_HEAD"]) { + const p = join(gitDir_, f); + if (existsSync(p)) unlinkSync(p); + } + } catch { /* best-effort */ } + // Pop stash before throwing so local work is not lost. if (stashed) { try { @@ -1444,6 +1607,7 @@ export function mergeMilestoneToMain( }); } catch { /* stash pop conflict is non-fatal */ } } + restoreShelter(); // Restore cwd so the caller is not stranded on the integration branch process.chdir(previousCwd); // Surface the actual dirty filenames from git stderr instead of @@ -1490,6 +1654,18 @@ export function mergeMilestoneToMain( // If there are still real code conflicts, escalate if (codeConflicts.length > 0) { + // Abort merge state so MERGE_HEAD is not left on disk (#2912). + // libgit2's merge creates MERGE_HEAD even for squash merges; if left + // dangling, subsequent merges fail and doctor reports corrupt state. + try { nativeMergeAbort(originalBasePath_); } catch { /* best-effort */ } + try { + const gitDir_ = resolveGitDir(originalBasePath_); + for (const f of ["SQUASH_MSG", "MERGE_MSG", "MERGE_HEAD"]) { + const p = join(gitDir_, f); + if (existsSync(p)) unlinkSync(p); + } + } catch { /* best-effort */ } + // Pop stash before throwing so local work is not lost (#2151). 
if (stashed) { try { @@ -1500,6 +1676,7 @@ export function mergeMilestoneToMain( }); } catch { /* stash pop conflict is non-fatal */ } } + restoreShelter(); throw new MergeConflictError( codeConflicts, "squash", @@ -1515,14 +1692,18 @@ export function mergeMilestoneToMain( const commitResult = nativeCommit(originalBasePath_, commitMessage); const nothingToCommit = commitResult === null; - // 9a. Clean up SQUASH_MSG left by git merge --squash (#1853). + // 9a. Clean up merge state files left by git merge --squash (#1853, #2912). // git only removes SQUASH_MSG when the commit reads it directly (plain // `git commit`). nativeCommit uses `-F -` (stdin) or libgit2, neither - // of which trigger git's SQUASH_MSG cleanup. If left on disk, doctor - // reports `corrupt_merge_state` on every subsequent run. + // of which trigger git's SQUASH_MSG cleanup. MERGE_HEAD is created by + // libgit2's merge even in squash mode and is not removed by nativeCommit. + // If left on disk, doctor reports `corrupt_merge_state` on every subsequent run. try { - const squashMsgPath = join(resolveGitDir(originalBasePath_), "SQUASH_MSG"); - if (existsSync(squashMsgPath)) unlinkSync(squashMsgPath); + const gitDir_ = resolveGitDir(originalBasePath_); + for (const f of ["SQUASH_MSG", "MERGE_MSG", "MERGE_HEAD"]) { + const p = join(gitDir_, f); + if (existsSync(p)) unlinkSync(p); + } } catch { /* best-effort */ } // 9a-ii. Restore stashed files now that the merge+commit is complete (#2151). @@ -1581,6 +1762,9 @@ export function mergeMilestoneToMain( } } + // 9a-iii. Restore sheltered queued milestone directories (#2505). + restoreShelter(); + // 9b. Safety check (#1792): if nothing was committed, verify the milestone // work is already on the integration branch before allowing teardown. 
// Compare only non-.gsd/ paths — .gsd/ state files diverge normally and diff --git a/src/resources/extensions/gsd/auto/loop-deps.ts b/src/resources/extensions/gsd/auto/loop-deps.ts index 565dde5a3..a7678d85f 100644 --- a/src/resources/extensions/gsd/auto/loop-deps.ts +++ b/src/resources/extensions/gsd/auto/loop-deps.ts @@ -93,6 +93,7 @@ export interface LoopDeps { body: string, kind: string, category: string, + projectName?: string, ) => void; setActiveMilestoneId: (basePath: string, mid: string) => void; pruneQueueOrder: (basePath: string, pendingIds: string[]) => void; diff --git a/src/resources/extensions/gsd/auto/phases.ts b/src/resources/extensions/gsd/auto/phases.ts index 06778ff1b..620fe6809 100644 --- a/src/resources/extensions/gsd/auto/phases.ts +++ b/src/resources/extensions/gsd/auto/phases.ts @@ -26,7 +26,7 @@ import { runUnit } from "./run-unit.js"; import { debugLog } from "../debug-logger.js"; import { PROJECT_FILES } from "../detection.js"; import { MergeConflictError } from "../git-service.js"; -import { join } from "node:path"; +import { join, basename } from "node:path"; import { existsSync, cpSync } from "node:fs"; import { logWarning, logError } from "../workflow-logger.js"; import { gsdRoot } from "../paths.js"; @@ -230,6 +230,7 @@ export async function runPreDispatch( `Milestone ${s.currentMilestoneId} complete!`, "success", "milestone", + basename(s.originalBasePath || s.basePath), ); deps.logCmuxEvent( prefs, @@ -388,6 +389,7 @@ export async function runPreDispatch( "All milestones complete!", "success", "milestone", + basename(s.originalBasePath || s.basePath), ); deps.logCmuxEvent( prefs, @@ -411,7 +413,7 @@ export async function runPreDispatch( const blockerMsg = `Blocked: ${state.blockers.join(", ")}`; await deps.stopAuto(ctx, pi, blockerMsg); ctx.ui.notify(`${blockerMsg}. 
Fix and run /gsd auto.`, "warning"); - deps.sendDesktopNotification("GSD", blockerMsg, "error", "attention"); + deps.sendDesktopNotification("GSD", blockerMsg, "error", "attention", basename(s.originalBasePath || s.basePath)); deps.logCmuxEvent(prefs, blockerMsg, "error"); } else { const ids = incomplete.map((m: { id: string }) => m.id).join(", "); @@ -492,6 +494,7 @@ export async function runPreDispatch( `Milestone ${mid} complete!`, "success", "milestone", + basename(s.originalBasePath || s.basePath), ); deps.logCmuxEvent( prefs, @@ -509,7 +512,7 @@ export async function runPreDispatch( const blockerMsg = `Blocked: ${state.blockers.join(", ")}`; await closeoutAndStop(ctx, pi, s, deps, blockerMsg); ctx.ui.notify(`${blockerMsg}. Fix and run /gsd auto.`, "warning"); - deps.sendDesktopNotification("GSD", blockerMsg, "error", "attention"); + deps.sendDesktopNotification("GSD", blockerMsg, "error", "attention", basename(s.originalBasePath || s.basePath)); deps.logCmuxEvent(prefs, blockerMsg, "error"); debugLog("autoLoop", { phase: "exit", reason: "blocked" }); deps.emitJournalEvent({ ts: new Date().toISOString(), flowId: ic.flowId, seq: ic.nextSeq(), eventType: "terminal", data: { reason: "blocked", blockers: state.blockers } }); @@ -755,7 +758,7 @@ export async function runGuards( // 100% — special enforcement logic (halt/pause/warn) const msg = `Budget ceiling ${deps.formatCost(budgetCeiling)} reached (spent ${deps.formatCost(totalCost)}).`; if (budgetEnforcementAction === "halt") { - deps.sendDesktopNotification("GSD", msg, "error", "budget"); + deps.sendDesktopNotification("GSD", msg, "error", "budget", basename(s.originalBasePath || s.basePath)); await deps.stopAuto(ctx, pi, "Budget ceiling reached"); debugLog("autoLoop", { phase: "exit", reason: "budget-halt" }); return { action: "break", reason: "budget-halt" }; @@ -765,14 +768,14 @@ export async function runGuards( `${msg} Pausing auto-mode — /gsd auto to override and continue.`, "warning", ); - 
deps.sendDesktopNotification("GSD", msg, "warning", "budget"); + deps.sendDesktopNotification("GSD", msg, "warning", "budget", basename(s.originalBasePath || s.basePath)); deps.logCmuxEvent(prefs, msg, "warning"); await deps.pauseAuto(ctx, pi); debugLog("autoLoop", { phase: "exit", reason: "budget-pause" }); return { action: "break", reason: "budget-pause" }; } ctx.ui.notify(`${msg} Continuing (enforcement: warn).`, "warning"); - deps.sendDesktopNotification("GSD", msg, "warning", "budget"); + deps.sendDesktopNotification("GSD", msg, "warning", "budget", basename(s.originalBasePath || s.basePath)); deps.logCmuxEvent(prefs, msg, "warning"); } else if (threshold.pct < 100) { // Sub-100% — simple notification @@ -783,6 +786,7 @@ export async function runGuards( msg, threshold.notifyLevel, "budget", + basename(s.originalBasePath || s.basePath), ); deps.logCmuxEvent(prefs, msg, threshold.cmuxLevel); } @@ -812,6 +816,7 @@ export async function runGuards( `Context ${contextUsage.percent}% — paused`, "warning", "attention", + basename(s.originalBasePath || s.basePath), ); await deps.pauseAuto(ctx, pi); debugLog("autoLoop", { phase: "exit", reason: "context-window" }); @@ -929,6 +934,23 @@ export async function runUnitPhase( }, ); + // Select and apply model (with tier escalation on retry — normal units only) + const modelResult = await deps.selectAndApplyModel( + ctx, + pi, + unitType, + unitId, + s.basePath, + prefs, + s.verbose, + s.autoModeStartModel, + sidecarItem ? 
undefined : { isRetry, previousTier }, + ); + s.currentUnitRouting = + modelResult.routing as AutoSession["currentUnitRouting"]; + s.currentUnitModel = + modelResult.appliedModel as AutoSession["currentUnitModel"]; + // Status bar + progress widget ctx.ui.setStatus("gsd-auto", "auto"); if (mid) @@ -1001,23 +1023,6 @@ export async function runUnitPhase( logWarning("engine", "Prompt reorder failed", { error: msg }); } - // Select and apply model (with tier escalation on retry — normal units only) - const modelResult = await deps.selectAndApplyModel( - ctx, - pi, - unitType, - unitId, - s.basePath, - prefs, - s.verbose, - s.autoModeStartModel, - sidecarItem ? undefined : { isRetry, previousTier }, - ); - s.currentUnitRouting = - modelResult.routing as AutoSession["currentUnitRouting"]; - s.currentUnitModel = - modelResult.appliedModel as AutoSession["currentUnitModel"]; - // Apply sidecar/pre-dispatch hook model override (takes priority over standard model selection) const hookModelOverride = sidecarItem?.model ?? iterData.hookModelOverride; if (hookModelOverride) { @@ -1142,14 +1147,18 @@ export async function runUnitPhase( // ── Immediate unit closeout (metrics, activity log, memory) ──────── // Run right after runUnit() returns so telemetry is never lost to a // crash between iterations. - await deps.closeoutUnit( - ctx, - s.basePath, - unitType, - unitId, - s.currentUnit.startedAt, - deps.buildSnapshotOpts(unitType, unitId), - ); + // Guard: stopAuto() may have nulled s.currentUnit via s.reset() while + // this coroutine was suspended at `await runUnit(...)` (#2939). 
+ if (s.currentUnit) { + await deps.closeoutUnit( + ctx, + s.basePath, + unitType, + unitId, + s.currentUnit.startedAt, + deps.buildSnapshotOpts(unitType, unitId), + ); + } // ── Zero tool-call guard (#1833) ────────────────────────────────── // An execute-task agent that completes with 0 tool calls made no @@ -1159,7 +1168,7 @@ export async function runUnitPhase( const currentLedger = deps.getLedger() as { units: Array<{ type: string; id: string; startedAt: number; toolCalls: number }> } | null; if (currentLedger?.units) { const lastUnit = [...currentLedger.units].reverse().find( - (u: { type: string; id: string; startedAt: number; toolCalls: number }) => u.type === unitType && u.id === unitId && u.startedAt === s.currentUnit!.startedAt, + (u: { type: string; id: string; startedAt: number; toolCalls: number }) => u.type === unitType && u.id === unitId && u.startedAt === s.currentUnit?.startedAt, ); if (lastUnit && lastUnit.toolCalls === 0) { debugLog("runUnitPhase", { @@ -1174,7 +1183,7 @@ export async function runUnitPhase( ); // Fall through to next iteration where dispatch will re-derive // and re-dispatch this task. - return { action: "next", data: { unitStartedAt: s.currentUnit.startedAt } }; + return { action: "next", data: { unitStartedAt: s.currentUnit?.startedAt } }; } } } @@ -1198,7 +1207,7 @@ export async function runUnitPhase( deps.emitJournalEvent({ ts: new Date().toISOString(), flowId: ic.flowId, seq: ic.nextSeq(), eventType: "unit-end", data: { unitType, unitId, status: unitResult.status, artifactVerified, ...(unitResult.errorContext ? 
{ errorContext: unitResult.errorContext } : {}) }, causedBy: { flowId: ic.flowId, seq: unitStartSeq } }); - return { action: "next", data: { unitStartedAt: s.currentUnit.startedAt } }; + return { action: "next", data: { unitStartedAt: s.currentUnit?.startedAt } }; } // ─── runFinalize ────────────────────────────────────────────────────────────── diff --git a/src/resources/extensions/gsd/bootstrap/agent-end-recovery.ts b/src/resources/extensions/gsd/bootstrap/agent-end-recovery.ts index 22dd56075..5e40359b7 100644 --- a/src/resources/extensions/gsd/bootstrap/agent-end-recovery.ts +++ b/src/resources/extensions/gsd/bootstrap/agent-end-recovery.ts @@ -68,6 +68,28 @@ export async function handleAgentEnd( const lastMsg = event.messages[event.messages.length - 1]; if (lastMsg && "stopReason" in lastMsg && lastMsg.stopReason === "aborted") { + // Empty content with aborted stopReason is a non-fatal agent stop (the LLM + // chose to end without producing output). Only pause on genuine fatal aborts + // that carry error context — e.g. errorMessage field or non-empty content + // indicating a mid-stream failure. (#2695) + const content = "content" in lastMsg ? lastMsg.content : undefined; + const hasEmptyContent = Array.isArray(content) && content.length === 0; + const hasErrorMessage = "errorMessage" in lastMsg && !!lastMsg.errorMessage; + + if (hasEmptyContent && !hasErrorMessage) { + // Non-fatal: treat as a normal agent end so the loop can continue + // instead of entering a stuck re-dispatch cycle. + try { + resetRetryState(retryState); + resolveAgentEnd(event); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + ctx.ui.notify(`Auto-mode error after empty-content abort: ${message}. Stopping auto-mode.`, "error"); + try { await pauseAuto(ctx, pi); } catch { /* best-effort */ } + } + return; + } + await pauseAuto(ctx, pi); return; } @@ -79,6 +101,15 @@ export async function handleAgentEnd( // ── 1. 
Classify ────────────────────────────────────────────────────── const cls = classifyError(errorMsg, explicitRetryAfterMs); + // Cap rate-limit backoff for CLI-style providers (openai-codex, google-gemini-cli) + // which use per-user quotas with shorter windows (#2922). + if (cls.kind === "rate-limit") { + const currentProvider = ctx.model?.provider; + if (currentProvider === "openai-codex" || currentProvider === "google-gemini-cli") { + cls.retryAfterMs = Math.min(cls.retryAfterMs, 30_000); + } + } + // ── 2. Decide & Act ────────────────────────────────────────────────── // --- Network errors: same-model retry with backoff --- diff --git a/src/resources/extensions/gsd/bootstrap/db-tools.ts b/src/resources/extensions/gsd/bootstrap/db-tools.ts index 8e6e490d2..d4ebe91dc 100644 --- a/src/resources/extensions/gsd/bootstrap/db-tools.ts +++ b/src/resources/extensions/gsd/bootstrap/db-tools.ts @@ -121,14 +121,6 @@ export function registerDbTools(pi: ExtensionAPI): void { }; } try { - const db = await import("../gsd-db.js"); - const existing = db.getRequirementById(params.id); - if (!existing) { - return { - content: [{ type: "text" as const, text: `Error: Requirement ${params.id} not found.` }], - details: { operation: "update_requirement", id: params.id, error: "not_found" } as any, - }; - } const { updateRequirementInDb } = await import("../db-writer.js"); const updates: Record = {}; if (params.status !== undefined) updates.status = params.status; @@ -196,6 +188,91 @@ export function registerDbTools(pi: ExtensionAPI): void { pi.registerTool(requirementUpdateTool); registerAlias(pi, requirementUpdateTool, "gsd_update_requirement", "gsd_requirement_update"); + // ─── gsd_requirement_save ───────────────────────────────────────────── + + const requirementSaveExecute = async (_toolCallId: string, params: any, _signal: AbortSignal | undefined, _onUpdate: unknown, _ctx: unknown) => { + const dbAvailable = await ensureDbOpen(); + if (!dbAvailable) { + return { + content: [{ 
type: "text" as const, text: "Error: GSD database is not available. Cannot save requirement." }], + details: { operation: "save_requirement", error: "db_unavailable" } as any, + }; + } + try { + const { saveRequirementToDb } = await import("../db-writer.js"); + const result = await saveRequirementToDb( + { + class: params.class, + status: params.status, + description: params.description, + why: params.why, + source: params.source, + primary_owner: params.primary_owner, + supporting_slices: params.supporting_slices, + validation: params.validation, + notes: params.notes, + }, + process.cwd(), + ); + return { + content: [{ type: "text" as const, text: `Saved requirement ${result.id}` }], + details: { operation: "save_requirement", id: result.id } as any, + }; + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + logError("tool", `gsd_requirement_save tool failed: ${msg}`, { tool: "gsd_requirement_save", error: String(err) }); + return { + content: [{ type: "text" as const, text: `Error saving requirement: ${msg}` }], + details: { operation: "save_requirement", error: msg } as any, + }; + } + }; + + const requirementSaveTool = { + name: "gsd_requirement_save", + label: "Save Requirement", + description: + "Record a new requirement to the GSD database and regenerate REQUIREMENTS.md. " + + "Requirement IDs are auto-assigned — never provide an ID manually.", + promptSnippet: "Record a new GSD requirement to the database (auto-assigns ID, regenerates REQUIREMENTS.md)", + promptGuidelines: [ + "Use gsd_requirement_save when recording a new functional, non-functional, or operational requirement.", + "Requirement IDs are auto-assigned (R001, R002, ...) — never guess or provide an ID.", + "class, description, why, and source are required. All other fields are optional.", + "The tool writes to the DB and regenerates .gsd/REQUIREMENTS.md automatically.", + ], + parameters: Type.Object({ + class: Type.String({ description: "Requirement class (e.g. 
'functional', 'non-functional', 'operational')" }), + description: Type.String({ description: "Short description of the requirement" }), + why: Type.String({ description: "Why this requirement matters" }), + source: Type.String({ description: "Origin of the requirement (e.g. 'user-research', 'design', 'M001')" }), + status: Type.Optional(Type.String({ description: "Status (default: 'active')" })), + primary_owner: Type.Optional(Type.String({ description: "Primary owning slice" })), + supporting_slices: Type.Optional(Type.String({ description: "Supporting slices" })), + validation: Type.Optional(Type.String({ description: "Validation criteria" })), + notes: Type.Optional(Type.String({ description: "Additional notes" })), + }), + execute: requirementSaveExecute, + renderCall(args: any, theme: any) { + let text = theme.fg("toolTitle", theme.bold("requirement_save ")); + if (args.class) text += theme.fg("accent", `[${args.class}] `); + if (args.description) text += theme.fg("muted", args.description); + return new Text(text, 0, 0); + }, + renderResult(result: any, _options: any, theme: any) { + const d = result.details; + if (result.isError || d?.error) { + return new Text(theme.fg("error", `Error: ${d?.error ?? "unknown"}`), 0, 0); + } + let text = theme.fg("success", `Requirement ${d?.id ?? 
""} saved`); + text += theme.fg("dim", ` → REQUIREMENTS.md`); + return new Text(text, 0, 0); + }, + }; + + pi.registerTool(requirementSaveTool); + registerAlias(pi, requirementSaveTool, "gsd_save_requirement", "gsd_requirement_save"); + // ─── gsd_summary_save (formerly gsd_save_summary) ────────────────────── const summarySaveExecute = async (_toolCallId: string, params: any, _signal: AbortSignal | undefined, _onUpdate: unknown, _ctx: unknown) => { diff --git a/src/resources/extensions/gsd/bootstrap/dynamic-tools.ts b/src/resources/extensions/gsd/bootstrap/dynamic-tools.ts index a261555a3..79b5a9ae6 100644 --- a/src/resources/extensions/gsd/bootstrap/dynamic-tools.ts +++ b/src/resources/extensions/gsd/bootstrap/dynamic-tools.ts @@ -32,6 +32,31 @@ export function resolveProjectRootDbPath(basePath: string): string { return join(projectRoot, ".gsd", "gsd.db"); } + // Symlink-resolved layout: /.gsd/projects//worktrees/M001/... + // The project root is everything before /.gsd/projects/ (#2517) + const symlinkMarker = `${sep}.gsd${sep}projects${sep}`; + const symlinkIdx = basePath.indexOf(symlinkMarker); + if (symlinkIdx !== -1) { + const afterProjects = basePath.slice(symlinkIdx + symlinkMarker.length); + // Expect: /worktrees/... 
+ const worktreeSeg = `${sep}worktrees${sep}`; + if (afterProjects.includes(worktreeSeg)) { + const projectRoot = basePath.slice(0, symlinkIdx); + return join(projectRoot, ".gsd", "gsd.db"); + } + } + + // Forward-slash variant for symlink-resolved layout + const fwdSymlinkMarker = "/.gsd/projects/"; + const fwdSymlinkIdx = basePath.indexOf(fwdSymlinkMarker); + if (fwdSymlinkIdx !== -1) { + const afterProjects = basePath.slice(fwdSymlinkIdx + fwdSymlinkMarker.length); + if (afterProjects.includes("/worktrees/")) { + const projectRoot = basePath.slice(0, fwdSymlinkIdx); + return join(projectRoot, ".gsd", "gsd.db"); + } + } + return join(basePath, ".gsd", "gsd.db"); } @@ -81,8 +106,20 @@ export async function ensureDbOpen(): Promise { return opened; } + process.stderr.write( + `gsd-db: ensureDbOpen failed — no .gsd directory found (resolvedPath=${resolveProjectRootDbPath(basePath)}, cwd=${basePath})\n`, + ); return false; - } catch { + } catch (err) { + const basePath = process.cwd(); + const diagnostic = { + resolvedPath: resolveProjectRootDbPath(basePath), + cwd: basePath, + error: (err as Error).message ?? 
String(err), + }; + process.stderr.write( + `gsd-db: ensureDbOpen failed — ${JSON.stringify(diagnostic)}\n`, + ); return false; } } diff --git a/src/resources/extensions/gsd/bootstrap/register-hooks.ts b/src/resources/extensions/gsd/bootstrap/register-hooks.ts index 4fd7a1292..d8690c7a3 100644 --- a/src/resources/extensions/gsd/bootstrap/register-hooks.ts +++ b/src/resources/extensions/gsd/bootstrap/register-hooks.ts @@ -6,8 +6,9 @@ import { isToolCallEventType } from "@gsd/pi-coding-agent"; import { buildMilestoneFileName, resolveMilestonePath, resolveSliceFile, resolveSlicePath } from "../paths.js"; import { buildBeforeAgentStartResult } from "./system-context.js"; import { handleAgentEnd } from "./agent-end-recovery.js"; -import { clearDiscussionFlowState, isDepthVerified, isQueuePhaseActive, markDepthVerified, resetWriteGateState, shouldBlockContextWrite } from "./write-gate.js"; +import { clearDiscussionFlowState, isDepthVerified, isQueuePhaseActive, markDepthVerified, resetWriteGateState, shouldBlockContextWrite, shouldBlockQueueExecution } from "./write-gate.js"; import { isBlockedStateFile, isBashWriteToStateFile, BLOCKED_WRITE_ERROR } from "../write-intercept.js"; +import { cleanupQuickBranch } from "../quick.js"; import { getDiscussionMilestoneId } from "../guided-flow.js"; import { loadToolApiKeys } from "../commands-config.js"; import { loadFile, saveFile, formatContinue } from "../files.js"; @@ -16,8 +17,6 @@ import { getAutoDashboardData, isAutoActive, isAutoPaused, markToolEnd, markTool import { isParallelActive, shutdownParallel } from "../parallel-orchestrator.js"; import { checkToolCallLoop, resetToolCallLoopGuard } from "./tool-call-loop-guard.js"; import { saveActivityLog } from "../activity-log.js"; -import { startRtkStatusUpdates, stopRtkStatusUpdates } from "../rtk-status.js"; -import { rewriteCommandWithRtk } from "../../shared/rtk.js"; // Skip the welcome screen on the very first session_start — cli.ts already // printed it before the TUI 
launched. Only re-print on /clear (subsequent sessions). @@ -29,19 +28,10 @@ async function syncServiceTierStatus(ctx: ExtensionContext): Promise { } export function registerHooks(pi: ExtensionAPI): void { - // Route all agent bash tool commands through RTK rewrite when opted in. - // This is a no-op when RTK is disabled or not installed. - pi.on("bash_transform", async (event) => { - const rewritten = rewriteCommandWithRtk(event.command); - if (rewritten === event.command) return undefined; - return { command: rewritten }; - }); - pi.on("session_start", async (_event, ctx) => { resetWriteGateState(); resetToolCallLoopGuard(); await syncServiceTierStatus(ctx); - startRtkStatusUpdates(ctx); // Apply show_token_cost preference (#1515) try { @@ -86,11 +76,6 @@ export function registerHooks(pi: ExtensionAPI): void { clearDiscussionFlowState(); await syncServiceTierStatus(ctx); loadToolApiKeys(); - startRtkStatusUpdates(ctx); - }); - - pi.on("session_fork", async (_event, ctx) => { - startRtkStatusUpdates(ctx); }); pi.on("before_agent_start", async (event, ctx: ExtensionContext) => { @@ -102,6 +87,17 @@ export function registerHooks(pi: ExtensionAPI): void { await handleAgentEnd(pi, event, ctx); }); + // Squash-merge quick-task branch back to the original branch after the + // agent turn completes (#2668). cleanupQuickBranch is a no-op when no + // quick-return state is pending, so this is safe to call on every turn. + pi.on("turn_end", async () => { + try { + cleanupQuickBranch(); + } catch { + // Best-effort: don't break the turn lifecycle if cleanup fails. 
+ } + }); + pi.on("session_before_compact", async () => { if (isAutoActive() || isAutoPaused()) { return { cancel: true }; @@ -139,7 +135,6 @@ export function registerHooks(pi: ExtensionAPI): void { }); pi.on("session_shutdown", async (_event, ctx: ExtensionContext) => { - stopRtkStatusUpdates(ctx); if (isParallelActive()) { try { await shutdownParallel(process.cwd()); @@ -161,6 +156,23 @@ export function registerHooks(pi: ExtensionAPI): void { return { block: true, reason: loopCheck.reason }; } + // ── Queue-mode execution guard (#2545): block source-code mutations ── + // When /gsd queue is active, the agent should only create milestones, + // not execute work. Block write/edit to non-.gsd/ paths and bash commands + // that would modify files. + if (isQueuePhaseActive()) { + let queueInput = ""; + if (isToolCallEventType("write", event)) { + queueInput = event.input.path; + } else if (isToolCallEventType("edit", event)) { + queueInput = event.input.path; + } else if (isToolCallEventType("bash", event)) { + queueInput = event.input.command; + } + const queueGuard = shouldBlockQueueExecution(event.toolName, queueInput, true); + if (queueGuard.block) return queueGuard; + } + // ── Single-writer engine: block direct writes to STATE.md ────────── // Covers write, edit, and bash tools to prevent bypass vectors. 
if (isToolCallEventType("write", event)) { @@ -245,7 +257,7 @@ export function registerHooks(pi: ExtensionAPI): void { pi.on("tool_execution_start", async (event) => { if (!isAutoActive()) return; - markToolStart(event.toolCallId, event.toolName); + markToolStart(event.toolCallId); }); pi.on("tool_execution_end", async (event) => { diff --git a/src/resources/extensions/gsd/bootstrap/system-context.ts b/src/resources/extensions/gsd/bootstrap/system-context.ts index 0a8255fdc..94930375a 100644 --- a/src/resources/extensions/gsd/bootstrap/system-context.ts +++ b/src/resources/extensions/gsd/bootstrap/system-context.ts @@ -1,4 +1,4 @@ -import { existsSync, readFileSync } from "node:fs"; +import { existsSync, readFileSync, unlinkSync } from "node:fs"; import { homedir } from "node:os"; import { join } from "node:path"; @@ -6,6 +6,7 @@ import type { ExtensionContext } from "@gsd/pi-coding-agent"; import { debugTime } from "../debug-logger.js"; import { loadPrompt } from "../prompt-loader.js"; +import { readForensicsMarker } from "../forensics.js"; import { resolveAllSkillReferences, renderPreferencesForSystemPrompt, loadEffectiveGSDPreferences } from "../preferences.js"; import { resolveGsdRootFile, resolveSliceFile, resolveSlicePath, resolveTaskFile, resolveTaskFiles, resolveTasksDir, relSliceFile, relSlicePath, relTaskFile } from "../paths.js"; import { hasSkillSnapshot, detectNewSkills, formatSkillsXml } from "../skill-discovery.js"; @@ -94,30 +95,54 @@ export async function buildBeforeAgentStartResult( } } + let codebaseBlock = ""; + const codebasePath = resolveGsdRootFile(process.cwd(), "CODEBASE"); + if (existsSync(codebasePath)) { + try { + const rawContent = readFileSync(codebasePath, "utf-8").trim(); + if (rawContent) { + // Cap injection size to ~2 000 tokens to avoid bloating every request. + // Full map is always available at .gsd/CODEBASE.md. 
+ const MAX_CODEBASE_CHARS = 8_000; + const generatedMatch = rawContent.match(/Generated: (\S+)/); + const generatedAt = generatedMatch?.[1] ?? "unknown"; + const content = rawContent.length > MAX_CODEBASE_CHARS + ? rawContent.slice(0, MAX_CODEBASE_CHARS) + "\n\n*(truncated — see .gsd/CODEBASE.md for full map)*" + : rawContent; + codebaseBlock = `\n\n[PROJECT CODEBASE — File structure and descriptions (generated ${generatedAt}, may be stale — run /gsd codebase update to refresh)]\n\n${content}`; + } + } catch { + // skip + } + } + warnDeprecatedAgentInstructions(); const injection = await buildGuidedExecuteContextInjection(event.prompt, process.cwd()); + + // Re-inject forensics context on follow-up turns (#2941) + const forensicsInjection = !injection ? buildForensicsContextInjection(process.cwd()) : null; + const worktreeBlock = buildWorktreeContextBlock(); - const fullSystem = `${event.systemPrompt}\n\n[SYSTEM CONTEXT — GSD]\n\n${systemContent}${preferenceBlock}${knowledgeBlock}${memoryBlock}${newSkillsBlock}${worktreeBlock}`; + const fullSystem = `${event.systemPrompt}\n\n[SYSTEM CONTEXT — GSD]\n\n${systemContent}${preferenceBlock}${knowledgeBlock}${codebaseBlock}${memoryBlock}${newSkillsBlock}${worktreeBlock}`; stopContextTimer({ systemPromptSize: fullSystem.length, - injectionSize: injection?.length ?? 0, + injectionSize: injection?.length ?? forensicsInjection?.length ?? 0, hasPreferences: preferenceBlock.length > 0, hasNewSkills: newSkillsBlock.length > 0, }); + // Determine which context message to inject (guided execute takes priority) + const contextMessage = injection + ? { customType: "gsd-guided-context", content: injection, display: false as const } + : forensicsInjection + ? { customType: "gsd-forensics", content: forensicsInjection, display: false as const } + : null; + return { systemPrompt: fullSystem, - ...(injection - ? 
{ - message: { - customType: "gsd-guided-context", - content: injection, - display: false as const, - }, - } - : {}), + ...(contextMessage ? { message: contextMessage } : {}), }; } @@ -375,3 +400,38 @@ function oneLine(text: string): string { return text.replace(/\s+/g, " ").trim(); } +// ─── Forensics Context Re-injection (#2941) ────────────────────────────────── + +/** + * Check for an active forensics session and return the prompt content + * so it can be re-injected on follow-up turns. + */ +function buildForensicsContextInjection(basePath: string): string | null { + const marker = readForensicsMarker(basePath); + if (!marker) return null; + + // Expire markers older than 2 hours to avoid stale context + const age = Date.now() - new Date(marker.createdAt).getTime(); + if (age > 2 * 60 * 60 * 1000) { + clearForensicsMarker(basePath); + return null; + } + + return marker.promptContent; +} + +/** + * Remove the active forensics marker file, e.g. when the investigation + * is complete or the session expires. + */ +export function clearForensicsMarker(basePath: string): void { + const markerPath = join(basePath, ".gsd", "runtime", "active-forensics.json"); + if (existsSync(markerPath)) { + try { + unlinkSync(markerPath); + } catch { + // non-fatal + } + } +} + diff --git a/src/resources/extensions/gsd/bootstrap/write-gate.ts b/src/resources/extensions/gsd/bootstrap/write-gate.ts index 75a964021..c73d7f828 100644 --- a/src/resources/extensions/gsd/bootstrap/write-gate.ts +++ b/src/resources/extensions/gsd/bootstrap/write-gate.ts @@ -1,5 +1,31 @@ const MILESTONE_CONTEXT_RE = /M\d+(?:-[a-z0-9]{6})?-CONTEXT\.md$/; +/** + * Path segment that identifies .gsd/ planning artifacts. + * Writes to these paths are allowed during queue mode. + */ +const GSD_DIR_RE = /(^|[/\\])\.gsd([/\\]|$)/; + +/** + * Read-only tool names that are always safe during queue mode. 
+ */ +const QUEUE_SAFE_TOOLS = new Set([ + "read", "grep", "find", "ls", "glob", + // Discussion & planning tools + "ask_user_questions", + "gsd_milestone_generate_id", + "gsd_summary_save", + // Web research tools used during queue discussion + "search-the-web", "resolve_library", "get_library_docs", "fetch_page", + "search_and_read", +]); + +/** + * Bash commands that are read-only / investigative — safe during queue mode. + * Matches the leading command in a bash invocation. + */ +const BASH_READ_ONLY_RE = /^\s*(cat|head|tail|less|more|wc|file|stat|du|df|which|type|echo|printf|ls|find|grep|rg|awk|sed\b(?!.*-i)|sort|uniq|diff|comm|tr|cut|tee\s+-a\s+\/dev\/null|git\s+(log|show|diff|status|branch|tag|remote|rev-parse|ls-files|blame|shortlog|describe|stash\s+list|config\s+--get|cat-file)|gh\s+(issue|pr|api|repo|release)\s+(view|list|diff|status|checks)|mkdir\s+-p\s+\.gsd|rtk\s)/; + let depthVerificationDone = false; let activeQueuePhase = false; @@ -49,3 +75,52 @@ export function shouldBlockContextWrite( }; } +/** + * Queue-mode execution guard (#2545). + * + * When the queue phase is active, the agent should only create planning + * artifacts (milestones, CONTEXT.md, QUEUE.md, etc.) — never execute work. + * This function blocks write/edit/bash tool calls that would modify source + * code outside of .gsd/. + * + * @param toolName The tool being called (write, edit, bash, etc.) + * @param input For write/edit: the file path. For bash: the command string. + * @param queuePhaseActive Whether the queue phase is currently active. + * @returns { block, reason } — block=true if the call should be rejected. 
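+ *
+ * Hedged usage sketch (tool inputs are illustrative):
+ *   shouldBlockQueueExecution("write", "src/app.ts", true)       → { block: true, reason: … }
+ *   shouldBlockQueueExecution("write", ".gsd/QUEUE.md", true)    → { block: false }  (planning artifact)
+ *   shouldBlockQueueExecution("bash", "git log --oneline", true) → { block: false }  (read-only command)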
+ */ +export function shouldBlockQueueExecution( + toolName: string, + input: string, + queuePhaseActive: boolean, +): { block: boolean; reason?: string } { + if (!queuePhaseActive) return { block: false }; + + // Always-safe tools (read-only, discussion, planning) + if (QUEUE_SAFE_TOOLS.has(toolName)) return { block: false }; + + // write/edit — allow if targeting .gsd/ planning artifacts + if (toolName === "write" || toolName === "edit") { + if (GSD_DIR_RE.test(input)) return { block: false }; + return { + block: true, + reason: `Blocked: /gsd queue is a planning tool — it creates milestones, not executes work. ` + + `Cannot ${toolName} to "${input}" during queue mode. ` + + `Write CONTEXT.md files and update PROJECT.md/QUEUE.md instead.`, + }; + } + + // bash — allow read-only/investigative commands, block everything else + if (toolName === "bash") { + if (BASH_READ_ONLY_RE.test(input)) return { block: false }; + return { + block: true, + reason: `Blocked: /gsd queue is a planning tool — it creates milestones, not executes work. ` + + `Cannot run "${input.slice(0, 80)}${input.length > 80 ? "…" : ""}" during queue mode. ` + + `Use read-only commands (cat, grep, git log, etc.) to investigate, then write planning artifacts.`, + }; + } + + // Unknown tools — allow by default (custom extension tools, etc.) 
+ return { block: false }; +} + diff --git a/src/resources/extensions/gsd/captures.ts b/src/resources/extensions/gsd/captures.ts index 72447876e..645d907f6 100644 --- a/src/resources/extensions/gsd/captures.ts +++ b/src/resources/extensions/gsd/captures.ts @@ -26,6 +26,7 @@ export interface CaptureEntry { resolution?: string; rationale?: string; resolvedAt?: string; + resolvedInMilestone?: string; executed?: boolean; } @@ -176,6 +177,7 @@ export function markCaptureResolved( classification: Classification, resolution: string, rationale: string, + milestoneId?: string, ): void { const filePath = resolveCapturesPath(basePath); if (!existsSync(filePath)) return; @@ -206,13 +208,17 @@ export function markCaptureResolved( `**Rationale:** ${rationale}`, `**Resolved:** ${resolvedAt}`, ]; + if (milestoneId) { + newFields.push(`**Milestone:** ${milestoneId}`); + } - // Remove any existing classification/resolution/rationale/resolved fields + // Remove any existing classification/resolution/rationale/resolved/milestone fields // (in case of re-triage) section = section.replace(/\*\*Classification:\*\*\s*.+\n?/g, ""); section = section.replace(/\*\*Resolution:\*\*\s*.+\n?/g, ""); section = section.replace(/\*\*Rationale:\*\*\s*.+\n?/g, ""); section = section.replace(/\*\*Resolved:\*\*\s*.+\n?/g, ""); + section = section.replace(/\*\*Milestone:\*\*\s*.+\n?/g, ""); // Add new fields after Status line section = section.trimEnd() + "\n" + newFields.join("\n") + "\n"; @@ -255,18 +261,70 @@ export function markCaptureExecuted(basePath: string, captureId: string): void { * Load resolved captures that have actionable classifications (inject, replan, * quick-task) but have NOT yet been executed. * These are captures whose resolutions need to be carried out. + * + * When `currentMilestoneId` is provided, captures resolved in a *different* + * milestone are treated as stale and excluded. 
This prevents quick-task + * captures from a prior milestone re-executing after the underlying issues + * were already fixed by planned milestone work (#2872). + * + * Captures that have no `resolvedInMilestone` (legacy captures resolved before + * this field was introduced) are always included for backward compatibility. */ -export function loadActionableCaptures(basePath: string): CaptureEntry[] { +export function loadActionableCaptures(basePath: string, currentMilestoneId?: string): CaptureEntry[] { return loadAllCaptures(basePath).filter( c => c.status === "resolved" && !c.executed && (c.classification === "inject" || c.classification === "replan" || - c.classification === "quick-task"), + c.classification === "quick-task") && + // Staleness gate: exclude captures resolved in a different milestone (#2872) + (!currentMilestoneId || + !c.resolvedInMilestone || + c.resolvedInMilestone === currentMilestoneId), ); } +/** + * Retroactively stamp a capture with a milestone ID. + * + * Used by executeTriageResolutions() as a safety net when the triage LLM + * resolves a capture without writing the **Milestone:** field. This ensures + * the staleness gate in loadActionableCaptures() works correctly even for + * captures resolved before the prompt was updated (#2872). 
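+ *
+ * e.g. stampCaptureMilestone(".", "C042", "M3") inserts a
+ * "**Milestone:** M3" line after the capture's **Resolved:** field
+ * (or at the end of the section); the IDs here are illustrative.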
+ */ +export function stampCaptureMilestone(basePath: string, captureId: string, milestoneId: string): void { + const filePath = resolveCapturesPath(basePath); + if (!existsSync(filePath)) return; + + const content = readFileSync(filePath, "utf-8"); + + const sectionRegex = new RegExp( + `(### ${escapeRegex(captureId)}\\n(?:(?!### ).)*?)(?=### |$)`, + "s", + ); + const match = sectionRegex.exec(content); + if (!match) return; + + let section = match[1]; + + // Only stamp if not already present + if (/\*\*Milestone:\*\*/.test(section)) return; + + // Insert after the Resolved field (or at end of section) + const resolvedFieldEnd = section.search(/\*\*Resolved:\*\*\s*.+\n?/); + if (resolvedFieldEnd !== -1) { + const resolvedMatch = section.match(/\*\*Resolved:\*\*\s*.+\n?/); + const insertPos = resolvedFieldEnd + (resolvedMatch?.[0]?.length ?? 0); + section = section.slice(0, insertPos) + `**Milestone:** ${milestoneId}\n` + section.slice(insertPos); + } else { + section = section.trimEnd() + "\n" + `**Milestone:** ${milestoneId}` + "\n"; + } + + const updated = content.replace(sectionRegex, section); + writeFileSync(filePath, updated, "utf-8"); +} + // ─── Parser ─────────────────────────────────────────────────────────────────── /** @@ -291,6 +349,7 @@ function parseCapturesContent(content: string): CaptureEntry[] { const resolution = extractBoldField(body, "Resolution"); const rationale = extractBoldField(body, "Rationale"); const resolvedAt = extractBoldField(body, "Resolved"); + const milestoneId = extractBoldField(body, "Milestone"); const executedAt = extractBoldField(body, "Executed"); if (!text || !timestamp) continue; @@ -308,6 +367,7 @@ function parseCapturesContent(content: string): CaptureEntry[] { ...(resolution ? { resolution } : {}), ...(rationale ? { rationale } : {}), ...(resolvedAt ? { resolvedAt } : {}), + ...(milestoneId ? { resolvedInMilestone: milestoneId } : {}), ...(executedAt ? 
{ executed: true } : {}), }); } diff --git a/src/resources/extensions/gsd/codebase-generator.ts b/src/resources/extensions/gsd/codebase-generator.ts new file mode 100644 index 000000000..6fe558abb --- /dev/null +++ b/src/resources/extensions/gsd/codebase-generator.ts @@ -0,0 +1,351 @@ +/** + * GSD Codebase Map Generator + * + * Produces .gsd/CODEBASE.md — a structural table of contents for the project. + * Gives fresh agent contexts instant orientation without filesystem exploration. + * + * Generation: walk `git ls-files`, group by directory, output with descriptions. + * Maintenance: agent updates descriptions as it works; incremental update preserves them. + */ + +import { existsSync, readFileSync, writeFileSync, mkdirSync } from "node:fs"; +import { join, dirname, extname } from "node:path"; + +import { execSync } from "node:child_process"; +import { gsdRoot } from "./paths.js"; + +// ─── Types ─────────────────────────────────────────────────────────────────── + +export interface CodebaseMapOptions { + excludePatterns?: string[]; + maxFiles?: number; + collapseThreshold?: number; +} + +interface FileEntry { + path: string; + description: string; +} + +interface DirectoryGroup { + path: string; + files: FileEntry[]; + collapsed: boolean; +} + +// ─── Defaults ──────────────────────────────────────────────────────────────── + +const DEFAULT_EXCLUDES = [ + ".gsd/", + ".planning/", + ".git/", + "node_modules/", + "dist/", + "build/", + ".next/", + "coverage/", + "__pycache__/", + ".venv/", + "vendor/", +]; + +const DEFAULT_MAX_FILES = 500; +const DEFAULT_COLLAPSE_THRESHOLD = 20; + +// ─── Parsing ───────────────────────────────────────────────────────────────── + +/** + * Parse an existing CODEBASE.md to extract file → description mappings. + * Also scans comment blocks to preserve + * descriptions for files in collapsed directories across incremental updates. 
+ */
+export function parseCodebaseMap(content: string): Map<string, string> {
+  const descriptions = new Map<string, string>();
+  let inCollapsedBlock = false;
+
+  for (const line of content.split("\n")) {
+    // Track collapsed-description comment blocks
+    if (line.trimStart().startsWith("<!--")) {
+      inCollapsedBlock = true;
+      continue;
+    }
+    if (line.trimStart().startsWith("-->")) {
+      inCollapsedBlock = false;
+      continue;
+    }
+
+    // Match: - `path/to/file.ts` — Description here
+    const match = line.match(/^- `(.+?)` — (.+)$/);
+    if (match) {
+      descriptions.set(match[1], match[2]);
+      continue;
+    }
+
+    // Match: - `path/to/file.ts` (no description) — only outside collapsed blocks
+    if (!inCollapsedBlock) {
+      const bareMatch = line.match(/^- `(.+?)`\s*$/);
+      if (bareMatch) {
+        descriptions.set(bareMatch[1], "");
+      }
+    }
+  }
+  return descriptions;
+}
+
+// ─── File Enumeration ────────────────────────────────────────────────────────
+
+function shouldExclude(filePath: string, excludes: string[]): boolean {
+  for (const pattern of excludes) {
+    if (pattern.endsWith("/")) {
+      if (filePath.startsWith(pattern) || filePath.includes(`/${pattern}`)) return true;
+    } else if (filePath === pattern || filePath.endsWith(`/${pattern}`)) {
+      return true;
+    }
+  }
+  // Skip binary/lock files
+  const ext = extname(filePath).toLowerCase();
+  if ([".lock", ".png", ".jpg", ".jpeg", ".gif", ".ico", ".woff", ".woff2", ".ttf", ".eot", ".svg"].includes(ext)) {
+    return true;
+  }
+  return false;
+}
+
+function lsFiles(basePath: string): string[] {
+  try {
+    const result = execSync("git ls-files", { cwd: basePath, encoding: "utf-8", timeout: 10000 });
+    return result.split("\n").filter(Boolean);
+  } catch {
+    return [];
+  }
+}
+
+/**
+ * Enumerate tracked files, applying exclusions and the maxFiles cap.
+ * Returns both the file list and whether truncation occurred.
+ */
+function enumerateFiles(basePath: string, excludes: string[], maxFiles: number): { files: string[]; truncated: boolean } {
+  const allFiles = lsFiles(basePath);
+  const filtered = allFiles.filter((f) => !shouldExclude(f, excludes));
+  const truncated = filtered.length > maxFiles;
+  return { files: truncated ? filtered.slice(0, maxFiles) : filtered, truncated };
+}
+
+// ─── Grouping ────────────────────────────────────────────────────────────────
+
+function groupByDirectory(
+  files: string[],
+  descriptions: Map<string, string>,
+  collapseThreshold: number,
+): DirectoryGroup[] {
+  const dirMap = new Map<string, FileEntry[]>();
+
+  for (const file of files) {
+    const dir = dirname(file);
+    const dirKey = dir === "." ? "" : dir;
+    if (!dirMap.has(dirKey)) {
+      dirMap.set(dirKey, []);
+    }
+    dirMap.get(dirKey)!.push({
+      path: file,
+      description: descriptions.get(file) ?? "",
+    });
+  }
+
+  const groups: DirectoryGroup[] = [];
+  const sortedDirs = [...dirMap.keys()].sort();
+
+  for (const dir of sortedDirs) {
+    const dirFiles = dirMap.get(dir)!;
+    dirFiles.sort((a, b) => a.path.localeCompare(b.path));
+
+    groups.push({
+      path: dir,
+      files: dirFiles,
+      collapsed: dirFiles.length > collapseThreshold,
+    });
+  }
+
+  return groups;
+}
+
+// ─── Rendering ───────────────────────────────────────────────────────────────
+
+function renderCodebaseMap(groups: DirectoryGroup[], totalFiles: number, truncated: boolean): string {
+  const lines: string[] = [];
+  const now = new Date().toISOString().split(".")[0] + "Z";
+  const described = groups.reduce((sum, g) => sum + g.files.filter((f) => f.description).length, 0);
+
+  lines.push("# Codebase Map");
+  lines.push("");
+  lines.push(`Generated: ${now} | Files: ${totalFiles} | Described: ${described}/${totalFiles}`);
+  if (truncated) {
+    lines.push(`Note: Truncated to first ${totalFiles} files. Run with higher --max-files to include all.`);
+  }
+  lines.push("");
+
+  for (const group of groups) {
+    const heading = group.path || "(root)";
+    lines.push(`### ${heading}/`);
+
+    if (group.collapsed) {
+      // Summarize collapsed directories
+      const extensions = new Map<string, number>();
+      for (const f of group.files) {
+        const ext = extname(f.path) || "(no ext)";
+        extensions.set(ext, (extensions.get(ext) ?? 0) + 1);
+      }
+      const extSummary = [...extensions.entries()]
+        .sort((a, b) => b[1] - a[1])
+        .map(([ext, count]) => `${count} ${ext}`)
+        .join(", ");
+      lines.push(`- *(${group.files.length} files: ${extSummary})*`);
+
+      // Preserve any existing descriptions in a hidden comment block so
+      // incremental updates can recover them via parseCodebaseMap.
+      const descLines = group.files
+        .filter((f) => f.description)
+        .map((f) => `- \`${f.path}\` — ${f.description}`);
+      if (descLines.length > 0) {
+        lines.push("<!--");
+        lines.push(...descLines);
+        lines.push("-->");
+      }
+    } else {
+      for (const file of group.files) {
+        if (file.description) {
+          lines.push(`- \`${file.path}\` — ${file.description}`);
+        } else {
+          lines.push(`- \`${file.path}\``);
+        }
+      }
+    }
+    lines.push("");
+  }
+
+  return lines.join("\n");
+}
+
+// ─── Public API ──────────────────────────────────────────────────────────────
+
+/**
+ * Generate a fresh CODEBASE.md from scratch.
+ * Preserves existing descriptions if `existingDescriptions` is provided.
+ */
+export function generateCodebaseMap(
+  basePath: string,
+  options?: CodebaseMapOptions,
+  existingDescriptions?: Map<string, string>,
+): { content: string; fileCount: number; truncated: boolean; files: string[] } {
+  const excludes = [...DEFAULT_EXCLUDES, ...(options?.excludePatterns ?? [])];
+  const maxFiles = options?.maxFiles ?? DEFAULT_MAX_FILES;
+  const collapseThreshold = options?.collapseThreshold ?? DEFAULT_COLLAPSE_THRESHOLD;
+
+  const { files, truncated } = enumerateFiles(basePath, excludes, maxFiles);
+  const descriptions = existingDescriptions ??
new Map(); + const groups = groupByDirectory(files, descriptions, collapseThreshold); + const content = renderCodebaseMap(groups, files.length, truncated); + + return { content, fileCount: files.length, truncated, files }; +} + +/** + * Incremental update: re-scan files, preserve existing descriptions, + * add new files, remove deleted files. + */ +export function updateCodebaseMap( + basePath: string, + options?: CodebaseMapOptions, +): { content: string; added: number; removed: number; unchanged: number; fileCount: number; truncated: boolean } { + const codebasePath = join(gsdRoot(basePath), "CODEBASE.md"); + + // Load existing descriptions + let existingDescriptions = new Map(); + if (existsSync(codebasePath)) { + const existing = readFileSync(codebasePath, "utf-8"); + existingDescriptions = parseCodebaseMap(existing); + } + + const existingFiles = new Set(existingDescriptions.keys()); + + // Generate new map preserving descriptions — reuse the returned file list + // to avoid a second enumeration (prevents race between content and stats). + const result = generateCodebaseMap(basePath, options, existingDescriptions); + const currentSet = new Set(result.files); + + // Count changes + let added = 0; + let removed = 0; + + for (const f of result.files) { + if (!existingFiles.has(f)) added++; + } + for (const f of existingFiles) { + if (!currentSet.has(f)) removed++; + } + + return { + content: result.content, + added, + removed, + unchanged: result.files.length - added, + fileCount: result.fileCount, + truncated: result.truncated, + }; +} + +/** + * Write CODEBASE.md to .gsd/ directory. + */ +export function writeCodebaseMap(basePath: string, content: string): string { + const root = gsdRoot(basePath); + mkdirSync(root, { recursive: true }); + const outPath = join(root, "CODEBASE.md"); + writeFileSync(outPath, content, "utf-8"); + return outPath; +} + +/** + * Read existing CODEBASE.md, or return null if it doesn't exist. 
+ */ +export function readCodebaseMap(basePath: string): string | null { + const codebasePath = join(gsdRoot(basePath), "CODEBASE.md"); + if (!existsSync(codebasePath)) return null; + try { + return readFileSync(codebasePath, "utf-8"); + } catch { + return null; + } +} + +/** + * Get stats about the codebase map. + */ +export function getCodebaseMapStats(basePath: string): { + exists: boolean; + fileCount: number; + describedCount: number; + undescribedCount: number; + generatedAt: string | null; +} { + const content = readCodebaseMap(basePath); + if (!content) { + return { exists: false, fileCount: 0, describedCount: 0, undescribedCount: 0, generatedAt: null }; + } + + // Parse total file count from the header line (accurate even for collapsed dirs) + const fileCountMatch = content.match(/Files:\s*(\d+)/); + const totalFiles = fileCountMatch ? parseInt(fileCountMatch[1], 10) : 0; + + // Use parseCodebaseMap to count described files (includes collapsed-description blocks) + const descriptions = parseCodebaseMap(content); + const described = [...descriptions.values()].filter((d) => d.length > 0).length; + const dateMatch = content.match(/Generated: (\S+)/); + + return { + exists: true, + fileCount: totalFiles, + describedCount: described, + undescribedCount: totalFiles - described, + generatedAt: dateMatch?.[1] ?? null, + }; +} diff --git a/src/resources/extensions/gsd/commands-codebase.ts b/src/resources/extensions/gsd/commands-codebase.ts new file mode 100644 index 000000000..305f09256 --- /dev/null +++ b/src/resources/extensions/gsd/commands-codebase.ts @@ -0,0 +1,164 @@ +/** + * GSD Command — /gsd codebase + * + * Generate and manage the codebase map (.gsd/CODEBASE.md). 
+ * Subcommands: generate, update, stats, help
+ */
+
+import type { ExtensionAPI, ExtensionCommandContext } from "@gsd/pi-coding-agent";
+
+import {
+  generateCodebaseMap,
+  updateCodebaseMap,
+  writeCodebaseMap,
+  getCodebaseMapStats,
+  readCodebaseMap,
+  parseCodebaseMap,
+} from "./codebase-generator.js";
+
+const USAGE =
+  "Usage: /gsd codebase [generate|update|stats]\n\n" +
+  "  generate [--max-files N] — Generate or regenerate CODEBASE.md\n" +
+  "  update — Incremental update (preserves descriptions)\n" +
+  "  stats — Show file count, coverage, and generation time\n" +
+  "  help — Show this help\n\n" +
+  "With no subcommand, shows stats if a map exists or help if not.";
+
+export async function handleCodebase(
+  args: string,
+  ctx: ExtensionCommandContext,
+  _pi: ExtensionAPI,
+): Promise<void> {
+  const basePath = process.cwd();
+  const parts = args.trim().split(/\s+/);
+  const sub = parts[0] ?? "";
+
+  switch (sub) {
+    case "generate": {
+      const maxFiles = parseMaxFiles(args, ctx);
+      if (maxFiles === false) return; // validation failed, message already shown
+
+      const existing = readCodebaseMap(basePath);
+      const existingDescriptions = existing
+        ? parseCodebaseMap(existing)
+        : undefined;
+
+      const result = generateCodebaseMap(basePath, { maxFiles: maxFiles ?? undefined }, existingDescriptions);
+
+      if (result.fileCount === 0) {
+        ctx.ui.notify(
+          "Codebase map generated with 0 files.\n" +
+            "Is this a git repository? Run 'git ls-files' to verify.",
+          "warning",
+        );
+        return;
+      }
+
+      const outPath = writeCodebaseMap(basePath, result.content);
+      ctx.ui.notify(
+        `Codebase map generated: ${result.fileCount} files\n` +
+          `Written to: ${outPath}` +
+          (result.truncated ? `\n⚠ Truncated — increase --max-files to include all files` : ""),
+        "success",
+      );
+      return;
+    }
+
+    case "update": {
+      const existing = readCodebaseMap(basePath);
+      if (!existing) {
+        ctx.ui.notify(
+          "No codebase map found.
Run /gsd codebase generate to create one.", + "warning", + ); + return; + } + + const maxFiles = parseMaxFiles(args, ctx); + if (maxFiles === false) return; + + const result = updateCodebaseMap(basePath, { maxFiles: maxFiles ?? undefined }); + writeCodebaseMap(basePath, result.content); + + ctx.ui.notify( + `Codebase map updated: ${result.fileCount} files\n` + + ` Added: ${result.added} | Removed: ${result.removed} | Unchanged: ${result.unchanged}` + + (result.truncated ? `\n⚠ Truncated — increase --max-files to include all files` : ""), + "success", + ); + return; + } + + case "stats": { + showStats(basePath, ctx); + return; + } + + case "help": + ctx.ui.notify(USAGE, "info"); + return; + + case "": { + // Safe default: show stats if map exists, help if not + const existing = readCodebaseMap(basePath); + if (existing) { + showStats(basePath, ctx); + } else { + ctx.ui.notify(USAGE, "info"); + } + return; + } + + default: + ctx.ui.notify( + `Unknown subcommand "${sub}".\n\n${USAGE}`, + "warning", + ); + } +} + +function showStats(basePath: string, ctx: ExtensionCommandContext): void { + const stats = getCodebaseMapStats(basePath); + if (!stats.exists) { + ctx.ui.notify("No codebase map found. Run /gsd codebase generate to create one.", "info"); + return; + } + + const coverage = stats.fileCount > 0 + ? Math.round((stats.describedCount / stats.fileCount) * 100) + : 0; + + ctx.ui.notify( + `Codebase Map Stats:\n` + + ` Files: ${stats.fileCount}\n` + + ` Described: ${stats.describedCount} (${coverage}%)\n` + + ` Undescribed: ${stats.undescribedCount}\n` + + ` Generated: ${stats.generatedAt ?? "unknown"}\n\n` + + (stats.undescribedCount > 0 + ? `Tip: Run /gsd codebase update to refresh after file changes.` + : `Coverage is complete.`), + "info", + ); +} + +/** + * Parse and validate --max-files flag. + * Returns the parsed number, undefined if flag not present, or false if invalid. 
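+ *
+ * e.g. parseMaxFiles("generate --max-files 200", ctx) → 200
+ *      parseMaxFiles("generate", ctx)                 → undefined
+ *      parseMaxFiles("generate --max-files 0", ctx)   → false (and notifies the user)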
+ */ +function parseMaxFiles(args: string, ctx: ExtensionCommandContext): number | undefined | false { + const maxFilesStr = extractFlag(args, "--max-files"); + if (!maxFilesStr) return undefined; + + const maxFiles = parseInt(maxFilesStr, 10); + if (isNaN(maxFiles) || maxFiles < 1) { + ctx.ui.notify("--max-files must be a positive integer (e.g. --max-files 200).", "warning"); + return false; + } + return maxFiles; +} + +function extractFlag(args: string, flag: string): string | undefined { + const escaped = flag.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + const regex = new RegExp(`${escaped}[=\\s]+(\\S+)`); + const match = args.match(regex); + return match?.[1]; +} diff --git a/src/resources/extensions/gsd/commands/catalog.ts b/src/resources/extensions/gsd/commands/catalog.ts index 7d688d41c..02882a07c 100644 --- a/src/resources/extensions/gsd/commands/catalog.ts +++ b/src/resources/extensions/gsd/commands/catalog.ts @@ -15,7 +15,7 @@ export interface GsdCommandDefinition { type CompletionMap = Record; export const GSD_COMMAND_DESCRIPTION = - "GSD — Get Shit Done: /gsd help|start|templates|next|auto|stop|pause|status|widget|visualize|queue|quick|discuss|capture|triage|dispatch|history|undo|undo-task|reset-slice|rate|skip|export|cleanup|mode|prefs|config|keys|hooks|run-hook|skill-health|doctor|logs|forensics|changelog|migrate|remote|steer|knowledge|new-milestone|parallel|cmux|park|unpark|init|setup|inspect|extensions|update|fast|mcp|rethink"; + "GSD — Get Shit Done: /gsd help|start|templates|next|auto|stop|pause|status|widget|visualize|queue|quick|discuss|capture|triage|dispatch|history|undo|undo-task|reset-slice|rate|skip|export|cleanup|mode|prefs|config|keys|hooks|run-hook|skill-health|doctor|logs|forensics|changelog|migrate|remote|steer|knowledge|new-milestone|parallel|cmux|park|unpark|init|setup|inspect|extensions|update|fast|mcp|rethink|codebase"; export const TOP_LEVEL_SUBCOMMANDS: readonly GsdCommandDefinition[] = [ { cmd: "help", desc: "Categorized command 
reference with descriptions" }, @@ -71,6 +71,7 @@ export const TOP_LEVEL_SUBCOMMANDS: readonly GsdCommandDefinition[] = [ { cmd: "mcp", desc: "MCP server status and connectivity check (status, check )" }, { cmd: "rethink", desc: "Conversational project reorganization — reorder, park, discard, add milestones" }, { cmd: "workflow", desc: "Custom workflow lifecycle (new, run, list, validate, pause, resume)" }, + { cmd: "codebase", desc: "Generate and manage codebase map (.gsd/CODEBASE.md)" }, ]; const NESTED_COMPLETIONS: CompletionMap = { @@ -225,6 +226,14 @@ const NESTED_COMPLETIONS: CompletionMap = { { cmd: "pause", desc: "Pause custom workflow auto-mode" }, { cmd: "resume", desc: "Resume paused custom workflow auto-mode" }, ], + codebase: [ + { cmd: "generate", desc: "Generate or regenerate CODEBASE.md" }, + { cmd: "generate --max-files", desc: "Generate with custom file limit (default: 500)" }, + { cmd: "update", desc: "Incremental update (preserves descriptions)" }, + { cmd: "update --max-files", desc: "Update with custom file limit" }, + { cmd: "stats", desc: "Show file count, description coverage, and generation time" }, + { cmd: "help", desc: "Show usage and available subcommands" }, + ], }; function filterOptions( diff --git a/src/resources/extensions/gsd/commands/handlers/ops.ts b/src/resources/extensions/gsd/commands/handlers/ops.ts index a1996dfef..4ebfad1bf 100644 --- a/src/resources/extensions/gsd/commands/handlers/ops.ts +++ b/src/resources/extensions/gsd/commands/handlers/ops.ts @@ -206,5 +206,10 @@ Examples: await handleRethink(trimmed, ctx, pi); return true; } + if (trimmed === "codebase" || trimmed.startsWith("codebase ")) { + const { handleCodebase } = await import("../../commands-codebase.js"); + await handleCodebase(trimmed.replace(/^codebase\s*/, "").trim(), ctx, pi); + return true; + } return false; } diff --git a/src/resources/extensions/gsd/complexity-classifier.ts b/src/resources/extensions/gsd/complexity-classifier.ts index 
73e505958..c7ae14dbf 100644 --- a/src/resources/extensions/gsd/complexity-classifier.ts +++ b/src/resources/extensions/gsd/complexity-classifier.ts @@ -35,15 +35,17 @@ const UNIT_TYPE_TIERS: Record = { "complete-slice": "light", "run-uat": "light", - // Tier 2 — Standard: research, routine planning, discussion + // Tier 2 — Standard: research, routine discussion "discuss-milestone": "standard", "discuss-slice": "standard", "research-milestone": "standard", "research-slice": "standard", - "plan-milestone": "standard", - "plan-slice": "standard", - // Tier 3 — Heavy: execution, replanning (requires deep reasoning) + // Tier 3 — Heavy: planning, execution, replanning (requires deep reasoning) + // Planning is heavy so it uses the best configured model (e.g. Opus) and is + // not downgraded by dynamic routing when a capable model is configured. + "plan-milestone": "heavy", + "plan-slice": "heavy", "execute-task": "standard", // default standard, upgraded by metadata "replan-slice": "heavy", "reassess-roadmap": "heavy", @@ -185,8 +187,8 @@ function analyzePlanComplexity( // Check if this is a milestone-level plan (more complex) vs single slice const { milestone: mid, slice: sid } = parseUnitId(unitId); if (!sid) { - // Milestone-level planning is always at least standard - return { tier: "standard", reason: "milestone-level planning" }; + // Milestone-level planning is always heavy — requires full context and best model + return { tier: "heavy", reason: "milestone-level planning" }; } // For slice planning, try to read the context/research to gauge complexity diff --git a/src/resources/extensions/gsd/db-writer.ts b/src/resources/extensions/gsd/db-writer.ts index 489b0d915..5ff97479a 100644 --- a/src/resources/extensions/gsd/db-writer.ts +++ b/src/resources/extensions/gsd/db-writer.ts @@ -227,6 +227,122 @@ export async function nextDecisionId(): Promise { } } +// ─── Next Requirement ID ───────────────────────────────────────────────── + +/** + * Compute the next 
requirement ID from the current DB state. + * Queries MAX(CAST(SUBSTR(id, 2) AS INTEGER)) from requirements table. + * Returns R001 if no requirements exist. Zero-pads to 3 digits. + */ +export async function nextRequirementId(): Promise { + try { + const db = await import('./gsd-db.js'); + const adapter = db._getAdapter(); + if (!adapter) return 'R001'; + + const row = adapter + .prepare('SELECT MAX(CAST(SUBSTR(id, 2) AS INTEGER)) as max_num FROM requirements') + .get(); + + const maxNum = row ? (row['max_num'] as number | null) : null; + if (maxNum == null || isNaN(maxNum)) return 'R001'; + + const next = maxNum + 1; + return `R${String(next).padStart(3, '0')}`; + } catch (err) { + logError('manifest', 'nextRequirementId failed', { fn: 'nextRequirementId', error: String((err as Error).message) }); + return 'R001'; + } +} + +// ─── Save Requirement to DB + Regenerate Markdown ──────────────────────── + +export interface SaveRequirementFields { + class: string; + status?: string; + description: string; + why: string; + source: string; + primary_owner?: string; + supporting_slices?: string; + validation?: string; + notes?: string; +} + +/** + * Save a new requirement to DB and regenerate REQUIREMENTS.md. + * Auto-assigns the next ID via nextRequirementId(). + * Returns the assigned ID. + */ +export async function saveRequirementToDb( + fields: SaveRequirementFields, + basePath: string, +): Promise<{ id: string }> { + try { + const db = await import('./gsd-db.js'); + + const id = await nextRequirementId(); + + const requirement: Requirement = { + id, + class: fields.class, + status: fields.status ?? 'active', + description: fields.description, + why: fields.why, + source: fields.source, + primary_owner: fields.primary_owner ?? '', + supporting_slices: fields.supporting_slices ?? '', + validation: fields.validation ?? '', + notes: fields.notes ?? 
'', + full_content: '', + superseded_by: null, + }; + + db.upsertRequirement(requirement); + + // Fetch all requirements for full file regeneration + const adapter = db._getAdapter(); + let allRequirements: Requirement[] = []; + if (adapter) { + const rows = adapter.prepare('SELECT * FROM requirements ORDER BY id').all(); + allRequirements = rows.map(row => ({ + id: row['id'] as string, + class: row['class'] as string, + status: row['status'] as string, + description: row['description'] as string, + why: row['why'] as string, + source: row['source'] as string, + primary_owner: row['primary_owner'] as string, + supporting_slices: row['supporting_slices'] as string, + validation: row['validation'] as string, + notes: row['notes'] as string, + full_content: row['full_content'] as string, + superseded_by: (row['superseded_by'] as string) ?? null, + })); + } + + const nonSuperseded = allRequirements.filter(r => r.superseded_by == null); + const md = generateRequirementsMd(nonSuperseded); + const filePath = resolveGsdRootFile(basePath, 'REQUIREMENTS'); + try { + await saveFile(filePath, md); + } catch (diskErr) { + logError('manifest', 'disk write failed, rolling back DB row', { fn: 'saveRequirementToDb', error: String((diskErr as Error).message) }); + const rollbackAdapter = db._getAdapter(); + rollbackAdapter?.prepare('DELETE FROM requirements WHERE id = :id').run({ ':id': id }); + throw diskErr; + } + invalidateStateCache(); + clearPathCache(); + clearParseCache(); + + return { id }; + } catch (err) { + logError('manifest', 'saveRequirementToDb failed', { fn: 'saveRequirementToDb', error: String((err as Error).message) }); + throw err; + } +} + // ─── Save Decision to DB + Regenerate Markdown ──────────────────────────── export interface SaveDecisionFields { @@ -344,15 +460,30 @@ export async function updateRequirementInDb( const db = await import('./gsd-db.js'); const existing = db.getRequirementById(id); - if (!existing) { - throw new GSDError(GSD_STALE_STATE, 
`Requirement ${id} not found`); - } - // Merge updates into existing + // If requirement doesn't exist in DB, create a skeleton and merge updates. + // This handles the case where requirements were written to REQUIREMENTS.md + // but never imported into the database (see #2919). + const base: Requirement = existing ?? { + id, + class: '', + status: 'active', + description: '', + why: '', + source: '', + primary_owner: '', + supporting_slices: '', + validation: '', + notes: '', + full_content: '', + superseded_by: null, + }; + + // Merge updates into existing (or skeleton) const merged: Requirement = { - ...existing, + ...base, ...updates, - id: existing.id, // ID cannot be changed + id: base.id, // ID cannot be changed }; db.upsertRequirement(merged); @@ -388,7 +519,9 @@ export async function updateRequirementInDb( await saveFile(filePath, md); } catch (diskErr) { logError('manifest', 'disk write failed, reverting DB row', { fn: 'updateRequirementInDb', error: String((diskErr as Error).message) }); - db.upsertRequirement(existing); + if (existing) { + db.upsertRequirement(existing); + } throw diskErr; } // Invalidate file-read caches so deriveState() sees the updated markdown. diff --git a/src/resources/extensions/gsd/doctor-git-checks.ts b/src/resources/extensions/gsd/doctor-git-checks.ts index 0b8820108..78754fc8f 100644 --- a/src/resources/extensions/gsd/doctor-git-checks.ts +++ b/src/resources/extensions/gsd/doctor-git-checks.ts @@ -14,6 +14,28 @@ import { nativeIsRepo, nativeWorktreeList, nativeWorktreeRemove, nativeBranchLis import { getAllWorktreeHealth } from "./worktree-health.js"; import { loadEffectiveGSDPreferences } from "./preferences.js"; +/** + * Returns true if the directory contains only doctor artifacts + * (e.g. `.gsd/doctor-history.jsonl`). These dirs are created by + * appendDoctorHistory() writing to worktree-scoped paths during the audit + * and should not be flagged as orphaned worktrees (#3105). 
+ */ +function isDoctorArtifactOnly(dirPath: string): boolean { + try { + const entries = readdirSync(dirPath); + // Empty dir — not a doctor artifact, still orphaned + if (entries.length === 0) return false; + // Only a .gsd subdirectory + if (entries.length === 1 && entries[0] === ".gsd") { + const gsdEntries = readdirSync(join(dirPath, ".gsd")); + return gsdEntries.length <= 1 && gsdEntries.every(e => e === "doctor-history.jsonl"); + } + return false; + } catch { + return false; + } +} + export async function checkGitHealth( basePath: string, issues: DoctorIssue[], @@ -314,6 +336,10 @@ export async function checkGitHealth( } catch { continue; } const normalizedFullPath = normalizePath(fullPath); if (!registeredPaths.has(normalizedFullPath)) { + // Skip directories that only contain doctor artifacts (.gsd/doctor-history.jsonl). + // appendDoctorHistory() can recreate these dirs during the audit itself, + // causing a circular false positive (#3105 Bug 1). + if (isDoctorArtifactOnly(fullPath)) continue; issues.push({ severity: "warning", code: "worktree_directory_orphaned", diff --git a/src/resources/extensions/gsd/doctor-providers.ts b/src/resources/extensions/gsd/doctor-providers.ts index 99c8c4ede..e0f35341b 100644 --- a/src/resources/extensions/gsd/doctor-providers.ts +++ b/src/resources/extensions/gsd/doctor-providers.ts @@ -181,7 +181,8 @@ function resolveKey(providerId: string): KeyLookup { */ const PROVIDER_ROUTES: Record<string, string[]> = { anthropic: ["github-copilot"], - openai: ["github-copilot"], + openai: ["github-copilot", "openai-codex"], + google: ["google-gemini-cli"], }; function checkLlmProviders(): ProviderCheckResult[] { diff --git a/src/resources/extensions/gsd/doctor-runtime-checks.ts b/src/resources/extensions/gsd/doctor-runtime-checks.ts index 1137981a7..d2af2bd9a 100644 --- a/src/resources/extensions/gsd/doctor-runtime-checks.ts +++ b/src/resources/extensions/gsd/doctor-runtime-checks.ts @@ -119,10 +119,11 @@ export async function checkRuntimeHealth(
for (const key of keys) { // Key format: "unitType/unitId" e.g. "execute-task/M001/S01/T01" - const slashIdx = key.indexOf("/"); - if (slashIdx === -1) continue; - const unitType = key.slice(0, slashIdx); - const unitId = key.slice(slashIdx + 1); + // Hook units have compound types: "hook/<hookName>/unitId" + const { splitCompletedKey } = await import("./forensics.js"); + const parsed = splitCompletedKey(key); + if (!parsed) continue; + const { unitType, unitId } = parsed; // Only validate artifact-producing unit types const { verifyExpectedArtifact } = await import("./auto-recovery.js"); diff --git a/src/resources/extensions/gsd/doctor.ts b/src/resources/extensions/gsd/doctor.ts index 83fc8a754..05630093e 100644 --- a/src/resources/extensions/gsd/doctor.ts +++ b/src/resources/extensions/gsd/doctor.ts @@ -729,8 +729,10 @@ export async function runGSDDoctor(basePath: string, options?: { fix?: boolean; } // Blocker-without-replan detection + // Skip when all tasks are done — the blocker was implicitly resolved + // within the task and the slice is not stuck (#3105 Bug 2). const replanPath = resolveSliceFile(basePath, milestoneId, slice.id, "REPLAN"); - if (!replanPath) { + if (!replanPath && !allTasksDone) { for (const task of plan.tasks) { if (!task.done) continue; const summaryPath = resolveTaskFile(basePath, milestoneId, slice.id, task.id, "SUMMARY"); diff --git a/src/resources/extensions/gsd/error-classifier.ts b/src/resources/extensions/gsd/error-classifier.ts index eb47d46c4..604167451 100644 --- a/src/resources/extensions/gsd/error-classifier.ts +++ b/src/resources/extensions/gsd/error-classifier.ts @@ -60,9 +60,9 @@ const RESET_DELAY_RE = /reset in (\d+)s/i; * 1. Permanent (auth/billing/quota) — unless also rate-limited * 2. Rate limit (429, rate.?limit, too many requests) * 3. Network (ECONNRESET, ETIMEDOUT, socket hang up, fetch failed, dns) - * 4. Server (500/502/503, overloaded, server_error) - * 5.
Connection (terminated, ECONNREFUSED, EPIPE, other side closed) - * 6. Stream truncation (malformed JSON from mid-stream cut) + * 4. Stream truncation (malformed JSON from mid-stream cut) + * 5. Server (500/502/503, overloaded, server_error) + * 6. Connection (terminated, ECONNREFUSED, EPIPE, other side closed) * 7. Unknown */ export function classifyError(errorMsg: string, retryAfterMs?: number): ErrorClass { @@ -92,21 +92,21 @@ export function classifyError(errorMsg: string, retryAfterMs?: number): ErrorCla return { kind: "network", retryAfterMs: retryAfterMs ?? 3_000 }; } - // 4. Server errors — try fallback model + // 4. Stream truncation — downstream symptom of connection drop + if (STREAM_RE.test(errorMsg)) { + return { kind: "stream", retryAfterMs: retryAfterMs ?? 15_000 }; + } + + // 5. Server errors — try fallback model if (SERVER_RE.test(errorMsg)) { return { kind: "server", retryAfterMs: retryAfterMs ?? 30_000 }; } - // 5. Connection errors — try fallback model + // 6. Connection errors — try fallback model if (CONNECTION_RE.test(errorMsg)) { return { kind: "connection", retryAfterMs: retryAfterMs ?? 15_000 }; } - // 6. Stream truncation — downstream symptom of connection drop - if (STREAM_RE.test(errorMsg)) { - return { kind: "stream", retryAfterMs: retryAfterMs ?? 15_000 }; - } - // 7. 
Unknown return { kind: "unknown" }; } diff --git a/src/resources/extensions/gsd/extension-manifest.json b/src/resources/extensions/gsd/extension-manifest.json index 2c01ab4ee..ca0063a5f 100644 --- a/src/resources/extensions/gsd/extension-manifest.json +++ b/src/resources/extensions/gsd/extension-manifest.json @@ -12,7 +12,22 @@ "gsd_requirement_update", "gsd_milestone_generate_id" ], "commands": ["gsd", "kill", "worktree", "exit"], - "hooks": ["session_start", "session_switch"], + "hooks": [ + "session_start", + "session_switch", + "bash_transform", + "session_fork", + "before_agent_start", + "agent_end", + "session_before_compact", + "session_shutdown", + "tool_call", + "tool_result", + "tool_execution_start", + "tool_execution_end", + "model_select", + "before_provider_request" + ], "shortcuts": ["Ctrl+Alt+G"] } } diff --git a/src/resources/extensions/gsd/forensics.ts b/src/resources/extensions/gsd/forensics.ts index 78c074202..81cc69da2 100644 --- a/src/resources/extensions/gsd/forensics.ts +++ b/src/resources/extensions/gsd/forensics.ts @@ -28,6 +28,8 @@ import { deriveState } from "./state.js"; import { isAutoActive } from "./auto.js"; import { loadPrompt } from "./prompt-loader.js"; import { gsdRoot } from "./paths.js"; +import { isDbAvailable, getAllMilestones, getMilestoneSlices, getSliceTasks } from "./gsd-db.js"; +import { isClosedStatus } from "./status-guards.js"; import { formatDuration } from "../shared/format-utils.js"; import { getAutoWorktreePath } from "./auto-worktree.js"; import { loadEffectiveGSDPreferences, loadGlobalGSDPreferences, getGlobalGSDPreferencesPath } from "./preferences.js"; @@ -85,6 +87,15 @@ interface JournalSummary { fileCount: number; } +interface DbCompletionCounts { + milestones: number; + milestonesTotal: number; + slices: number; + slicesTotal: number; + tasks: number; + tasksTotal: number; +} + interface ForensicReport { gsdVersion: string; timestamp: string; @@ -95,6 +106,7 @@ interface ForensicReport { unitTraces: 
UnitTrace[]; metrics: MetricsLedger | null; completedKeys: string[]; + dbCompletionCounts: DbCompletionCounts | null; crashLock: LockData | null; doctorIssues: DoctorIssue[]; anomalies: ForensicAnomaly[]; @@ -106,13 +118,15 @@ interface ForensicReport { // ─── Duplicate Detection ────────────────────────────────────────────────────── const DEDUP_PROMPT_SECTION = ` -## Duplicate Detection (REQUIRED before issue creation) +## Pre-Investigation: Duplicate Check (REQUIRED) -Before offering to create a GitHub issue, you MUST search for existing issues and PRs that may already address this bug. This step uses the user's AI tokens for analysis. +Before reading GSD source code or performing deep analysis, you MUST search for existing issues and PRs that may already address this bug. This avoids wasting tokens on already-fixed bugs. ### Search Steps -1. **Search closed issues** for similar keywords from your diagnosis: +Use keywords from the user's problem description and the anomaly summaries in the forensic report above. + +1. **Search closed issues** for similar keywords: \`\`\` gh issue list --repo gsd-build/gsd-2 --state closed --search "" --limit 20 \`\`\` @@ -129,20 +143,16 @@ Before offering to create a GitHub issue, you MUST search for existing issues an ### Analysis -For each result, compare it against your root-cause diagnosis: +For each result, compare it against the user's reported symptoms and the forensic anomalies: - Does the issue describe the same code path or file? -- Does the PR modify the same file:line you identified? +- Does the PR modify the area related to the reported symptoms? - Is the symptom description semantically similar even if keywords differ? -### Present Findings +### Decision Gate -If you find potential matches, present them to the user: - -1. **"Already fixed by PR #X — skip issue creation"** — when a merged PR or closed issue clearly addresses the same root cause. Explain why you believe it matches. -2. 
**"Add my findings to existing issue #Y"** — when an open issue exists for the same bug. Use \`gh issue comment #Y --repo gsd-build/gsd-2\` to add forensic evidence. -3. **"Create new issue anyway"** — when existing results do not cover this specific failure. - -Only proceed to issue creation if no matches were found OR the user explicitly chooses "Create new issue anyway". +- **Merged PR clearly fixes the described symptom** → Report "Already fixed by PR #X" with brief explanation. Skip full investigation. +- **Open issue matches** → Report "Existing issue #Y covers this." Offer to add forensic evidence. Skip full investigation unless user asks for deeper analysis. +- **No matches** → Proceed to full investigation below. `; async function writeForensicsDedupPref(ctx: ExtensionCommandContext, enabled: boolean): Promise { @@ -250,6 +260,9 @@ export async function handleForensics( { customType: "gsd-forensics", content, display: false }, { triggerTurn: true }, ); + + // Persist forensics context so follow-up turns can re-inject it (#2941) + writeForensicsMarker(basePath, savedPath, content); } // ─── Report Builder ─────────────────────────────────────────────────────────── @@ -275,8 +288,9 @@ export async function buildForensicReport(basePath: string): Promise"), so a + * naive `key.indexOf("/")` would split "hook/telegram-progress/M007/S01" into + * unitType="hook" (wrong) instead of "hook/telegram-progress". + * + * Returns `null` for malformed keys that cannot be split. 
+ */ +export function splitCompletedKey(key: string): { unitType: string; unitId: string } | null { + if (key.startsWith("hook/")) { + // Hook unit types are two segments: "hook/<hookName>" + const secondSlash = key.indexOf("/", 5); // skip past "hook/" + if (secondSlash === -1) return null; // malformed — no unitId after hook name + return { + unitType: key.slice(0, secondSlash), + unitId: key.slice(secondSlash + 1), + }; + } + + const slashIdx = key.indexOf("/"); + if (slashIdx === -1) return null; + return { + unitType: key.slice(0, slashIdx), + unitId: key.slice(slashIdx + 1), + }; +} + function detectMissingArtifacts(completedKeys: string[], basePath: string, activeMilestone: string | null, anomalies: ForensicAnomaly[]): void { // Also check the worktree path for artifacts — they may exist there but not at root const wtBasePath = activeMilestone ? getAutoWorktreePath(basePath, activeMilestone) : null; for (const key of completedKeys) { - const slashIdx = key.indexOf("/"); - if (slashIdx === -1) continue; - const unitType = key.slice(0, slashIdx); - const unitId = key.slice(slashIdx + 1); + const parsed = splitCompletedKey(key); + if (!parsed) continue; + const { unitType, unitId } = parsed; const rootHasArtifact = verifyExpectedArtifact(unitType, unitId, basePath); const wtHasArtifact = wtBasePath ? verifyExpectedArtifact(unitType, unitId, wtBasePath) : false; @@ -896,6 +976,42 @@ function saveForensicReport(basePath: string, report: ForensicReport, problemDes return filePath; } +// ─── Forensics Session Marker ──────────────────────────────────────────────── + +export interface ForensicsMarker { + reportPath: string; + promptContent: string; + createdAt: string; +} + +/** + * Write a marker file so that buildBeforeAgentStartResult() can re-inject + * the forensics prompt on follow-up turns.
(#2941) + */ +export function writeForensicsMarker(basePath: string, reportPath: string, promptContent: string): void { + const dir = join(gsdRoot(basePath), "runtime"); + mkdirSync(dir, { recursive: true }); + const marker: ForensicsMarker = { + reportPath, + promptContent, + createdAt: new Date().toISOString(), + }; + writeFileSync(join(dir, "active-forensics.json"), JSON.stringify(marker), "utf-8"); +} + +/** + * Read the active forensics marker, or null if none exists. + */ +export function readForensicsMarker(basePath: string): ForensicsMarker | null { + const markerPath = join(gsdRoot(basePath), "runtime", "active-forensics.json"); + if (!existsSync(markerPath)) return null; + try { + return JSON.parse(readFileSync(markerPath, "utf-8")) as ForensicsMarker; + } catch { + return null; + } +} + // ─── Prompt Formatter ───────────────────────────────────────────────────────── function formatReportForPrompt(report: ForensicReport): string { @@ -1008,8 +1124,16 @@ function formatReportForPrompt(report: ForensicReport): string { sections.push(""); } - // Completed keys count - sections.push(`### Completed Keys: ${report.completedKeys.length}`); + // Completion status — prefer DB counts, fall back to legacy completed-units.json + if (report.dbCompletionCounts) { + const c = report.dbCompletionCounts; + sections.push(`### Completion Status (from DB)`); + sections.push(`- ${c.milestones}/${c.milestonesTotal} milestones complete`); + sections.push(`- ${c.slices}/${c.slicesTotal} slices complete`); + sections.push(`- ${c.tasks}/${c.tasksTotal} tasks complete`); + } else { + sections.push(`### Completed Keys: ${report.completedKeys.length}`); + } sections.push(`### GSD Version: ${report.gsdVersion}`); sections.push(`### Active Milestone: ${report.activeMilestone ?? "none"}`); sections.push(`### Active Slice: ${report.activeSlice ?? 
"none"}`); diff --git a/src/resources/extensions/gsd/git-service.ts b/src/resources/extensions/gsd/git-service.ts index d7c543182..53c8d85a2 100644 --- a/src/resources/extensions/gsd/git-service.ts +++ b/src/resources/extensions/gsd/git-service.ts @@ -9,7 +9,7 @@ */ import { execFileSync, execSync } from "node:child_process"; -import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; +import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs"; import { join } from "node:path"; import { gsdRoot } from "./paths.js"; import { GIT_NO_PROMPT_ENV } from "./git-constants.js"; @@ -50,9 +50,9 @@ export interface GitPreferences { main_branch?: string; merge_strategy?: "squash" | "merge"; /** Controls auto-mode git isolation strategy. - * - "worktree": (default) creates a milestone worktree for isolated work + * - "worktree": creates a milestone worktree for isolated work * - "branch": works directly in the project root (for submodule-heavy repos) - * - "none": no git isolation — commits land on the user's current branch directly + * - "none": (default) no git isolation — commits land on the user's current branch directly */ isolation?: "worktree" | "branch" | "none"; /** When false, GSD will not modify .gitignore at all — no baseline patterns @@ -488,6 +488,29 @@ export class GitServiceImpl { // If .gsd/ IS in .gitignore (the default for external state projects), // git add -A already skips it and the exclusions are harmless no-ops. const allExclusions = [...RUNTIME_EXCLUSION_PATHS, ...extraExclusions]; + + // ── Parallel worker milestone scope (#1991) ── + // When GSD_MILESTONE_LOCK is set, this process is a parallel worker that + // must only commit files belonging to its own milestone. Exclude all other + // milestone directories from staging to prevent cross-milestone pollution + // (e.g., an M033 worker fabricating M032 artifacts in the same commit). 
+ const milestoneLock = process.env.GSD_MILESTONE_LOCK; + if (milestoneLock) { + const msDir = join(gsdRoot(this.basePath), "milestones"); + if (existsSync(msDir)) { + try { + const entries = readdirSync(msDir, { withFileTypes: true }); + for (const entry of entries) { + if (entry.isDirectory() && entry.name !== milestoneLock) { + allExclusions.push(`.gsd/milestones/${entry.name}/`); + } + } + } catch { + // Best-effort — if we can't read the milestones dir, proceed without scoping + } + } + } + nativeAddAllWithExclusions(this.basePath, allExclusions); } diff --git a/src/resources/extensions/gsd/gitignore.ts b/src/resources/extensions/gsd/gitignore.ts index da4b2ee91..c3c2f66b8 100644 --- a/src/resources/extensions/gsd/gitignore.ts +++ b/src/resources/extensions/gsd/gitignore.ts @@ -41,6 +41,7 @@ const GSD_RUNTIME_PATTERNS = [ const BASELINE_PATTERNS = [ // ── GSD state directory (symlink to external storage) ── ".gsd", + ".gsd-id", // ── OS junk ── ".DS_Store", @@ -84,6 +85,38 @@ const BASELINE_PATTERNS = [ "tmp/", ]; +/** + * Check whether `.gsd` is covered by the project's `.gitignore`. + * + * Uses `git check-ignore` for accurate evaluation — this respects nested + * .gitignore files, global gitignore, and negation patterns. Returns true + * only when git would actually ignore `.gsd/`. + * + * Returns false (not ignored) if: + * - No `.gitignore` exists + * - `.gsd` is not listed in any active ignore rule + * - Not a git repo or git is unavailable + */ +export function isGsdGitignored(basePath: string): boolean { + // Check both `.gsd` and `.gsd/` because `.gsd/` in .gitignore (trailing + // slash = directory-only pattern) only matches the directory form. Using + // both paths covers all gitignore pattern variants. + for (const path of [".gsd", ".gsd/"]) { + try { + // git check-ignore exits 0 when the path IS ignored, 1 when it is NOT. 
+ execFileSync("git", ["check-ignore", "-q", path], { + cwd: basePath, + stdio: "pipe", + env: GIT_NO_PROMPT_ENV, + }); + return true; // exit 0 → .gsd is ignored + } catch { + // exit 1 → this form is NOT ignored, try the other + } + } + return false; // neither form is ignored (or git unavailable) +} + /** * Check whether `.gsd/` contains files tracked by git. * If so, the project intentionally keeps `.gsd/` in version control diff --git a/src/resources/extensions/gsd/gsd-db.ts b/src/resources/extensions/gsd/gsd-db.ts index 1559b8616..4c22b41dc 100644 --- a/src/resources/extensions/gsd/gsd-db.ts +++ b/src/resources/extensions/gsd/gsd-db.ts @@ -10,6 +10,7 @@ import { existsSync, copyFileSync, mkdirSync, realpathSync } from "node:fs"; import { dirname } from "node:path"; import type { Decision, Requirement, GateRow, GateId, GateScope, GateStatus, GateVerdict } from "./types.js"; import { GSDError, GSD_STALE_STATE } from "./errors.js"; +import { logError } from "./workflow-logger.js"; const _require = createRequire(import.meta.url); @@ -778,8 +779,21 @@ export function openDatabase(path: string): boolean { try { initSchema(adapter, fileBacked); } catch (err) { - try { adapter.close(); } catch { /* swallow */ } - throw err; + // Corrupt freelist: DDL fails with "malformed" but VACUUM can rebuild. + // Attempt VACUUM recovery before giving up (see #2519). 
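The VACUUM-recovery path added to `openDatabase()` reduces to a small retry wrapper. The following is an illustrative sketch, not the actual gsd-db API: the adapter interface and function names are invented for the example.

```typescript
// Illustrative sketch of the "malformed database" recovery flow: run the
// schema DDL, and on a SQLite "malformed" error rebuild the file with
// VACUUM and retry once. Any other error, or a second failure, propagates.
interface MiniAdapter {
  exec(sql: string): void;
}

function initWithVacuumRecovery(
  adapter: MiniAdapter,
  initSchema: (a: MiniAdapter) => void,
): "ok" | "recovered" {
  try {
    initSchema(adapter);
    return "ok";
  } catch (err) {
    if (!(err instanceof Error) || !err.message.includes("malformed")) throw err;
    adapter.exec("VACUUM"); // rebuilds the db file, discarding the corrupt freelist
    initSchema(adapter); // a second failure is rethrown to the caller
    return "recovered";
  }
}
```

The key design point mirrored from the diff is that recovery is attempted only for the specific "malformed" error class; unrelated init failures still close and rethrow immediately.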
+ if (fileBacked && err instanceof Error && err.message?.includes("malformed")) { + try { + adapter.exec("VACUUM"); + initSchema(adapter, fileBacked); + process.stderr.write("gsd-db: recovered corrupt database via VACUUM\n"); + } catch (retryErr) { + try { adapter.close(); } catch { /* swallow */ } + throw retryErr; + } + } else { + try { adapter.close(); } catch { /* swallow */ } + throw err; + } } currentDb = adapter; @@ -1124,10 +1138,11 @@ export function insertMilestone(m: { }); } -export function upsertMilestonePlanning(milestoneId: string, planning: Partial): void { +export function upsertMilestonePlanning(milestoneId: string, planning: Partial, title?: string): void { if (!currentDb) throw new GSDError(GSD_STALE_STATE, "gsd-db: No database open"); currentDb.prepare( `UPDATE milestones SET + title = COALESCE(:title, title), vision = COALESCE(:vision, vision), success_criteria = COALESCE(:success_criteria, success_criteria), key_risks = COALESCE(:key_risks, key_risks), @@ -1142,6 +1157,7 @@ export function upsertMilestonePlanning(milestoneId: string, planning: Partial(); + +/** + * Backward-compat bridge: returns a mutable reference to the entry matching + * basePath, or the sole entry when only one session exists. + * Internal use only — external code should use the Map directly. + */ +function _getPendingAutoStart(basePath?: string): PendingAutoStartEntry | null { + if (basePath) return pendingAutoStartMap.get(basePath) ?? null; + if (pendingAutoStartMap.size === 1) return pendingAutoStartMap.values().next().value!; + return null; +} + +/** + * Store pending auto-start state for a project. + * Exported for testing (#2985). + */ +export function setPendingAutoStart(basePath: string, entry: { basePath: string; milestoneId: string; ctx?: ExtensionCommandContext; pi?: ExtensionAPI; step?: boolean }): void { + pendingAutoStartMap.set(basePath, entry as PendingAutoStartEntry); +} + +/** + * Clear pending auto-start state. 
+ * If basePath is given, clears only that project. Otherwise clears all. + * Exported for testing (#2985). + */ +export function clearPendingAutoStart(basePath?: string): void { + if (basePath) { + pendingAutoStartMap.delete(basePath); + } else { + pendingAutoStartMap.clear(); + } +} + +/** + * Returns the milestoneId being discussed for the given project. + * When basePath is omitted and only one session is active, returns that + * session's milestoneId for backward compatibility. Returns null when + * multiple sessions exist and basePath is not specified (#2985 Bug 4). + */ +export function getDiscussionMilestoneId(basePath?: string): string | null { + if (basePath) { + return pendingAutoStartMap.get(basePath)?.milestoneId ?? null; + } + // Backward compat: return the sole entry's milestoneId, or null if ambiguous + if (pendingAutoStartMap.size === 1) { + return pendingAutoStartMap.values().next().value!.milestoneId; + } + return null; } /** Called from agent_end to check if auto-mode should start after discuss */ export function checkAutoStartAfterDiscuss(): boolean { - if (!pendingAutoStart) return false; + const entry = _getPendingAutoStart(); + if (!entry) return false; - const { ctx, pi, basePath, milestoneId, step } = pendingAutoStart; + const { ctx, pi, basePath, milestoneId, step } = entry; // Gate 1: Primary milestone must have CONTEXT.md or ROADMAP.md // The "discuss" path creates CONTEXT.md; the "plan" path creates ROADMAP.md. 
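The singleton-to-Map migration in this file follows one pattern throughout: state keyed by `basePath`, a single-entry fallback for backward compatibility, and an explicit `null` when multiple sessions make a keyless lookup ambiguous. A condensed, hypothetical sketch of that lookup rule:

```typescript
// Condensed sketch of the per-project pending-auto-start lookup (#2985):
// keyed lookups hit the Map directly; keyless lookups succeed only when
// exactly one session exists, otherwise they return null rather than guess.
interface Pending {
  basePath: string;
  milestoneId: string;
}

const pending = new Map<string, Pending>();

function getDiscussionMilestone(basePath?: string): string | null {
  if (basePath) return pending.get(basePath)?.milestoneId ?? null;
  if (pending.size === 1) return pending.values().next().value!.milestoneId;
  return null; // zero or multiple sessions: ambiguous
}
```

Returning `null` on ambiguity (instead of picking an arbitrary entry) is what prevents one project's discussion from being attributed to another when two sessions run concurrently.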
@@ -177,7 +225,7 @@ export function checkAutoStartAfterDiscuss(): boolean { // Cleanup: remove discussion manifest after auto-start (only needed during discussion) try { unlinkSync(manifestPath); } catch { /* may not exist for single-milestone */ } - pendingAutoStart = null; + pendingAutoStartMap.delete(basePath); ctx.ui.notify(`Milestone ${milestoneId} ready.`, "info"); startAuto(ctx, pi, basePath, false, { step }).catch((err) => { ctx.ui.notify(`Auto-start failed: ${getErrorMessage(err)}`, "error"); @@ -223,24 +271,20 @@ async function dispatchWorkflow( ctx?: ExtensionContext, unitType?: string, ): Promise { - // Apply model preference for this unit type (if configured) + // Route through the dynamic routing pipeline (complexity classification, + // tier downgrade, fallback chains) — same path as auto-mode dispatches (#2958). if (ctx && unitType) { - const modelConfig = resolveModelWithFallbacksForUnit(unitType); - if (modelConfig) { - const availableModels = ctx.modelRegistry.getAvailable(); - const modelsToTry = [modelConfig.primary, ...modelConfig.fallbacks]; - - for (const modelId of modelsToTry) { - // Resolve model from available models (same logic as auto-model-selection) - const model = resolveAvailableModel(modelId, availableModels, ctx.model?.provider); - if (!model) continue; - - const ok = await pi.setModel(model, { persist: false }); - if (ok) { - debugLog("guided-flow-model-applied", { unitType, model: `${model.provider}/${model.id}` }); - break; - } - } + const prefs = loadEffectiveGSDPreferences()?.preferences; + const result = await selectAndApplyModel( + ctx, pi, unitType, /* unitId */ "", /* basePath */ process.cwd(), + prefs, /* verbose */ false, /* autoModeStartModel */ null, + ); + if (result.appliedModel) { + debugLog("guided-flow-model-applied", { + unitType, + model: `${result.appliedModel.provider}/${result.appliedModel.id}`, + routing: result.routing, + }); } } @@ -393,7 +437,7 @@ export async function showHeadlessMilestoneCreation( 
const prompt = buildHeadlessDiscussPrompt(nextId, seedContext, basePath); // Set pending auto start (auto-mode triggers on "Milestone X ready." via checkAutoStartAfterDiscuss) - pendingAutoStart = { ctx, pi, basePath, milestoneId: nextId }; + pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId }); // Dispatch — headless milestone creation is a planning activity await dispatchWorkflow(pi, prompt, "gsd-run", ctx, "plan-milestone"); @@ -573,12 +617,12 @@ export async function showDiscuss( const seed = draftContent ? `${basePrompt}\n\n## Prior Discussion (Draft Seed)\n\n${draftContent}` : basePrompt; - pendingAutoStart = { ctx, pi, basePath, milestoneId: mid, step: false }; + pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: mid, step: false }); await dispatchWorkflow(pi, seed, "gsd-discuss", ctx, "discuss-milestone"); } else if (choice === "discuss_fresh") { const discussMilestoneTemplates = inlineTemplate("context", "Context"); const structuredQuestionsAvailable = pi.getActiveTools().includes("ask_user_questions") ? 
"true" : "false"; - pendingAutoStart = { ctx, pi, basePath, milestoneId: mid, step: false }; + pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: mid, step: false }); await dispatchWorkflow(pi, loadPrompt("guided-discuss-milestone", { milestoneId: mid, milestoneTitle, inlinedTemplates: discussMilestoneTemplates, structuredQuestionsAvailable, commitInstruction: buildDocsCommitInstruction(`docs(${mid}): milestone context from discuss`), @@ -587,7 +631,7 @@ export async function showDiscuss( const milestoneIds = findMilestoneIds(basePath); const uniqueMilestoneIds = !!loadEffectiveGSDPreferences()?.preferences?.unique_milestone_ids; const nextId = nextMilestoneIdReserved(milestoneIds, uniqueMilestoneIds); - pendingAutoStart = { ctx, pi, basePath, milestoneId: nextId, step: false }; + pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, step: false }); await dispatchWorkflow(pi, buildDiscussPrompt(nextId, `New milestone ${nextId}.`, basePath), "gsd-run", ctx, "discuss-milestone"); } return; @@ -617,9 +661,21 @@ export async function showDiscuss( } else { normSlices = []; } + // DB is open but returned zero slices despite a roadmap existing — + // the DB may be empty due to WAL loss or truncation (see #2815, #2892). + // Fall back to roadmap parsing to prevent false "all complete" exit. 
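The empty-DB guard added below boils down to a precedence rule between the two slice sources. A sketch under simplified assumptions (the real code maps richer slice records and a real roadmap parser):

```typescript
// Sketch of the slice-source fallback (#2815, #2892): prefer DB rows, but
// if the DB yields zero slices while a roadmap file exists, reparse the
// roadmap so an empty DB cannot masquerade as "all slices complete".
interface SliceStatus {
  id: string;
  done: boolean;
}

function resolveSlices(
  dbSlices: SliceStatus[],
  roadmapContent: string | null,
  parseRoadmap: (md: string) => SliceStatus[],
): SliceStatus[] {
  if (dbSlices.length > 0 || !roadmapContent) return dbSlices;
  return parseRoadmap(roadmapContent);
}
```

Note the asymmetry: a populated DB always wins, and the roadmap is consulted only in the suspicious zero-rows case, so WAL loss degrades to markdown parsing instead of a false "nothing to discuss" exit.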
+ if (normSlices.length === 0 && roadmapContent) { + normSlices = parseRoadmapSlices(roadmapContent).map(s => ({ id: s.id, done: s.done, title: s.title })); + } const pendingSlices = normSlices.filter(s => !s.done); if (pendingSlices.length === 0) { + // All slices complete — but queued milestones may still need discussion (#3150) + const pendingMilestones = state.registry.filter(m => m.status === "pending"); + if (pendingMilestones.length > 0) { + await showDiscussQueuedMilestone(ctx, pi, basePath, pendingMilestones); + return; + } ctx.ui.notify("All slices are complete — nothing to discuss.", "info"); return; } @@ -636,9 +692,14 @@ export async function showDiscuss( discussedMap.set(s.id, !!contextFile); } - // If all pending slices are discussed, notify and exit instead of looping + // If all pending slices are discussed, check for queued milestones before exiting (#3150) const allDiscussed = pendingSlices.every(s => discussedMap.get(s.id)); if (allDiscussed) { + const pendingMilestones = state.registry.filter(m => m.status === "pending"); + if (pendingMilestones.length > 0) { + await showDiscussQueuedMilestone(ctx, pi, basePath, pendingMilestones); + return; + } const lockData = readSessionLockData(basePath); const remoteAutoRunning = lockData && lockData.pid !== process.pid && isSessionLockProcessAlive(lockData); const nextStep = remoteAutoRunning @@ -932,7 +993,7 @@ async function handleMilestoneActions( const milestoneIds = findMilestoneIds(basePath); const uniqueMilestoneIds = !!loadEffectiveGSDPreferences()?.preferences?.unique_milestone_ids; const nextId = nextMilestoneIdReserved(milestoneIds, uniqueMilestoneIds); - pendingAutoStart = { ctx, pi, basePath, milestoneId: nextId, step: stepMode }; + pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, step: stepMode }); await dispatchWorkflow(pi, buildDiscussPrompt(nextId, `New milestone ${nextId}.`, basePath @@ -974,7 +1035,15 @@ export async function showSmartEntry( } // ── Detection 
preamble — run before any bootstrap ──────────────────── - if (!existsSync(gsdRoot(basePath))) { + // Check bootstrap completeness, not just .gsd/ directory existence. + // A zombie .gsd/ state (symlink exists but missing PREFERENCES.md and + // milestones/) must trigger the init wizard, not skip it (#2942). + const gsdPath = gsdRoot(basePath); + const hasBootstrapArtifacts = existsSync(gsdPath) + && (existsSync(join(gsdPath, "PREFERENCES.md")) + || existsSync(join(gsdPath, "milestones"))); + + if (!hasBootstrapArtifacts) { const detection = detectProjectState(basePath); // v1 .planning/ detected — offer migration before anything else @@ -989,7 +1058,7 @@ export async function showSmartEntry( // "fresh" — fall through to init wizard } - // No .gsd/ — run the project init wizard + // No .gsd/ or zombie .gsd/ — run the project init wizard const result = await showProjectInit(ctx, pi, basePath, detection); if (!result.completed) return; // User cancelled @@ -1048,9 +1117,9 @@ export async function showSmartEntry( if (!state.activeMilestone?.id) { // Guard: if a discuss session is already in flight, don't re-inject the prompt. // Both /gsd and /gsd auto reach this branch when no milestone exists yet. - // Without this guard, every subsequent /gsd call overwrites pendingAutoStart + // Without this guard, every subsequent /gsd call overwrites the pending auto-start // and fires another dispatchWorkflow, resetting the conversation mid-interview. 
- if (pendingAutoStart) { + if (pendingAutoStartMap.has(basePath)) { ctx.ui.notify("Discussion already in progress — answer the question above to continue.", "info"); return; } @@ -1083,7 +1152,7 @@ export async function showSmartEntry( if (isFirst) { // First ever — skip wizard, just ask directly - pendingAutoStart = { ctx, pi, basePath, milestoneId: nextId, step: stepMode }; + pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, step: stepMode }); await dispatchWorkflow(pi, buildDiscussPrompt(nextId, `New project, milestone ${nextId}. Do NOT read or explore .gsd/ — it's empty scaffolding.`, basePath @@ -1104,7 +1173,7 @@ export async function showSmartEntry( }); if (choice === "new_milestone") { - pendingAutoStart = { ctx, pi, basePath, milestoneId: nextId, step: stepMode }; + pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, step: stepMode }); await dispatchWorkflow(pi, buildDiscussPrompt(nextId, `New milestone ${nextId}.`, basePath @@ -1143,7 +1212,7 @@ export async function showSmartEntry( const uniqueMilestoneIds = !!loadEffectiveGSDPreferences()?.preferences?.unique_milestone_ids; const nextId = nextMilestoneIdReserved(milestoneIds, uniqueMilestoneIds); - pendingAutoStart = { ctx, pi, basePath, milestoneId: nextId, step: stepMode }; + pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, step: stepMode }); await dispatchWorkflow(pi, buildDiscussPrompt(nextId, `New milestone ${nextId}.`, basePath @@ -1194,12 +1263,12 @@ export async function showSmartEntry( const seed = draftContent ? 
`${basePrompt}\n\n## Prior Discussion (Draft Seed)\n\n${draftContent}` : basePrompt; - pendingAutoStart = { ctx, pi, basePath, milestoneId, step: stepMode }; + pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId, step: stepMode }); await dispatchWorkflow(pi, seed, "gsd-discuss", ctx, "discuss-milestone"); } else if (choice === "discuss_fresh") { const discussMilestoneTemplates = inlineTemplate("context", "Context"); const structuredQuestionsAvailable = pi.getActiveTools().includes("ask_user_questions") ? "true" : "false"; - pendingAutoStart = { ctx, pi, basePath, milestoneId, step: stepMode }; + pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId, step: stepMode }); await dispatchWorkflow(pi, loadPrompt("guided-discuss-milestone", { milestoneId, milestoneTitle, inlinedTemplates: discussMilestoneTemplates, structuredQuestionsAvailable, commitInstruction: buildDocsCommitInstruction(`docs(${milestoneId}): milestone context from discuss`), @@ -1208,7 +1277,7 @@ export async function showSmartEntry( const milestoneIds = findMilestoneIds(basePath); const uniqueMilestoneIds = !!loadEffectiveGSDPreferences()?.preferences?.unique_milestone_ids; const nextId = nextMilestoneIdReserved(milestoneIds, uniqueMilestoneIds); - pendingAutoStart = { ctx, pi, basePath, milestoneId: nextId, step: stepMode }; + pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, step: stepMode }); await dispatchWorkflow(pi, buildDiscussPrompt(nextId, `New milestone ${nextId}.`, basePath @@ -1261,7 +1330,7 @@ export async function showSmartEntry( }); if (choice === "plan") { - pendingAutoStart = { ctx, pi, basePath, milestoneId, step: stepMode }; + pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId, step: stepMode }); const planMilestoneTemplates = [ inlineTemplate("roadmap", "Roadmap"), inlineTemplate("plan", "Slice Plan"), @@ -1292,7 +1361,7 @@ export async function showSmartEntry( const milestoneIds = findMilestoneIds(basePath); 
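The repeated `pendingAutoStartMap.set(basePath, …)` substitutions in these hunks replace what was previously a single module-level `pendingAutoStart` variable. A minimal sketch of the keyed pattern and the in-flight guard — the `PendingAutoStartLite` shape is simplified to the fields visible in the diff, and `requestAutoStart` is an illustrative helper, not a function from the codebase:

```typescript
// Illustrative sketch — not repo code. Shows why keying pending auto-starts
// by basePath prevents one project's /gsd call from clobbering another
// project's in-flight discuss session.
interface PendingAutoStartLite {
  basePath: string;
  milestoneId: string;
  step: boolean;
}

const pendingAutoStartMap = new Map<string, PendingAutoStartLite>();

// Returns false (leaving state untouched) when a discuss session is already
// in flight for this basePath — mirroring the guard in showSmartEntry.
function requestAutoStart(basePath: string, milestoneId: string, step: boolean): boolean {
  if (pendingAutoStartMap.has(basePath)) return false;
  pendingAutoStartMap.set(basePath, { basePath, milestoneId, step });
  return true;
}
```

With a single variable, a second `/gsd` call from a different project would overwrite the first entry mid-interview; with the map, each basePath carries its own pending entry.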
const uniqueMilestoneIds = !!loadEffectiveGSDPreferences()?.preferences?.unique_milestone_ids; const nextId = nextMilestoneIdReserved(milestoneIds, uniqueMilestoneIds); - pendingAutoStart = { ctx, pi, basePath, milestoneId: nextId, step: stepMode }; + pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, step: stepMode }); await dispatchWorkflow(pi, buildDiscussPrompt(nextId, `New milestone ${nextId}.`, basePath diff --git a/src/resources/extensions/gsd/health-widget-core.ts b/src/resources/extensions/gsd/health-widget-core.ts index cc50f2099..783baf1da 100644 --- a/src/resources/extensions/gsd/health-widget-core.ts +++ b/src/resources/extensions/gsd/health-widget-core.ts @@ -18,6 +18,10 @@ export interface HealthWidgetData { providerIssue: string | null; environmentErrorCount: number; environmentWarningCount: number; + /** Unix epoch (seconds) of the last commit, or null if unavailable. */ + lastCommitEpoch: number | null; + /** Subject line of the last commit, or null if unavailable. */ + lastCommitMessage: string | null; lastRefreshed: number; } @@ -32,6 +36,29 @@ function formatCost(n: number): string { return n >= 1 ? `$${n.toFixed(2)}` : `${(n * 100).toFixed(1)}¢`; } +/** + * Format a Unix epoch (seconds) as a human-readable relative time string. + * Returns "just now" for <1m, "Xm ago" for <1h, "Xh ago" for <24h, "Xd ago" otherwise. + */ +export function formatRelativeTime(epochSeconds: number): string { + const diffSeconds = Math.floor(Date.now() / 1000) - epochSeconds; + if (diffSeconds < 60) return "just now"; + const minutes = Math.floor(diffSeconds / 60); + if (minutes < 60) return `${minutes}m ago`; + const hours = Math.floor(minutes / 60); + if (hours < 24) return `${hours}h ago`; + const days = Math.floor(hours / 24); + return `${days}d ago`; +} + +/** + * Truncate a commit message to fit the widget, appending "…" if needed. 
+ */ +function truncateMessage(msg: string, maxLen: number): string { + if (msg.length <= maxLen) return msg; + return msg.slice(0, maxLen - 1) + "…"; +} + /** * Build compact health lines for the widget. * Returns a string array suitable for setWidget(). @@ -73,5 +100,12 @@ export function buildHealthLines(data: HealthWidgetData): string[] { parts.push(`Env: ${data.environmentWarningCount} warning${data.environmentWarningCount > 1 ? "s" : ""}`); } + // Always-on last commit display — shows relative time + truncated message + if (data.lastCommitEpoch !== null && data.lastCommitEpoch > 0) { + const relTime = formatRelativeTime(data.lastCommitEpoch); + const msg = data.lastCommitMessage ? ` — ${truncateMessage(data.lastCommitMessage, 50)}` : ""; + parts.push(`Last commit: ${relTime}${msg}`); + } + return [` ${parts.join(" │ ")}`]; } diff --git a/src/resources/extensions/gsd/health-widget.ts b/src/resources/extensions/gsd/health-widget.ts index fa63e6677..f3f2d262a 100644 --- a/src/resources/extensions/gsd/health-widget.ts +++ b/src/resources/extensions/gsd/health-widget.ts @@ -13,6 +13,7 @@ import type { GSDState } from "./types.js"; import { runProviderChecks, summariseProviderIssues } from "./doctor-providers.js"; import { runEnvironmentChecks } from "./doctor-environment.js"; import { loadEffectiveGSDPreferences } from "./preferences.js"; +import { nativeIsRepo, nativeLastCommitEpoch, nativeGetCurrentBranch, nativeCommitSubject } from "./native-git-bridge.js"; import { loadLedgerFromDisk, getProjectTotals } from "./metrics.js"; import { describeNextUnit, estimateTimeRemaining, updateSliceProgressCache } from "./auto-dashboard.js"; import { projectRoot } from "./commands/context.js"; @@ -31,6 +32,8 @@ function loadHealthWidgetData(basePath: string): HealthWidgetData { let providerIssue: string | null = null; let environmentErrorCount = 0; let environmentWarningCount = 0; + let lastCommitEpoch: number | null = null; + let lastCommitMessage: string | null = null; 
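The two helpers introduced above are pure, so their behavior can be checked in isolation. Copied verbatim from the health-widget-core.ts hunk, with a line composed the same way `buildHealthLines` does:

```typescript
// Copied from the health-widget-core.ts hunk above.
function formatRelativeTime(epochSeconds: number): string {
  const diffSeconds = Math.floor(Date.now() / 1000) - epochSeconds;
  if (diffSeconds < 60) return "just now";
  const minutes = Math.floor(diffSeconds / 60);
  if (minutes < 60) return `${minutes}m ago`;
  const hours = Math.floor(minutes / 60);
  if (hours < 24) return `${hours}h ago`;
  const days = Math.floor(hours / 24);
  return `${days}d ago`;
}

function truncateMessage(msg: string, maxLen: number): string {
  if (msg.length <= maxLen) return msg;
  return msg.slice(0, maxLen - 1) + "…";
}

// Composing the "Last commit" part the same way buildHealthLines does.
const now = Math.floor(Date.now() / 1000);
const part = `Last commit: ${formatRelativeTime(now - 7200)} — ${truncateMessage("fix(widget): render last commit line in health bar", 20)}`;
```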
const projectState = detectHealthWidgetProjectState(basePath); @@ -58,6 +61,18 @@ function loadHealthWidgetData(basePath: string): HealthWidgetData { } } catch { /* non-fatal */ } + // ── Last commit info ── + try { + if (nativeIsRepo(basePath)) { + const branch = nativeGetCurrentBranch(basePath); + const epoch = nativeLastCommitEpoch(basePath, branch || "HEAD"); + if (epoch > 0) { + lastCommitEpoch = epoch; + lastCommitMessage = nativeCommitSubject(basePath, branch || "HEAD") || null; + } + } + } catch { /* non-fatal */ } + return { projectState, budgetCeiling, @@ -65,6 +80,8 @@ function loadHealthWidgetData(basePath: string): HealthWidgetData { providerIssue, environmentErrorCount, environmentWarningCount, + lastCommitEpoch, + lastCommitMessage, lastRefreshed: Date.now(), }; } diff --git a/src/resources/extensions/gsd/index.ts b/src/resources/extensions/gsd/index.ts index 13e6dc97c..836aae28b 100644 --- a/src/resources/extensions/gsd/index.ts +++ b/src/resources/extensions/gsd/index.ts @@ -5,6 +5,7 @@ export { isQueuePhaseActive, setQueuePhaseActive, shouldBlockContextWrite, + shouldBlockQueueExecution, } from "./bootstrap/write-gate.js"; export default async function registerExtension(pi: ExtensionAPI) { diff --git a/src/resources/extensions/gsd/memory-extractor.ts b/src/resources/extensions/gsd/memory-extractor.ts index c63a385a5..acca3c7a0 100644 --- a/src/resources/extensions/gsd/memory-extractor.ts +++ b/src/resources/extensions/gsd/memory-extractor.ts @@ -87,14 +87,22 @@ export function buildMemoryLLMCall(ctx: ExtensionContext): LLMCallFn | null { const selectedModel = model as Model; + // Resolve API key via modelRegistry so OAuth tokens (auth.json) are used. + // Without this, streamSimpleAnthropic only checks env vars via getEnvApiKey, + // which returns undefined for OAuth users (Claude Max / Claude Pro). 
+ // See: https://github.com/gsd-build/gsd-2/issues/2959 + const resolvedKeyPromise = ctx.modelRegistry.getApiKey(selectedModel).catch(() => undefined); + return async (system: string, user: string): Promise => { const { completeSimple } = await import('@gsd/pi-ai'); + const resolvedApiKey = await resolvedKeyPromise; const result: AssistantMessage = await completeSimple(selectedModel, { systemPrompt: system, messages: [{ role: 'user', content: [{ type: 'text', text: user }], timestamp: Date.now() }], }, { maxTokens: 2048, temperature: 0, + ...(resolvedApiKey ? { apiKey: resolvedApiKey } : {}), }); // Extract text from response diff --git a/src/resources/extensions/gsd/migrate-external.ts b/src/resources/extensions/gsd/migrate-external.ts index 4fd53e7d1..1f9786799 100644 --- a/src/resources/extensions/gsd/migrate-external.ts +++ b/src/resources/extensions/gsd/migrate-external.ts @@ -9,7 +9,7 @@ import { execFileSync } from "node:child_process"; import { existsSync, lstatSync, mkdirSync, readdirSync, realpathSync, renameSync, cpSync, rmSync, symlinkSync } from "node:fs"; import { join } from "node:path"; -import { externalGsdRoot } from "./repo-identity.js"; +import { externalGsdRoot, isInsideWorktree } from "./repo-identity.js"; import { getErrorMessage } from "./error-utils.js"; import { hasGitTrackedGsdFiles } from "./gitignore.js"; import { GIT_NO_PROMPT_ENV } from "./git-constants.js"; @@ -34,6 +34,14 @@ export interface MigrationResult { * 3. On failure: rename `.gsd.migrating` back to `.gsd` (rollback) */ export function migrateToExternalState(basePath: string): MigrationResult { + // Worktrees get their .gsd via syncGsdStateToWorktree(), not migration. + // Migration inside a worktree would compute the same external hash as the + // main repo (externalGsdRoot hashes remoteUrl + gitRoot), creating a broken + // junction and orphaning .gsd.migrating (#2970). 
+ if (isInsideWorktree(basePath)) { + return { migrated: false }; + } + const localGsd = join(basePath, ".gsd"); // Skip if doesn't exist diff --git a/src/resources/extensions/gsd/milestone-validation-gates.ts b/src/resources/extensions/gsd/milestone-validation-gates.ts new file mode 100644 index 000000000..4dcd522b6 --- /dev/null +++ b/src/resources/extensions/gsd/milestone-validation-gates.ts @@ -0,0 +1,56 @@ +/** + * Milestone validation quality gate persistence. + * + * #2945 Bug 4: validate-milestone was writing VALIDATION.md to disk and + * inserting an assessment row, but never persisted structured quality_gates + * records in the DB. This module inserts milestone-level validation gates + * that correspond to the validation checks performed. + * + * Gate IDs for milestone validation: + * MV01 — Success criteria checklist + * MV02 — Slice delivery audit + * MV03 — Cross-slice integration + * MV04 — Requirement coverage + * + * These use the existing quality_gates table with scope "milestone". + */ + +import { _getAdapter } from "./gsd-db.js"; + +/** Milestone validation gate IDs. */ +const MILESTONE_GATE_IDS = ["MV01", "MV02", "MV03", "MV04"] as const; + +/** + * Insert milestone-level quality_gates records for a validation run. + * + * Each gate is inserted with status "complete" and a verdict derived + * from the overall milestone validation verdict. Individual gate-level + * verdicts are not available (the handler receives a single verdict), + * so all gates share the overall verdict. + */ +export function insertMilestoneValidationGates( + milestoneId: string, + sliceId: string, + verdict: string, + evaluatedAt: string, +): void { + const db = _getAdapter(); + if (!db) return; + + const gateVerdict = verdict === "pass" ? 
"pass" : "flag"; + + for (const gateId of MILESTONE_GATE_IDS) { + db.prepare( + `INSERT OR REPLACE INTO quality_gates + (milestone_id, slice_id, gate_id, scope, task_id, status, verdict, rationale, findings, evaluated_at) + VALUES (:mid, :sid, :gid, 'milestone', '', 'complete', :verdict, :rationale, '', :evaluated_at)`, + ).run({ + ":mid": milestoneId, + ":sid": sliceId, + ":gid": gateId, + ":verdict": gateVerdict, + ":rationale": `Milestone validation verdict: ${verdict}`, + ":evaluated_at": evaluatedAt, + }); + } +} diff --git a/src/resources/extensions/gsd/model-cost-table.ts b/src/resources/extensions/gsd/model-cost-table.ts index 82be7930d..4c4ebc81c 100644 --- a/src/resources/extensions/gsd/model-cost-table.ts +++ b/src/resources/extensions/gsd/model-cost-table.ts @@ -33,10 +33,29 @@ export const BUNDLED_COST_TABLE: ModelCostEntry[] = [ // OpenAI { id: "gpt-4o", inputPer1k: 0.0025, outputPer1k: 0.01, updatedAt: "2025-03-15" }, { id: "gpt-4o-mini", inputPer1k: 0.00015, outputPer1k: 0.0006, updatedAt: "2025-03-15" }, + { id: "gpt-4.1", inputPer1k: 0.002, outputPer1k: 0.008, updatedAt: "2026-03-29" }, + { id: "gpt-4.1-mini", inputPer1k: 0.0004, outputPer1k: 0.0016, updatedAt: "2026-03-29" }, + { id: "gpt-4.1-nano", inputPer1k: 0.0001, outputPer1k: 0.0004, updatedAt: "2026-03-29" }, + { id: "gpt-5", inputPer1k: 0.01, outputPer1k: 0.04, updatedAt: "2026-03-29" }, + { id: "gpt-5-mini", inputPer1k: 0.0003, outputPer1k: 0.0012, updatedAt: "2026-03-29" }, + { id: "gpt-5-nano", inputPer1k: 0.0001, outputPer1k: 0.0004, updatedAt: "2026-03-29" }, + { id: "gpt-5-pro", inputPer1k: 0.015, outputPer1k: 0.06, updatedAt: "2026-03-29" }, { id: "o1", inputPer1k: 0.015, outputPer1k: 0.06, updatedAt: "2025-03-15" }, { id: "o3", inputPer1k: 0.015, outputPer1k: 0.06, updatedAt: "2025-03-15" }, + { id: "o4-mini", inputPer1k: 0.005, outputPer1k: 0.02, updatedAt: "2026-03-29" }, + { id: "o4-mini-deep-research", inputPer1k: 0.005, outputPer1k: 0.02, updatedAt: "2026-03-29" }, { id: 
"gpt-4-turbo", inputPer1k: 0.01, outputPer1k: 0.03, updatedAt: "2025-03-15" }, + // OpenAI Codex + { id: "gpt-5.1", inputPer1k: 0.005, outputPer1k: 0.02, updatedAt: "2026-03-29" }, + { id: "gpt-5.1-codex-max", inputPer1k: 0.003, outputPer1k: 0.012, updatedAt: "2026-03-29" }, + { id: "gpt-5.1-codex-mini", inputPer1k: 0.0003, outputPer1k: 0.0012, updatedAt: "2026-03-29" }, + { id: "gpt-5.2", inputPer1k: 0.005, outputPer1k: 0.02, updatedAt: "2026-03-29" }, + { id: "gpt-5.2-codex", inputPer1k: 0.005, outputPer1k: 0.02, updatedAt: "2026-03-29" }, + { id: "gpt-5.3-codex", inputPer1k: 0.005, outputPer1k: 0.02, updatedAt: "2026-03-29" }, + { id: "gpt-5.3-codex-spark", inputPer1k: 0.0003, outputPer1k: 0.0012, updatedAt: "2026-03-29" }, + { id: "gpt-5.4", inputPer1k: 0.005, outputPer1k: 0.02, updatedAt: "2026-03-29" }, + // Google { id: "gemini-2.0-flash", inputPer1k: 0.0001, outputPer1k: 0.0004, updatedAt: "2025-03-15" }, { id: "gemini-flash-2.0", inputPer1k: 0.0001, outputPer1k: 0.0004, updatedAt: "2025-03-15" }, diff --git a/src/resources/extensions/gsd/model-router.ts b/src/resources/extensions/gsd/model-router.ts index fe8bdf0a5..f97a69561 100644 --- a/src/resources/extensions/gsd/model-router.ts +++ b/src/resources/extensions/gsd/model-router.ts @@ -44,6 +44,12 @@ const MODEL_CAPABILITY_TIER: Record = { "claude-3-5-haiku-latest": "light", "claude-3-haiku-20240307": "light", "gpt-4o-mini": "light", + "gpt-4.1-mini": "light", + "gpt-4.1-nano": "light", + "gpt-5-mini": "light", + "gpt-5-nano": "light", + "gpt-5.1-codex-mini": "light", + "gpt-5.3-codex-spark": "light", "gemini-2.0-flash": "light", "gemini-flash-2.0": "light", @@ -52,6 +58,8 @@ const MODEL_CAPABILITY_TIER: Record = { "claude-sonnet-4-5-20250514": "standard", "claude-3-5-sonnet-latest": "standard", "gpt-4o": "standard", + "gpt-4.1": "standard", + "gpt-5.1-codex-max": "standard", "gemini-2.5-pro": "standard", "deepseek-chat": "standard", @@ -59,8 +67,17 @@ const MODEL_CAPABILITY_TIER: Record = { 
"claude-opus-4-6": "heavy", "claude-3-opus-latest": "heavy", "gpt-4-turbo": "heavy", + "gpt-5": "heavy", + "gpt-5-pro": "heavy", + "gpt-5.1": "heavy", + "gpt-5.2": "heavy", + "gpt-5.2-codex": "heavy", + "gpt-5.3-codex": "heavy", + "gpt-5.4": "heavy", "o1": "heavy", "o3": "heavy", + "o4-mini": "heavy", + "o4-mini-deep-research": "heavy", }; // ─── Cost Table (per 1K input tokens, approximate USD) ─────────────────────── @@ -75,6 +92,23 @@ const MODEL_COST_PER_1K_INPUT: Record<string, number> = { "claude-opus-4-6": 0.015, "gpt-4o-mini": 0.00015, "gpt-4o": 0.0025, + "gpt-4.1": 0.002, + "gpt-4.1-mini": 0.0004, + "gpt-4.1-nano": 0.0001, + "gpt-5": 0.01, + "gpt-5-mini": 0.0003, + "gpt-5-nano": 0.0001, + "gpt-5-pro": 0.015, + "gpt-5.1": 0.005, + "gpt-5.1-codex-max": 0.003, + "gpt-5.1-codex-mini": 0.0003, + "gpt-5.2": 0.005, + "gpt-5.2-codex": 0.005, + "gpt-5.3-codex": 0.005, + "gpt-5.3-codex-spark": 0.0003, + "gpt-5.4": 0.005, + "o4-mini": 0.005, + "o4-mini-deep-research": 0.005, "gemini-2.0-flash": 0.0001, "gemini-2.5-pro": 0.00125, "deepseek-chat": 0.00014, @@ -191,7 +225,7 @@ export function escalateTier(currentTier: ComplexityTier): ComplexityTier | null */ export function defaultRoutingConfig(): DynamicRoutingConfig { return { - enabled: false, + enabled: true, escalate_on_failure: true, budget_pressure: true, cross_provider: true, diff --git a/src/resources/extensions/gsd/native-git-bridge.ts b/src/resources/extensions/gsd/native-git-bridge.ts index edfe81188..48426dd14 100644 --- a/src/resources/extensions/gsd/native-git-bridge.ts +++ b/src/resources/extensions/gsd/native-git-bridge.ts @@ -931,6 +931,23 @@ export function nativeResetHard(basePath: string): void { execSync("git reset --hard HEAD", { cwd: basePath, stdio: "pipe" }); } +/** + * Get the subject line of a commit (git log -1 --format=%s <ref>). + * Returns empty string if the ref doesn't exist.
+ */ +export function nativeCommitSubject(basePath: string, ref: string): string { + try { + return execFileSync("git", ["log", "-1", "--format=%s", ref], { + cwd: basePath, + stdio: ["ignore", "pipe", "pipe"], + encoding: "utf-8", + env: GIT_NO_PROMPT_ENV, + }).trim(); + } catch { + return ""; + } +} + /** * Delete a branch. * Native: libgit2 branch delete. diff --git a/src/resources/extensions/gsd/notifications.ts b/src/resources/extensions/gsd/notifications.ts index 4a45eae94..0efd0d4c3 100644 --- a/src/resources/extensions/gsd/notifications.ts +++ b/src/resources/extensions/gsd/notifications.ts @@ -23,7 +23,13 @@ export function sendDesktopNotification( message: string, level: NotifyLevel = "info", kind: NotificationKind = "complete", + projectName?: string, ): void { + // When a projectName is provided and the title is the default "GSD", + // replace it with a project-qualified title for multi-project clarity. + if (projectName && title === "GSD") { + title = formatNotificationTitle(projectName); + } const loaded = loadEffectiveGSDPreferences()?.preferences; if (!shouldSendDesktopNotification(kind, loaded?.notifications)) return; @@ -64,6 +70,16 @@ export function shouldSendDesktopNotification( } } +/** + * Format a notification title that includes the project name for context. + * Returns "GSD — projectName" when a project name is available, otherwise "GSD". 
+ */ +export function formatNotificationTitle(projectName?: string): string { + const trimmed = projectName?.trim(); + if (trimmed) return `GSD — ${trimmed}`; + return "GSD"; +} + export function buildDesktopNotificationCommand( platform: NodeJS.Platform, title: string, diff --git a/src/resources/extensions/gsd/parallel-eligibility.ts b/src/resources/extensions/gsd/parallel-eligibility.ts index 20e4a2327..ea30521b9 100644 --- a/src/resources/extensions/gsd/parallel-eligibility.ts +++ b/src/resources/extensions/gsd/parallel-eligibility.ts @@ -112,7 +112,20 @@ export async function analyzeParallelEligibility( for (const mid of milestoneIds) { const entry = registryMap.get(mid); const title = entry?.title ?? mid; - const status = entry?.status ?? "pending"; + + // Rule 0: milestones with no registry entry (ghost directories, unknown + // state) are ineligible — we cannot determine their status or deps (#2501) + if (!entry) { + ineligible.push({ + milestoneId: mid, + title, + eligible: false, + reason: "Milestone has no planning data — cannot determine eligibility.", + }); + continue; + } + + const status = entry.status; // Rule 1: skip complete and parked milestones if (status === "complete" || status === "parked") { @@ -126,7 +139,7 @@ export async function analyzeParallelEligibility( } // Rule 2: check dependency satisfaction - const deps = entry?.dependsOn ?? []; + const deps = entry.dependsOn ?? []; const unsatisfied = deps.filter(dep => { const depEntry = registryMap.get(dep); return !depEntry || depEntry.status !== "complete"; diff --git a/src/resources/extensions/gsd/parallel-merge.ts b/src/resources/extensions/gsd/parallel-merge.ts index 74b526fdd..e777a5a35 100644 --- a/src/resources/extensions/gsd/parallel-merge.ts +++ b/src/resources/extensions/gsd/parallel-merge.ts @@ -5,6 +5,9 @@ * with safety checks for parallel execution context. 
*/ +import { existsSync, readdirSync } from "node:fs"; +import { join } from "node:path"; +import { spawnSync } from "node:child_process"; import { loadFile } from "./files.js"; import { resolveMilestoneFile } from "./paths.js"; import { mergeMilestoneToMain } from "./auto-worktree.js"; @@ -28,22 +31,102 @@ export type MergeOrder = "sequential" | "by-completion"; // ─── Merge Queue ─────────────────────────────────────────────────────────── +/** + * Check whether a milestone is complete by querying its worktree SQLite DB. + * Uses a subprocess to avoid disrupting the global DB singleton. + * Returns true when milestones.status = 'complete' in the worktree's gsd.db. + */ +export function isMilestoneCompleteInWorktreeDb(basePath: string, mid: string): boolean { + const dbPath = join(basePath, ".gsd", "worktrees", mid, ".gsd", "gsd.db"); + if (!existsSync(dbPath)) return false; + + try { + const result = spawnSync( + "sqlite3", + [dbPath, `SELECT status FROM milestones WHERE id='${mid}' LIMIT 1`], + { timeout: 3000, encoding: "utf-8" }, + ); + return (result.stdout || "").trim() === "complete"; + } catch { + return false; + } +} + +/** + * Discover milestone IDs with status='complete' in their worktree DB, + * scanning .gsd/worktrees//.gsd/gsd.db for each worktree directory. + */ +function discoverDbCompletedMilestones(basePath: string): Set { + const completed = new Set(); + const worktreeDir = join(basePath, ".gsd", "worktrees"); + try { + for (const entry of readdirSync(worktreeDir)) { + if (entry.startsWith("M") && isMilestoneCompleteInWorktreeDb(basePath, entry)) { + completed.add(entry); + } + } + } catch { + // worktrees dir may not exist + } + return completed; +} + /** * Determine safe merge order for completed milestones. * Sequential: merge in milestone ID order (M001 before M002). * By-completion: merge in the order milestones finished. 
+ * + * When basePath is provided, also checks worktree SQLite DBs as the + * source of truth — workers with stale orchestrator state (e.g. "error") + * are included if their worktree DB shows status='complete'. + * See: https://github.com/gsd-build/gsd-2/issues/2812 */ export function determineMergeOrder( workers: WorkerInfo[], order: MergeOrder = "sequential", + basePath?: string, ): string[] { - const completed = workers.filter(w => w.state === "stopped"); + // Start with workers the orchestrator already knows are stopped + const stoppedIds = new Set( + workers.filter(w => w.state === "stopped").map(w => w.milestoneId), + ); + + // When basePath is available, also check worktree DBs for milestones + // whose orchestrator state is stale but are actually complete (#2812) + const dbCompleted = basePath ? discoverDbCompletedMilestones(basePath) : new Set(); + + // Union: milestone is mergeable if stopped OR DB-complete + const mergeableIds = new Set([...stoppedIds, ...dbCompleted]); + + // Build the list from tracked workers + any DB-discovered milestones + // not tracked by the orchestrator at all + const workerMap = new Map(workers.map(w => [w.milestoneId, w])); + const allMergeable: WorkerInfo[] = []; + for (const mid of mergeableIds) { + const w = workerMap.get(mid); + if (w) { + allMergeable.push(w); + } else { + // Milestone discovered from worktree DB but not in workers list + allMergeable.push({ + milestoneId: mid, + title: mid, + pid: 0, + process: null, + worktreePath: basePath ? 
join(basePath, ".gsd", "worktrees", mid) : "", + startedAt: 0, + state: "stopped", + cost: 0, + }); + } + } + if (order === "by-completion") { - return completed + return allMergeable .sort((a, b) => a.startedAt - b.startedAt) // earliest first .map(w => w.milestoneId); } - return completed + return allMergeable .sort((a, b) => a.milestoneId.localeCompare(b.milestoneId)) .map(w => w.milestoneId); } @@ -114,7 +197,7 @@ export async function mergeAllCompleted( workers: WorkerInfo[], order: MergeOrder = "sequential", ): Promise { - const mergeOrder = determineMergeOrder(workers, order); + const mergeOrder = determineMergeOrder(workers, order, basePath); const results: MergeResult[] = []; for (const mid of mergeOrder) { diff --git a/src/resources/extensions/gsd/parsers-legacy.ts b/src/resources/extensions/gsd/parsers-legacy.ts index c1a00e554..ee0632fd2 100644 --- a/src/resources/extensions/gsd/parsers-legacy.ts +++ b/src/resources/extensions/gsd/parsers-legacy.ts @@ -196,18 +196,28 @@ function _parsePlanImpl(content: string): SlicePlan { const mhSection = extractSection(body, 'Must-Haves'); const mustHaves = mhSection ? parseBullets(mhSection) : []; + // Parse tasks from ## Tasks section first, then scan the full body for any + // task checkboxes that were missed. Multi-task plans can interleave T01 detail + // headings (## Steps, ## Must-Haves) before T02's checkbox, which causes + // extractSection("Tasks") to stop at the first ## heading and miss T02+ (#3105). const tasksSection = extractSection(body, 'Tasks'); const tasks: TaskPlanEntry[] = []; - if (tasksSection) { - const taskLines = tasksSection.split('\n'); + // Parse task entries from a set of lines, appending to `tasks`. 
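After the DB-union logic, `determineMergeOrder` reduces to one of two sorts over the mergeable set. A stripped-down sketch of just that ordering rule — the `WorkerLite` shape and `orderForMerge` name are illustrative, not from the codebase:

```typescript
// Illustrative reduction of determineMergeOrder's final sort — not repo code.
interface WorkerLite {
  milestoneId: string;
  startedAt: number;
}

type MergeOrderLite = "sequential" | "by-completion";

function orderForMerge(workers: WorkerLite[], order: MergeOrderLite): string[] {
  if (order === "by-completion") {
    // Earliest-started first, matching the (a, b) => a.startedAt - b.startedAt sort.
    return [...workers].sort((a, b) => a.startedAt - b.startedAt).map(w => w.milestoneId);
  }
  // Sequential: lexicographic milestone ID order (M001 before M002).
  return [...workers].sort((a, b) => a.milestoneId.localeCompare(b.milestoneId)).map(w => w.milestoneId);
}
```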
+ const parseTaskLines = (lines: string[], knownIds: Set): void => { let currentTask: TaskPlanEntry | null = null; - for (const line of taskLines) { + for (const line of lines) { const cbMatch = line.match(/^-\s+\[([ xX])\]\s+\*\*([\w.]+):\s+(.+?)\*\*\s*(.*)/); // Heading-style: ### T01 -- Title, ### T01: Title, ### T01 — Title const hdMatch = !cbMatch ? line.match(/^#{2,4}\s+([\w.]+)\s*(?:--|—|:)\s*(.+)/) : null; if (cbMatch || hdMatch) { + const taskId = cbMatch ? cbMatch[2] : hdMatch![1]; + // Skip tasks already found in the Tasks section + if (knownIds.has(taskId)) { + currentTask = null; + continue; + } if (currentTask) tasks.push(currentTask); if (cbMatch) { @@ -259,8 +269,17 @@ function _parsePlanImpl(content: string): SlicePlan { } } if (currentTask) tasks.push(currentTask); + }; + + if (tasksSection) { + parseTaskLines(tasksSection.split('\n'), new Set()); } + // Second pass: scan the full body for task checkboxes outside ## Tasks. + // This handles interleaved plans where T02+ appear after T01's detail headings. + const foundIds = new Set(tasks.map(t => t.id)); + parseTaskLines(body.split('\n'), foundIds); + const filesSection = extractSection(body, 'Files Likely Touched'); const filesLikelyTouched = filesSection ? 
parseBullets(filesSection) : []; diff --git a/src/resources/extensions/gsd/paths.ts b/src/resources/extensions/gsd/paths.ts index ccd3c59f6..8beaefdaa 100644 --- a/src/resources/extensions/gsd/paths.ts +++ b/src/resources/extensions/gsd/paths.ts @@ -264,6 +264,7 @@ export const GSD_ROOT_FILES = { REQUIREMENTS: "REQUIREMENTS.md", OVERRIDES: "OVERRIDES.md", KNOWLEDGE: "KNOWLEDGE.md", + CODEBASE: "CODEBASE.md", } as const; export type GSDRootFileKey = keyof typeof GSD_ROOT_FILES; @@ -276,6 +277,7 @@ const LEGACY_GSD_ROOT_FILES: Record = { REQUIREMENTS: "requirements.md", OVERRIDES: "overrides.md", KNOWLEDGE: "knowledge.md", + CODEBASE: "codebase.md", }; // ─── GSD Root Discovery ─────────────────────────────────────────────────────── @@ -307,16 +309,58 @@ export function gsdRoot(basePath: string): string { return result; } +/** + * Detect if a path is inside a .gsd/worktrees// structure. + * + * GSD auto-worktrees live at /.gsd/worktrees//. + * When gsdRoot() is called with such a path, we must NOT walk up to the + * project root's .gsd — each worktree manages its own .gsd state (#2594). + * + * Matches both forward-slash and platform-native separators to handle + * Windows paths (path.sep = '\\') and normalized Unix paths. + */ +function isInsideGsdWorktree(p: string): boolean { + // Match /.gsd/worktrees/ where is the final segment or + // followed by a separator. The segment must be non-empty. + const sepFwd = "/"; + const sepNative = "\\"; + const markers = [ + `${sepFwd}.gsd${sepFwd}worktrees${sepFwd}`, + `${sepNative}.gsd${sepNative}worktrees${sepNative}`, + ]; + for (const marker of markers) { + const idx = p.indexOf(marker); + if (idx === -1) continue; + // Verify there's a non-empty worktree name after the marker + const afterMarker = p.slice(idx + marker.length); + // The name is everything up to the next separator (or end of string) + const nameEnd = afterMarker.search(/[/\\]/); + const name = nameEnd === -1 ? 
afterMarker : afterMarker.slice(0, nameEnd); + if (name.length > 0) return true; + } + return false; +} + function probeGsdRoot(rawBasePath: string): string { // 1. Fast path — check the input path directly const local = join(rawBasePath, ".gsd"); if (existsSync(local)) return local; + // 1b. Worktree guard (#2594) — if basePath is inside a .gsd/worktrees// + // structure, return the worktree-local .gsd path immediately. Without this, + // the git-root probe (step 2) or walk-up (step 3) escapes to the project + // root's .gsd, causing ensurePreconditions() and deriveState() to read/write + // state in the wrong location. + if (isInsideGsdWorktree(rawBasePath)) return local; + // Resolve symlinks so path comparisons work correctly across platforms // (e.g. macOS /var → /private/var). Use rawBasePath as fallback if not resolvable. let basePath: string; try { basePath = realpathSync.native(rawBasePath); } catch { basePath = rawBasePath; } + // Also check the resolved path for the worktree pattern (macOS /tmp → /private/tmp) + if (basePath !== rawBasePath && isInsideGsdWorktree(basePath)) return local; + // 2. Git root anchor — used as both probe target and walk-up boundary // Only walk if we're inside a git project — prevents escaping into // unrelated filesystem territory when running outside any repo. 
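Since the worktree guard above is pure string matching, it can be exercised standalone. The function below is copied from the paths.ts hunk, with the `sepFwd`/`sepNative` locals inlined into literal markers (behavior unchanged):

```typescript
// Copied from the paths.ts hunk above; separator locals inlined.
function isInsideGsdWorktree(p: string): boolean {
  // "/.gsd/worktrees/" and "\.gsd\worktrees\" as string literals.
  const markers = ["/.gsd/worktrees/", "\\.gsd\\worktrees\\"];
  for (const marker of markers) {
    const idx = p.indexOf(marker);
    if (idx === -1) continue;
    // Require a non-empty worktree name after the marker.
    const afterMarker = p.slice(idx + marker.length);
    const nameEnd = afterMarker.search(/[/\\]/);
    const name = nameEnd === -1 ? afterMarker : afterMarker.slice(0, nameEnd);
    if (name.length > 0) return true;
  }
  return false;
}
```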
diff --git a/src/resources/extensions/gsd/preferences-models.ts b/src/resources/extensions/gsd/preferences-models.ts index f5a488672..22e6909b1 100644 --- a/src/resources/extensions/gsd/preferences-models.ts +++ b/src/resources/extensions/gsd/preferences-models.ts @@ -69,6 +69,7 @@ export function resolveModelWithFallbacksForUnit(unitType: string): ResolvedMode break; case "complete-slice": case "complete-milestone": + case "worktree-merge": case "run-uat": phaseConfig = m.completion; break; @@ -137,6 +138,18 @@ export function getNextFallbackModel( } } +/** + * Detect whether an error message indicates a transient network error + * (worth retrying the same model) vs a permanent provider error + * (auth failure, quota exceeded, etc. -- should fall back immediately). + */ +export function isTransientNetworkError(errorMsg: string): boolean { + if (!errorMsg) return false; + const hasNetworkSignal = /network|ECONNRESET|ETIMEDOUT|ECONNREFUSED|socket hang up|fetch failed|connection.*reset|dns/i.test(errorMsg); + const hasPermanentSignal = /auth|unauthorized|forbidden|invalid.*key|quota|billing/i.test(errorMsg); + return hasNetworkSignal && !hasPermanentSignal; +} + /** * Validate a model ID string. * Returns true if the ID looks like a valid model identifier. @@ -308,7 +321,7 @@ export function resolveContextSelection(): import("./types.js").ContextSelection } /** - * Resolve the search provider preference from PREFERENCES.md. + * Resolve the search provider preference from preferences.md. * Returns undefined if not configured (caller falls back to existing behavior). 
*/ export function resolveSearchProviderFromPreferences(): GSDPreferences["search_provider"] | undefined { diff --git a/src/resources/extensions/gsd/preferences-types.ts b/src/resources/extensions/gsd/preferences-types.ts index 663c58376..443dc4920 100644 --- a/src/resources/extensions/gsd/preferences-types.ts +++ b/src/resources/extensions/gsd/preferences-types.ts @@ -100,7 +100,8 @@ export const KNOWN_PREFERENCE_KEYS = new Set([ export const KNOWN_UNIT_TYPES = [ "research-milestone", "plan-milestone", "research-slice", "plan-slice", "execute-task", "reactive-execute", "gate-evaluate", "complete-slice", "replan-slice", "reassess-roadmap", - "run-uat", "complete-milestone", + "run-uat", "complete-milestone", "validate-milestone", "rewrite-docs", + "discuss-milestone", "discuss-slice", "worktree-merge", ] as const; export type UnitType = (typeof KNOWN_UNIT_TYPES)[number]; diff --git a/src/resources/extensions/gsd/preferences.ts b/src/resources/extensions/gsd/preferences.ts index 58badbd95..71183cb0b 100644 --- a/src/resources/extensions/gsd/preferences.ts +++ b/src/resources/extensions/gsd/preferences.ts @@ -69,6 +69,7 @@ export { resolveModelForUnit, resolveModelWithFallbacksForUnit, getNextFallbackModel, + isTransientNetworkError, validateModelId, updatePreferencesModels, resolveDynamicRoutingConfig, @@ -87,7 +88,7 @@ function gsdHome(): string { } function globalPreferencesPath(): string { - return join(gsdHome(), "PREFERENCES.md"); + return join(gsdHome(), "preferences.md"); } function legacyGlobalPreferencesPath(): string { @@ -95,16 +96,16 @@ function legacyGlobalPreferencesPath(): string { } function projectPreferencesPath(): string { - return join(gsdRoot(process.cwd()), "PREFERENCES.md"); -} -// Legacy: older versions used lowercase preferences.md. -// Check lowercase as a fallback so those files aren't silently ignored. 
-function globalPreferencesPathLegacy(): string { - return join(gsdHome(), "preferences.md"); -} -function projectPreferencesPathLegacy(): string { return join(gsdRoot(process.cwd()), "preferences.md"); } +// Bootstrap in gitignore.ts historically created PREFERENCES.md (uppercase) by mistake. +// Check uppercase as a fallback so those files aren't silently ignored. +function globalPreferencesPathUppercase(): string { + return join(gsdHome(), "PREFERENCES.md"); +} +function projectPreferencesPathUppercase(): string { + return join(gsdRoot(process.cwd()), "PREFERENCES.md"); +} export function getGlobalGSDPreferencesPath(): string { return globalPreferencesPath(); @@ -122,13 +123,13 @@ export function getProjectGSDPreferencesPath(): string { export function loadGlobalGSDPreferences(): LoadedGSDPreferences | null { return loadPreferencesFile(globalPreferencesPath(), "global") - ?? loadPreferencesFile(globalPreferencesPathLegacy(), "global") + ?? loadPreferencesFile(globalPreferencesPathUppercase(), "global") ?? loadPreferencesFile(legacyGlobalPreferencesPath(), "global"); } export function loadProjectGSDPreferences(): LoadedGSDPreferences | null { return loadPreferencesFile(projectPreferencesPath(), "project") - ?? loadPreferencesFile(projectPreferencesPathLegacy(), "project"); + ?? loadPreferencesFile(projectPreferencesPathUppercase(), "project"); } export function loadEffectiveGSDPreferences(): LoadedGSDPreferences | null { @@ -223,7 +224,7 @@ export function parsePreferencesMarkdown(content: string): GSDPreferences | null if (!_warnedUnrecognizedFormat) { _warnedUnrecognizedFormat = true; - console.warn("[parsePreferencesMarkdown] PREFERENCES.md exists but uses an unrecognized format — skipping."); + console.warn("[parsePreferencesMarkdown] preferences.md exists but uses an unrecognized format — skipping."); } return null; } @@ -370,9 +371,6 @@ function mergePreferences(base: GSDPreferences, override: GSDPreferences): GSDPr service_tier: override.service_tier ?? 
base.service_tier, forensics_dedup: override.forensics_dedup ?? base.forensics_dedup, show_token_cost: override.show_token_cost ?? base.show_token_cost, - experimental: (base.experimental || override.experimental) - ? { ...(base.experimental ?? {}), ...(override.experimental ?? {}) } - : undefined, }; } @@ -519,7 +517,7 @@ export function resolvePreDispatchHooks(): PreDispatchHookConfig[] { * Resolve the effective git isolation mode from preferences. * Returns "none" (default), "worktree", or "branch". * - * Default is "none" so GSD works out of the box without PREFERENCES.md. + * Default is "none" so GSD works out of the box without preferences.md. * Worktree isolation requires explicit opt-in because it depends on git * branch infrastructure that must be set up before use. */ diff --git a/src/resources/extensions/gsd/prompt-loader.ts b/src/resources/extensions/gsd/prompt-loader.ts index b5e2a37ab..2a92984a1 100644 --- a/src/resources/extensions/gsd/prompt-loader.ts +++ b/src/resources/extensions/gsd/prompt-loader.ts @@ -134,7 +134,10 @@ export function loadPrompt(name: string, vars: Record<string, string> = {}): string { } for (const [key, value] of Object.entries(effectiveVars)) { - content = content.replaceAll(`{{${key}}}`, value); + // Use split/join instead of replaceAll to avoid JavaScript's special + // replacement patterns ($', $`, $&) being interpreted in the value. + // See: https://github.com/gsd-build/gsd-2/issues/2968 + content = content.split(`{{${key}}}`).join(value); } return content.trim(); diff --git a/src/resources/extensions/gsd/prompts/complete-milestone.md b/src/resources/extensions/gsd/prompts/complete-milestone.md index 91ac07e5d..eac76640e 100644 --- a/src/resources/extensions/gsd/prompts/complete-milestone.md +++ b/src/resources/extensions/gsd/prompts/complete-milestone.md @@ -56,7 +56,7 @@ Then: - `followUps` (string) — Follow-up items for future milestones - `deviations` (string) — Deviations from the original plan 10.
For each requirement whose status changed in step 8, call `gsd_requirement_update` with the requirement ID and updated `status` and `validation` fields — the tool regenerates `.gsd/REQUIREMENTS.md` automatically. -11. Update `.gsd/PROJECT.md` to reflect milestone completion and current project state. +11. Update `.gsd/PROJECT.md`: use the `write` tool with `path: ".gsd/PROJECT.md"` and `content` containing the full updated document reflecting milestone completion and current project state. Do NOT use the `edit` tool for this — PROJECT.md is a full-document refresh. 12. Review all slice summaries for cross-cutting lessons, patterns, or gotchas that emerged during this milestone. Append any non-obvious, reusable insights to `.gsd/KNOWLEDGE.md`. 13. Do not commit manually — the system auto-commits your changes after this unit completes. - Say: "Milestone {{milestoneId}} complete." diff --git a/src/resources/extensions/gsd/prompts/complete-slice.md b/src/resources/extensions/gsd/prompts/complete-slice.md index e062a4aee..7066d5fd9 100644 --- a/src/resources/extensions/gsd/prompts/complete-slice.md +++ b/src/resources/extensions/gsd/prompts/complete-slice.md @@ -29,9 +29,11 @@ Then: 8. Write `{{sliceUatPath}}` — a concrete UAT script with real test cases derived from the slice plan and task summaries. Include preconditions, numbered steps with expected outcomes, and edge cases. This must NOT be a placeholder or generic template — tailor every test case to what this slice actually built. 9. Review task summaries for `key_decisions`. Append any significant decisions to `.gsd/DECISIONS.md` if missing. 10. Review task summaries for patterns, gotchas, or non-obvious lessons learned. If any would save future agents from repeating investigation or hitting the same issues, append them to `.gsd/KNOWLEDGE.md`. Only add entries that are genuinely useful — don't pad with obvious observations. -11. 
Call `gsd_complete_slice` with milestone_id, slice_id, the slice summary, and the UAT result. Do NOT manually mark the roadmap checkbox — the tool writes to the DB and renders the ROADMAP.md projection automatically. +11. Call `gsd_complete_slice` with milestoneId, sliceId, the slice summary, and the UAT result. Do NOT manually mark the roadmap checkbox — the tool writes to the DB and renders the ROADMAP.md projection automatically. 12. Do not run git commands — the system commits your changes and handles any merge after this unit succeeds. -13. Update `.gsd/PROJECT.md` if it exists — refresh current state if needed. +13. Update `.gsd/PROJECT.md` if it exists — refresh current state if needed: use the `write` tool with `path: ".gsd/PROJECT.md"` and `content` containing the full updated document reflecting current project state. Do NOT use the `edit` tool for this — PROJECT.md is a full-document refresh. + +**Autonomous execution:** Do not call `ask_user_questions` or `secure_env_collect`. You are running in auto-mode — there is no human available to answer questions. Make reasonable assumptions and document them in the slice summary. If a decision genuinely requires human input, note it in the summary and proceed with the best available option. **You MUST call `gsd_complete_slice` with the slice summary and UAT content before finishing. The tool persists to both DB and disk and renders `{{sliceSummaryPath}}` and `{{sliceUatPath}}` automatically.** diff --git a/src/resources/extensions/gsd/prompts/discuss-headless.md b/src/resources/extensions/gsd/prompts/discuss-headless.md index 6840fa749..ddd10d454 100644 --- a/src/resources/extensions/gsd/prompts/discuss-headless.md +++ b/src/resources/extensions/gsd/prompts/discuss-headless.md @@ -38,7 +38,7 @@ Do a mandatory investigation pass before making any decisions. This is not optio 3. 
**Web search** — `search-the-web` if the domain is unfamiliar, if you need current best practices, or if the spec references external services/APIs you need facts about. Use `fetch_page` for full content when snippets aren't enough. **Web search budget:** Budget carefully across investigation + focused research: -- Prefer `resolve_library` / `get_library_docs` over `web_search` for library documentation. +- Prefer `resolve_library` / `get_library_docs` over `search-the-web` for library documentation. - Prefer `search_and_read` for one-shot topic research. - Target 2-3 web searches in this investigation pass. Save remaining budget for focused research. - Do NOT repeat the same or similar queries. diff --git a/src/resources/extensions/gsd/prompts/discuss.md b/src/resources/extensions/gsd/prompts/discuss.md index 4a52b344e..334abcdc0 100644 --- a/src/resources/extensions/gsd/prompts/discuss.md +++ b/src/resources/extensions/gsd/prompts/discuss.md @@ -37,7 +37,7 @@ Before asking your first question, do a mandatory investigation pass. This is no 3. **Web search** — `search-the-web` if the domain is unfamiliar, if you need current best practices, or if the user referenced external services/APIs you need facts about. Use `fetch_page` for full content when snippets aren't enough. **Web search budget:** You have a limited number of web searches per turn (typically 3-5). The discuss phase spans many turns (investigation, question rounds, focused research, requirements), so budget carefully: -- Prefer `resolve_library` / `get_library_docs` over `web_search` for library documentation — they don't consume the web search budget. +- Prefer `resolve_library` / `get_library_docs` over `search-the-web` for library documentation — they don't consume the web search budget. - Prefer `search_and_read` for one-shot topic research — it combines search + page fetch in a single call. - Target 2-3 web searches in the investigation pass. 
Save remaining budget for the focused research pass before roadmap creation. - Do NOT repeat the same or similar queries. If a search didn't find what you need, rephrase once or move on. diff --git a/src/resources/extensions/gsd/prompts/execute-task.md b/src/resources/extensions/gsd/prompts/execute-task.md index 9428fa68a..b433638ac 100644 --- a/src/resources/extensions/gsd/prompts/execute-task.md +++ b/src/resources/extensions/gsd/prompts/execute-task.md @@ -68,11 +68,13 @@ Then: 17. If you discover a non-obvious rule, recurring gotcha, or useful pattern during execution, append it to `.gsd/KNOWLEDGE.md`. Only add entries that would save future agents from repeating your investigation. Don't add obvious things. 18. Read the template at `~/.gsd/agent/extensions/gsd/templates/task-summary.md` 19. Write `{{taskSummaryPath}}` -20. Call `gsd_complete_task` with milestone_id, slice_id, task_id, and a summary of what was accomplished. This is your final required step — do NOT manually edit PLAN.md checkboxes. The tool marks the task complete, updates the DB, and renders PLAN.md automatically. +20. Call `gsd_complete_task` with milestoneId, sliceId, taskId, and a summary of what was accomplished. This is your final required step — do NOT manually edit PLAN.md checkboxes. The tool marks the task complete, updates the DB, and renders PLAN.md automatically. 21. Do not run git commands — the system reads your task summary after completion and creates a meaningful commit from it (type inferred from title, message from your one-liner, key files from frontmatter). Write a clear, specific one-liner in the summary — it becomes the commit message. All work stays in your working directory: `{{workingDirectory}}`. +**Autonomous execution:** Do not call `ask_user_questions` or `secure_env_collect`. You are running in auto-mode — there is no human available to answer questions. Make reasonable assumptions and document them in the task summary. 
If a decision genuinely requires human input, note it in the summary and proceed with the best available option. + **You MUST call `gsd_complete_task` AND write `{{taskSummaryPath}}` before finishing.** When done, say: "Task {{taskId}} complete." diff --git a/src/resources/extensions/gsd/prompts/forensics.md b/src/resources/extensions/gsd/prompts/forensics.md index 32933af20..dda9d163c 100644 --- a/src/resources/extensions/gsd/prompts/forensics.md +++ b/src/resources/extensions/gsd/prompts/forensics.md @@ -102,6 +102,8 @@ A stale lock (PID is dead) means the previous auto-mode session crashed mid-unit A unit dispatched more than once (`type/id` appears multiple times) indicates a stuck loop — the unit completed but artifact verification failed. +{{dedupSection}} + ## Investigation Protocol 1. **Start with the pre-parsed forensic report** above. The anomaly section contains automated findings — treat these as leads, not conclusions. @@ -133,8 +135,6 @@ Explain your findings: - **Code snippet** — the problematic code and what it should do instead - **Recovery** — what the user can do right now to get unstuck -{{dedupSection}} - Then **offer GitHub issue creation**: "Would you like me to create a GitHub issue for this on gsd-build/gsd-2?" **CRITICAL: The `github_issues` tool ONLY targets the current user's repository — it has no `repo` parameter. You MUST use `gh issue create --repo gsd-build/gsd-2` via the `bash` tool to file on the correct repo. Do NOT use the `github_issues` tool for this.** diff --git a/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md b/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md index b8746d1d1..4abf22606 100644 --- a/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md +++ b/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md @@ -13,7 +13,7 @@ Discuss milestone {{milestoneId}} ("{{milestoneTitle}}"). 
Identify gray areas, a Do a lightweight targeted investigation so your questions are grounded in reality: - Scout the codebase (`rg`, `find`, or `scout`) to understand what already exists that this milestone touches or builds on - Check the roadmap context above (if present) to understand what surrounds this milestone -- Use `resolve_library` / `get_library_docs` for unfamiliar libraries — prefer this over `web_search` for library documentation +- Use `resolve_library` / `get_library_docs` for unfamiliar libraries — prefer this over `search-the-web` for library documentation - Identify the 3–5 biggest behavioural and architectural unknowns: things where the user's answer will materially change what gets built **Web search budget:** You have a limited number of web searches per turn (typically 3-5). Prefer `resolve_library` / `get_library_docs` for library documentation and `search_and_read` for one-shot topic research — they are more budget-efficient. Target 2-3 web searches in the investigation pass. Distribute remaining searches across subsequent question rounds rather than clustering them. 
diff --git a/src/resources/extensions/gsd/prompts/guided-discuss-slice.md b/src/resources/extensions/gsd/prompts/guided-discuss-slice.md index c6ab831ee..d27487336 100644 --- a/src/resources/extensions/gsd/prompts/guided-discuss-slice.md +++ b/src/resources/extensions/gsd/prompts/guided-discuss-slice.md @@ -13,7 +13,7 @@ Your goal is **not** to center the discussion on tech stack trivia, naming conve Do a lightweight targeted investigation so your questions are grounded in reality: - Scout the codebase (`rg`, `find`, or `scout` for broad unfamiliar areas) to understand what already exists that this slice touches or builds on - Check the roadmap context above to understand what surrounds this slice — what comes before, what depends on it -- Use `resolve_library` / `get_library_docs` for unfamiliar libraries — prefer this over `web_search` for library documentation +- Use `resolve_library` / `get_library_docs` for unfamiliar libraries — prefer this over `search-the-web` for library documentation - Identify the 3–5 biggest behavioural unknowns: things where the user's answer will materially change what gets built **Web search budget:** You have a limited number of web searches per turn (typically 3-5). Prefer `resolve_library` / `get_library_docs` for library documentation and `search_and_read` for one-shot topic research — they are more budget-efficient. Target 2-3 web searches in the investigation pass. Distribute remaining searches across subsequent question rounds rather than clustering them. diff --git a/src/resources/extensions/gsd/prompts/plan-slice.md b/src/resources/extensions/gsd/prompts/plan-slice.md index 6b38c4667..69e103f72 100644 --- a/src/resources/extensions/gsd/prompts/plan-slice.md +++ b/src/resources/extensions/gsd/prompts/plan-slice.md @@ -82,6 +82,8 @@ Then: The slice directory and tasks/ subdirectory already exist. Do NOT mkdir. All work stays in your working directory: `{{workingDirectory}}`. 
+**Autonomous execution:** Do not call `ask_user_questions` or `secure_env_collect`. You are running in auto-mode — there is no human available to answer questions. Make reasonable assumptions and document them in the plan. If a decision genuinely requires human input, write a note in the relevant task's description and call `gsd_plan_slice` with what you have. + **You MUST call `gsd_plan_slice` to persist the planning state before finishing.** When done, say: "Slice {{sliceId}} planned." diff --git a/src/resources/extensions/gsd/prompts/rethink.md b/src/resources/extensions/gsd/prompts/rethink.md index da2a91495..e1222b9d0 100644 --- a/src/resources/extensions/gsd/prompts/rethink.md +++ b/src/resources/extensions/gsd/prompts/rethink.md @@ -80,4 +80,4 @@ If a proposed order would violate constraints, explain the issue and suggest alt - Do NOT park completed milestones — it would corrupt dependency satisfaction - Park is preferred over discard when a milestone has any completed work - Always persist queue order changes to `.gsd/QUEUE-ORDER.json` -- After changes, run `git add .gsd/ && git commit -m "docs(gsd): rethink milestone plan"` to persist (rethink runs interactively outside auto-mode, so no system auto-commit) +- {{commitInstruction}} diff --git a/src/resources/extensions/gsd/prompts/triage-captures.md b/src/resources/extensions/gsd/prompts/triage-captures.md index 60dd5ca95..23545c265 100644 --- a/src/resources/extensions/gsd/prompts/triage-captures.md +++ b/src/resources/extensions/gsd/prompts/triage-captures.md @@ -54,6 +54,7 @@ For each capture, classify it as one of: - Add `**Resolution:** ` - Add `**Rationale:** ` - Add `**Resolved:** ` + - Add `**Milestone:** ` (e.g., `**Milestone:** M003`) 4. **Summarize** what was triaged: how many captures, what classifications were assigned, and what actions are pending (e.g., "2 quick-tasks ready for execution, 1 deferred to S03"). 
diff --git a/src/resources/extensions/gsd/repo-identity.ts b/src/resources/extensions/gsd/repo-identity.ts index 39204ab91..8de304f36 100644 --- a/src/resources/extensions/gsd/repo-identity.ts +++ b/src/resources/extensions/gsd/repo-identity.ts @@ -8,7 +8,7 @@ import { createHash } from "node:crypto"; import { execFileSync } from "node:child_process"; -import { existsSync, lstatSync, mkdirSync, readdirSync, readFileSync, realpathSync, rmSync, symlinkSync, writeFileSync } from "node:fs"; +import { cpSync, existsSync, lstatSync, mkdirSync, readdirSync, readFileSync, realpathSync, renameSync, rmSync, symlinkSync, unlinkSync, writeFileSync } from "node:fs"; import { homedir } from "node:os"; import { basename, dirname, join, resolve } from "node:path"; @@ -276,9 +276,14 @@ export function validateProjectId(id: string): boolean { * If `GSD_PROJECT_ID` is set, returns it directly (validation is expected * to have already happened at startup via `validateProjectId`). * - * Otherwise returns SHA-256 of `${remoteUrl}\n${resolvedRoot}`, truncated - * to 12 hex chars. Deterministic: same repo always produces the same hash - * regardless of which worktree the caller is inside. + * For repos with a remote URL, returns SHA-256 of the remote URL only — + * this makes the identity stable across directory moves/renames (#2750). + * + * For local-only repos (no remote), includes the git root in the hash. + * Local repos use a `.gsd-id` marker file for recovery after moves. + * + * Deterministic: same repo always produces the same hash regardless of + * which worktree the caller is inside. */ export function repoIdentity(basePath: string): string { const projectId = process.env.GSD_PROJECT_ID; @@ -286,8 +291,14 @@ export function repoIdentity(basePath: string): string { return projectId; } const remoteUrl = getRemoteUrl(basePath); + if (remoteUrl) { + // Remote URL alone uniquely identifies the repo — path is redundant. 
+ // This makes moves transparent for repos with remotes (#2750). + return createHash("sha256").update(remoteUrl).digest("hex").slice(0, 12); + } + // Local-only repo: include git root since there's no remote to anchor identity. const root = resolveGitRoot(basePath); - const input = `${remoteUrl}\n${root}`; + const input = `\n${root}`; return createHash("sha256").update(input).digest("hex").slice(0, 12); } @@ -351,21 +362,148 @@ export function cleanNumberedGsdVariants(projectPath: string): string[] { return removed; } +// ─── .gsd-id Marker ───────────────────────────────────────────────────────── + +/** + * Write a `.gsd-id` marker file in the project root. + * + * This file records the identity hash used for the external state directory. + * For local-only repos (no remote), this marker survives directory moves and + * enables automatic recovery of orphaned state (#2750). + * + * The marker is gitignored by ensureGitignore(). Non-fatal: failure to write + * the marker must never block project setup. + */ +function writeGsdIdMarker(projectPath: string, identity: string): void { + try { + const markerPath = join(projectPath, ".gsd-id"); + // Only write if content differs to avoid unnecessary disk writes. + if (existsSync(markerPath)) { + try { + if (readFileSync(markerPath, "utf-8").trim() === identity) return; + } catch { /* fall through and overwrite */ } + } + writeFileSync(markerPath, identity + "\n", "utf-8"); + } catch { + // Non-fatal — marker write failure should not block project setup + } +} + +/** + * Read the `.gsd-id` marker from the project root. + * Returns the identity hash, or null if the marker doesn't exist or is unreadable. + */ +function readGsdIdMarker(projectPath: string): string | null { + try { + const markerPath = join(projectPath, ".gsd-id"); + if (!existsSync(markerPath)) return null; + const content = readFileSync(markerPath, "utf-8").trim(); + return /^[a-zA-Z0-9_-]+$/.test(content) ? 
content : null; + } catch { + return null; + } +} + +/** + * Check whether an external state directory has meaningful content. + * Returns true if the directory contains any files or subdirectories + * beyond just repo-meta.json. + */ +function hasProjectState(externalPath: string): boolean { + try { + if (!existsSync(externalPath)) return false; + const entries = readdirSync(externalPath); + return entries.some(e => e !== "repo-meta.json"); + } catch { + return false; + } +} + +/** + * Resolve the external state directory, with recovery for relocated projects. + * + * For local-only repos where the computed identity produces an empty state dir, + * checks the `.gsd-id` marker for the original identity hash and recovers + * the old state directory if it still exists and contains data (#2750). + * + * Returns the resolved external path (may differ from the computed identity). + */ +function resolveExternalPathWithRecovery(projectPath: string): string { + const computedPath = externalGsdRoot(projectPath); + const computedId = repoIdentity(projectPath); + + // Check if computed path already has state — fast path, no recovery needed. + if (hasProjectState(computedPath)) { + return computedPath; + } + + // Check for .gsd-id marker from a previous location. + const markerId = readGsdIdMarker(projectPath); + if (markerId && markerId !== computedId) { + // The marker points to a different identity — the repo was likely moved. + const base = process.env.GSD_STATE_DIR || gsdHome; + const markerPath = join(base, "projects", markerId); + if (hasProjectState(markerPath)) { + // Recover: use the old state directory and update the marker to the new identity. + // Move the state from the old hash dir to the new one so future lookups work + // without the marker. 
+ try { + mkdirSync(computedPath, { recursive: true }); + const entries = readdirSync(markerPath); + for (const entry of entries) { + try { + const src = join(markerPath, entry); + const dst = join(computedPath, entry); + // Use rename for same-filesystem (fast) or fall back to copy. + try { + renameSync(src, dst); + } catch { + cpSync(src, dst, { recursive: true, force: true }); + } + } catch { /* continue with remaining entries */ } + } + // Clean up old directory after successful migration. + try { rmSync(markerPath, { recursive: true, force: true }); } catch { /* non-fatal */ } + } catch { + // If migration fails, just point at the old directory. + return markerPath; + } + } + } + + return computedPath; +} + // ─── Symlink Management ───────────────────────────────────────────────────── /** * Ensure the `/.gsd` symlink points to the external state directory. * * 1. Clean up any macOS numbered collision variants (`.gsd 2`, `.gsd 3`, etc.) - * 2. mkdir -p the external dir - * 3. If `/.gsd` doesn't exist → create symlink - * 4. If `/.gsd` is already the correct symlink → no-op - * 5. If `/.gsd` is a real directory → return as-is (migration handles later) + * 2. Resolve external dir (with relocation recovery via `.gsd-id` marker) + * 3. mkdir -p the external dir + * 4. If `/.gsd` doesn't exist → create symlink + * 5. If `/.gsd` is already the correct symlink → no-op + * 6. If `/.gsd` is a real directory → return as-is (migration handles later) + * 7. Write `.gsd-id` marker for future relocation recovery * * Returns the resolved external path. */ export function ensureGsdSymlink(projectPath: string): string { - const externalPath = externalGsdRoot(projectPath); + const result = ensureGsdSymlinkCore(projectPath); + + // Write .gsd-id marker so future relocations can recover this state (#2750). + // Only write for the project root (not subdirectories or worktrees that + // delegate to a parent .gsd). 
+ if (!isInsideWorktree(projectPath)) { + writeGsdIdMarker(projectPath, repoIdentity(projectPath)); + } + + return result; +} + +function ensureGsdSymlinkCore(projectPath: string): string { + const externalPath = resolveExternalPathWithRecovery(projectPath); const localGsd = join(projectPath, ".gsd"); const inWorktree = isInsideWorktree(projectPath); @@ -418,12 +556,28 @@ export function ensureGsdSymlink(projectPath: string): string { const replaceWithSymlink = (): string => { rmSync(localGsd, { recursive: true, force: true }); + // Defensive: remove any residual entry (e.g. dangling symlink) before creating. + try { unlinkSync(localGsd); } catch { /* already gone */ } symlinkSync(externalPath, localGsd, "junction"); return externalPath; }; + // Check for dangling symlinks (e.g. after relocation recovery removed the old + // state dir). existsSync follows symlinks, so it returns false for dangling ones. + // lstatSync does NOT follow, so we can detect the dangling symlink and replace it. if (!existsSync(localGsd)) { - // Nothing exists yet — create symlink + try { + const stat = lstatSync(localGsd); + if (stat.isSymbolicLink()) { + // Dangling symlink — replace with correct one (#2750). + return replaceWithSymlink(); + } + } catch { + // lstat also failed — nothing exists at this path + } + // Nothing exists yet — create symlink. + // Defensive: remove any residual entry to avoid EEXIST race (#2750). + try { unlinkSync(localGsd); } catch { /* nothing to remove */ } symlinkSync(externalPath, localGsd, "junction"); return externalPath; } @@ -442,6 +596,27 @@ export function ensureGsdSymlink(projectPath: string): string { if (inWorktree) { return replaceWithSymlink(); } + // After identity hash change (e.g. upgrade from path-based to remote-only + // hash, or relocation recovery), migrate data from old target to new path + // and update the symlink (#2750). 
+ if (!hasProjectState(externalPath) && hasProjectState(target)) { + try { + mkdirSync(externalPath, { recursive: true }); + const oldEntries = readdirSync(target); + for (const entry of oldEntries) { + try { + const src = join(target, entry); + const dst = join(externalPath, entry); + try { renameSync(src, dst); } catch { cpSync(src, dst, { recursive: true, force: true }); } + } catch { /* continue */ } + } + try { rmSync(target, { recursive: true, force: true }); } catch { /* non-fatal */ } + return replaceWithSymlink(); + } catch { + // Migration failed — preserve old symlink + return target; + } + } // Outside worktrees, preserve custom overrides or legacy symlinks. return target; } diff --git a/src/resources/extensions/gsd/rethink.ts b/src/resources/extensions/gsd/rethink.ts index a6f049b77..599cbc32c 100644 --- a/src/resources/extensions/gsd/rethink.ts +++ b/src/resources/extensions/gsd/rethink.ts @@ -19,6 +19,7 @@ import { isParked, getParkedReason } from "./milestone-actions.js"; import { getMilestoneSlices, isDbAvailable } from "./gsd-db.js"; import { buildExistingMilestonesContext } from "./guided-flow-queue.js"; import { loadPrompt } from "./prompt-loader.js"; +import { isGsdGitignored } from "./gitignore.js"; // ─── Entry Point ────────────────────────────────────────────────────────────── @@ -53,9 +54,14 @@ export async function handleRethink( const rethinkData = buildRethinkData(basePath, milestoneIds, state, queueOrder); const existingMilestonesContext = await buildExistingMilestonesContext(basePath, milestoneIds, state); + const commitInstruction = isGsdGitignored(basePath) + ? "Do not commit planning artifacts — .gsd/ is gitignored in this project." 
+ : 'After changes, run `git add .gsd/ && git commit -m "docs(gsd): rethink milestone plan"` to persist (rethink runs interactively outside auto-mode, so no system auto-commit)'; + const content = loadPrompt("rethink", { rethinkData, existingMilestonesContext, + commitInstruction, }); pi.sendMessage( diff --git a/src/resources/extensions/gsd/roadmap-slices.ts b/src/resources/extensions/gsd/roadmap-slices.ts index 5031f004f..93fb05038 100644 --- a/src/resources/extensions/gsd/roadmap-slices.ts +++ b/src/resources/extensions/gsd/roadmap-slices.ts @@ -219,13 +219,14 @@ export function parseRoadmapSlices(content: string): RoadmapSliceEntry[] { function parseProseSliceHeaders(content: string): RoadmapSliceEntry[] { const slices: RoadmapSliceEntry[] = []; // Match H1-H4 headers containing S with optional "Slice" prefix, bold markers, - // and optional checkmark completion marker before the slice ID. + // numeric prefixes (e.g., "1.", "(1)"), bracketed IDs (e.g., "[S01]"), + // optional checkmark completion marker, and optional leading indentation. // Separator after the ID is flexible: colon, dash, em/en dash, dot, or just whitespace. 
- const headerPattern = /^#{1,4}\s+\*{0,2}(?:\u2713\s+)?(?:Slice\s+)?(S\d+)\*{0,2}[:\s.\u2014\u2013-]*\s*(.+)/gm;
+ const headerPattern = /^\s*#{1,4}\s+\*{0,2}(?:\u2713\s+)?(?:\d+[.)]\s+)?(?:\(\d+\)\s+)?(?:Slice\s+)?\[?(S\d+)\]?\*{0,2}[:\s.\u2014\u2013-]*\s*(.+)/gm;
 let match: RegExpExecArray | null;
 // Check for checkmark before the slice ID (e.g., "## ✓ S01: Title")
- const prefixCheckPattern = /^#{1,4}\s+\*{0,2}\u2713\s+/;
+ const prefixCheckPattern = /^\s*#{1,4}\s+\*{0,2}\u2713\s+/;
 while ((match = headerPattern.exec(content)) !== null) {
 const id = match[1]!;
@@ -251,7 +252,7 @@ function parseProseSliceHeaders(content: string): RoadmapSliceEntry[] {
 // Try to extract depends from prose: "Depends on: S01" or "**Depends on:** S01, S02"
 const afterHeader = content.slice(match.index + match[0].length);
- const nextHeader = afterHeader.search(/^#{1,4}\s/m);
+ const nextHeader = afterHeader.search(/^\s*#{1,4}\s/m);
 const section = nextHeader !== -1 ? afterHeader.slice(0, nextHeader) : afterHeader.slice(0, 500);
 const depsMatch = section.match(/\*{0,2}Depends\s+on:?\*{0,2}\s*(.+)/i);
diff --git a/src/resources/extensions/gsd/state.ts b/src/resources/extensions/gsd/state.ts
index e6ff91895..628ea5907 100644
--- a/src/resources/extensions/gsd/state.ts
+++ b/src/resources/extensions/gsd/state.ts
@@ -36,22 +36,24 @@ import {
 import { findMilestoneIds } from './milestone-ids.js';
 import { loadQueueOrder, sortByQueueOrder } from './queue-order.js';
-import { isClosedStatus } from './status-guards.js';
 import { nativeBatchParseGsdFiles, type BatchParsedFile } from './native-parser-bridge.js';
 import { join, resolve } from 'path';
-import { existsSync, readdirSync } from 'node:fs';
+import { existsSync, readdirSync, readFileSync } from 'node:fs';
 import { debugCount, debugTime } from './debug-logger.js';
 import { extractVerdict } from './verdict-parser.js';
+import { logWarning, logError } from './workflow-logger.js';
 import {
 isDbAvailable,
 getAllMilestones,
+ 
getMilestone,
 getMilestoneSlices,
 getSliceTasks,
 getReplanHistory,
 getSlice,
 insertMilestone,
+ insertSlice,
 updateTaskStatus,
 getPendingSliceGateCount,
 type MilestoneRow,
@@ -64,8 +66,29 @@ import {
 * files like CONTEXT, CONTEXT-DRAFT, ROADMAP, or SUMMARY). These appear when
 * a milestone is created but never initialised. Treating them as active causes
 * auto-mode to stall or falsely declare completion.
+ *
+ * However, a milestone is NOT a ghost if:
+ * - It has a DB row (any status) — the DB knows about it even if content
+ * files haven't been created yet.
+ * - It has a worktree directory — a worktree proves the milestone was
+ * legitimately created and is expected to be populated.
+ *
+ * Fixes #2921: queued milestones with worktrees were incorrectly classified
+ * as ghosts, causing auto-mode to skip them entirely.
 */
export function isGhostMilestone(basePath: string, mid: string): boolean {
+ // If the milestone has a DB row, it's a known milestone — not a ghost.
+ if (isDbAvailable()) {
+ const dbRow = getMilestone(mid);
+ if (dbRow) return false;
+ }
+
+ // If a worktree exists for this milestone, it was legitimately created.
+ const root = gsdRoot(basePath);
+ const wtPath = join(root, 'worktrees', mid);
+ if (existsSync(wtPath)) return false;
+
+ // Fall back to content-file check: no substantive files means ghost.
 const context = resolveMilestoneFile(basePath, mid, "CONTEXT");
 const draft = resolveMilestoneFile(basePath, mid, "CONTEXT-DRAFT");
 const roadmap = resolveMilestoneFile(basePath, mid, "ROADMAP");
@@ -209,10 +232,10 @@ export async function deriveState(basePath: string): Promise {
 if (isDbAvailable()) {
 let dbMilestones = getAllMilestones();
- // Disk→DB reconciliation (#2631): when the milestones table is empty
- // (e.g. failed initial migration per #2529), the reconciliation code
- // inside deriveStateFromDb is unreachable. Populate from disk here so
- // the DB path activates correctly.
+ // Disk→DB reconciliation when DB is empty but disk has milestones (#2631). + // deriveStateFromDb() does its own reconciliation, but deriveState() skips + // it entirely when the DB is empty. Sync here so the DB path is used when + // disk milestones exist but haven't been migrated yet. if (dbMilestones.length === 0) { const diskIds = findMilestoneIds(basePath); let synced = false; @@ -231,7 +254,7 @@ export async function deriveState(basePath: string): Promise { stopDbTimer({ phase: result.phase, milestone: result.activeMilestone?.id }); _telemetry.dbDeriveCount++; } else { - // DB open but empty hierarchy tables — pre-migration project, use filesystem + // DB open but no milestones on disk either — use filesystem path result = await _deriveStateImpl(basePath); _telemetry.markdownDeriveCount++; } @@ -268,6 +291,13 @@ function extractContextTitle(content: string | null, fallback: string): string { // ─── DB-backed State Derivation ──────────────────────────────────────────── +/** + * Helper: check if a DB status counts as "done" (handles K002 ambiguity). + */ +function isStatusDone(status: string): boolean { + return status === 'complete' || status === 'done'; +} + /** * Derive GSD state from the milestones/slices/tasks DB tables. * Flag files (PARKED, VALIDATION, CONTINUE, REPLAN, REPLAN-TRIGGER, CONTEXT-DRAFT) @@ -298,6 +328,36 @@ export async function deriveStateFromDb(basePath: string): Promise { } if (synced) allMilestones = getAllMilestones(); + // Disk→DB slice reconciliation (#2533): slices defined in ROADMAP.md but + // missing from the DB cause permanent "No slice eligible" blocks because + // the dependency resolver only sees DB rows. Parse each milestone's roadmap + // and insert any missing slices, checking SUMMARY files to set correct status. + // insertSlice uses INSERT OR IGNORE, so existing rows are never overwritten. 
+ for (const mid of diskIds) { + if (isGhostMilestone(basePath, mid)) continue; + const roadmapPath = resolveMilestoneFile(basePath, mid, "ROADMAP"); + if (!roadmapPath) continue; + + const dbSlices = getMilestoneSlices(mid); + const dbSliceIds = new Set(dbSlices.map(s => s.id)); + + let roadmapContent: string; + try { roadmapContent = readFileSync(roadmapPath, "utf-8"); } + catch { continue; } + + const parsed = parseRoadmap(roadmapContent); + for (const s of parsed.slices) { + if (dbSliceIds.has(s.id)) continue; + const summaryPath = resolveSliceFile(basePath, mid, s.id, "SUMMARY"); + const sliceStatus = (s.done || summaryPath) ? "complete" : "pending"; + insertSlice({ + id: s.id, milestoneId: mid, title: s.title, + status: sliceStatus, risk: s.risk, + depends: s.depends, demo: s.demo, + }); + } + } + // Reconcile: discover milestones that exist on disk but are missing from // the DB. This happens when milestones were created before the DB migration // or were manually added to the filesystem. 
Without this, disk-only @@ -357,7 +417,7 @@ export async function deriveStateFromDb(basePath: string): Promise { continue; } - if (isClosedStatus(m.status)) { + if (isStatusDone(m.status)) { completeMilestoneIds.add(m.id); continue; } @@ -371,7 +431,7 @@ export async function deriveStateFromDb(basePath: string): Promise { // Check roadmap: all slices done means milestone is complete const slices = getMilestoneSlices(m.id); - if (slices.length > 0 && slices.every(s => isClosedStatus(s.status))) { + if (slices.length > 0 && slices.every(s => isStatusDone(s.status))) { // All slices done but no summary — still counts as complete for dep resolution // if a summary file exists // Note: without summary file, the milestone is in validating/completing state, not complete @@ -393,7 +453,7 @@ export async function deriveStateFromDb(basePath: string): Promise { // Ghost milestone check: no slices in DB AND no substantive files on disk const slices = getMilestoneSlices(m.id); - if (slices.length === 0 && !isClosedStatus(m.status)) { + if (slices.length === 0 && !isStatusDone(m.status)) { // Check disk for ghost detection if (isGhostMilestone(basePath, m.id)) continue; } @@ -416,7 +476,7 @@ export async function deriveStateFromDb(basePath: string): Promise { } // Not complete — determine if it should be active - const allSlicesDone = slices.length > 0 && slices.every(s => isClosedStatus(s.status)); + const allSlicesDone = slices.length > 0 && slices.every(s => isStatusDone(s.status)); // Get title — prefer DB, fall back to context file extraction let title = stripMilestonePrefix(m.title) || m.id; @@ -526,7 +586,8 @@ export async function deriveStateFromDb(basePath: string): Promise { ? `All milestones complete. ${activeReqs} active requirement${activeReqs === 1 ? '' : 's'} in REQUIREMENTS.md ${activeReqs === 1 ? 'has' : 'have'} not been mapped to a milestone.` : 'All milestones complete.'; return { - activeMilestone: lastEntry ? 
{ id: lastEntry.id, title: lastEntry.title } : null,
+ activeMilestone: null,
+ lastCompletedMilestone: lastEntry ? { id: lastEntry.id, title: lastEntry.title } : null,
 activeSlice: null,
 activeTask: null,
 phase: 'complete',
 recentDecisions: [],
 blockers: [],
@@ -568,10 +629,7 @@ export async function deriveStateFromDb(basePath: string): Promise {
 }
 // ── All slices done → validating/completing ─────────────────────────
- // Guard: [].every() === true (vacuous truth). Without the length check,
- // an empty slice array causes a premature phase transition to
- // validating-milestone. See: https://github.com/gsd-build/gsd-2/issues/2667
- const allSlicesDone = activeMilestoneSlices.length > 0 && activeMilestoneSlices.every(s => isClosedStatus(s.status));
+ const allSlicesDone = activeMilestoneSlices.length > 0 && activeMilestoneSlices.every(s => isStatusDone(s.status));
 if (allSlicesDone) {
 const validationFile = resolveMilestoneFile(basePath, activeMilestone.id, "VALIDATION");
 const validationContent = validationFile ? await loadFile(validationFile) : null;
@@ -604,19 +662,19 @@ export async function deriveStateFromDb(basePath: string): Promise {
 // ── Find active slice (first incomplete with deps satisfied) ─────────
 const sliceProgress = {
- done: activeMilestoneSlices.filter(s => isClosedStatus(s.status)).length,
+ done: activeMilestoneSlices.filter(s => isStatusDone(s.status)).length,
 total: activeMilestoneSlices.length,
 };
 const doneSliceIds = new Set(
- activeMilestoneSlices.filter(s => isClosedStatus(s.status)).map(s => s.id)
+ activeMilestoneSlices.filter(s => isStatusDone(s.status)).map(s => s.id)
 );
 let activeSlice: ActiveRef | null = null;
 let activeSliceRow: SliceRow | null = null;
 for (const s of activeMilestoneSlices) {
- if (isClosedStatus(s.status)) continue;
+ if (isStatusDone(s.status)) continue;
 if (s.depends.every(dep => doneSliceIds.has(dep))) {
 activeSlice = { id: s.id, title: s.title };
 activeSliceRow = s;
@@ -659,20 +717,16 @@ export async function deriveStateFromDb(basePath:
string): Promise { // causing the dispatcher to re-dispatch the same completed task forever. let reconciled = false; for (const t of tasks) { - if (isClosedStatus(t.status)) continue; + if (isStatusDone(t.status)) continue; const summaryPath = resolveTaskFile(basePath, activeMilestone.id, activeSlice.id, t.id, "SUMMARY"); if (summaryPath && existsSync(summaryPath)) { try { updateTaskStatus(activeMilestone.id, activeSlice.id, t.id, "complete"); - process.stderr.write( - `gsd-reconcile: task ${activeMilestone.id}/${activeSlice.id}/${t.id} had SUMMARY on disk but DB status was "${t.status}" — updated to "complete" (#2514)\n`, - ); + logWarning("reconcile", `task ${activeMilestone.id}/${activeSlice.id}/${t.id} status reconciled from "${t.status}" to "complete" (#2514)`, { mid: activeMilestone.id, sid: activeSlice.id, tid: t.id }); reconciled = true; } catch (e) { // DB write failed — continue with stale status rather than crash - process.stderr.write( - `gsd-reconcile: failed to update task ${t.id}: ${(e as Error).message}\n`, - ); + logError("reconcile", `failed to update task ${t.id}`, { tid: t.id, error: (e as Error).message }); } } } @@ -682,11 +736,11 @@ export async function deriveStateFromDb(basePath: string): Promise { } const taskProgress = { - done: tasks.filter(t => isClosedStatus(t.status)).length, + done: tasks.filter(t => isStatusDone(t.status)).length, total: tasks.length, }; - const activeTaskRow = tasks.find(t => !isClosedStatus(t.status)); + const activeTaskRow = tasks.find(t => !isStatusDone(t.status)); if (!activeTaskRow && tasks.length > 0) { // All tasks done but slice not marked complete → summarizing @@ -747,7 +801,7 @@ export async function deriveStateFromDb(basePath: string): Promise { } // ── Blocker detection: check completed tasks for blocker_discovered ── - const completedTasks = tasks.filter(t => isClosedStatus(t.status)); + const completedTasks = tasks.filter(t => isStatusDone(t.status)); let blockerTaskId: string | null = null; for 
(const ct of completedTasks) { if (ct.blocker_discovered) { @@ -1327,9 +1381,7 @@ export async function _deriveStateImpl(basePath: string): Promise { const summaryPath = resolveTaskFile(basePath, activeMilestone.id, activeSlice.id, t.id, "SUMMARY"); if (summaryPath && existsSync(summaryPath)) { t.done = true; - process.stderr.write( - `gsd-reconcile: task ${activeMilestone.id}/${activeSlice.id}/${t.id} has SUMMARY on disk but plan shows incomplete — marking done (#2514)\n`, - ); + logWarning("reconcile", `task ${activeMilestone.id}/${activeSlice.id}/${t.id} reconciled via SUMMARY on disk (#2514)`, { mid: activeMilestone.id, sid: activeSlice.id, tid: t.id }); } } diff --git a/src/resources/extensions/gsd/tests/auto-loop.test.ts b/src/resources/extensions/gsd/tests/auto-loop.test.ts index c472780cc..3a548f326 100644 --- a/src/resources/extensions/gsd/tests/auto-loop.test.ts +++ b/src/resources/extensions/gsd/tests/auto-loop.test.ts @@ -317,6 +317,35 @@ test("auto/resolve.ts one-shot pattern: _currentResolve is nulled before calling ); }); +test("auto/phases.ts: selectAndApplyModel called exactly once and before updateProgressWidget (#2907)", () => { + const src = readFileSync( + resolve(import.meta.dirname, "..", "auto", "phases.ts"), + "utf-8", + ); + // Extract the runUnitPhase function body + const fnStart = src.indexOf("export async function runUnitPhase"); + assert.ok(fnStart > 0, "runUnitPhase should exist in phases.ts"); + const fnBody = src.slice(fnStart, fnStart + 8000); + + // selectAndApplyModel must appear exactly once + const allOccurrences = [...fnBody.matchAll(/selectAndApplyModel\(/g)]; + assert.equal( + allOccurrences.length, + 1, + `selectAndApplyModel should be called exactly once in runUnitPhase, found ${allOccurrences.length} calls`, + ); + + // selectAndApplyModel must appear BEFORE updateProgressWidget + const modelIdx = fnBody.indexOf("selectAndApplyModel("); + const widgetIdx = fnBody.indexOf("updateProgressWidget("); + assert.ok(modelIdx > 
0, "selectAndApplyModel should exist in runUnitPhase"); + assert.ok(widgetIdx > 0, "updateProgressWidget should exist in runUnitPhase"); + assert.ok( + modelIdx < widgetIdx, + "selectAndApplyModel must be called BEFORE updateProgressWidget (#2899/#2907)", + ); +}); + // ─── autoLoop tests (T02) ───────────────────────────────────────────────── /** diff --git a/src/resources/extensions/gsd/tests/auto-mode-interactive-guard.test.ts b/src/resources/extensions/gsd/tests/auto-mode-interactive-guard.test.ts new file mode 100644 index 000000000..ee830e081 --- /dev/null +++ b/src/resources/extensions/gsd/tests/auto-mode-interactive-guard.test.ts @@ -0,0 +1,71 @@ +/** + * Test: auto-mode prompts must prohibit ask_user_questions / secure_env_collect + * + * Bug #2936: When the LLM calls ask_user_questions during auto-mode units + * (plan-slice, execute-task, complete-slice), the interactive tool queues a + * user response which causes the subsequent gsd_plan_slice / gsd_complete_task + * call to fail with "Skipped due to queued user message." The canonical GSD + * tool call is never recorded, verifyExpectedArtifact finds no artifact, and + * the dispatch loop re-dispatches the same unit 2-4x. + * + * Fix: Each auto-mode prompt must contain an "Autonomous execution" guard + * that explicitly prohibits ask_user_questions and secure_env_collect. 
+ */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const promptsDir = join(__dirname, "..", "prompts"); + +function loadPromptRaw(name: string): string { + return readFileSync(join(promptsDir, `${name}.md`), "utf-8"); +} + +const AUTO_MODE_PROMPTS = ["plan-slice", "execute-task", "complete-slice"]; + +for (const promptName of AUTO_MODE_PROMPTS) { + test(`${promptName} prompt prohibits ask_user_questions in auto-mode`, () => { + const content = loadPromptRaw(promptName); + + assert.ok( + content.includes("ask_user_questions"), + `${promptName}.md must mention ask_user_questions (to prohibit it)`, + ); + + assert.ok( + content.includes("secure_env_collect"), + `${promptName}.md must mention secure_env_collect (to prohibit it)`, + ); + + // The guard must clearly state this is autonomous / auto-mode + assert.ok( + content.toLowerCase().includes("auto-mode") || content.toLowerCase().includes("autonomous"), + `${promptName}.md must reference auto-mode or autonomous execution`, + ); + + // The guard must indicate no human is available + assert.ok( + content.includes("no human") || content.includes("no user"), + `${promptName}.md must state that no human/user is available to answer`, + ); + }); +} + +test("auto-mode prompts contain autonomous guard before final tool call reminder", () => { + for (const promptName of AUTO_MODE_PROMPTS) { + const content = loadPromptRaw(promptName); + + // The guard should appear before the final "MUST call" line + const guardIndex = content.indexOf("ask_user_questions"); + const mustCallIndex = content.lastIndexOf("MUST call"); + + assert.ok( + guardIndex !== -1 && mustCallIndex !== -1 && guardIndex < mustCallIndex, + `${promptName}.md: autonomous guard (ask_user_questions prohibition) must appear before the final MUST call 
reminder`, + ); + } +}); diff --git a/src/resources/extensions/gsd/tests/auto-model-selection.test.ts b/src/resources/extensions/gsd/tests/auto-model-selection.test.ts index 2bc41fa9e..4ea3245a3 100644 --- a/src/resources/extensions/gsd/tests/auto-model-selection.test.ts +++ b/src/resources/extensions/gsd/tests/auto-model-selection.test.ts @@ -4,7 +4,7 @@ import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs"; import { join } from "node:path"; import { tmpdir } from "node:os"; -import { resolvePreferredModelConfig } from "../auto-model-selection.js"; +import { resolvePreferredModelConfig, resolveModelId } from "../auto-model-selection.js"; function makeTempDir(prefix: string): string { return mkdtempSync(join(tmpdir(), prefix)); @@ -137,3 +137,73 @@ test("resolvePreferredModelConfig keeps explicit phase models as the ceiling", ( rmSync(tempGsdHome, { recursive: true, force: true }); } }); + +// ─── resolveModelId tests ───────────────────────────────────────────────── + +test("resolveModelId: bare ID resolves to anthropic over claude-code when session is claude-code (#2905)", () => { + const availableModels = [ + { id: "claude-sonnet-4-6", provider: "anthropic" }, + { id: "claude-sonnet-4-6", provider: "claude-code" }, + ]; + + // Bug: when currentProvider is "claude-code", bare ID "claude-sonnet-4-6" + // resolves to claude-code/claude-sonnet-4-6 instead of anthropic/claude-sonnet-4-6 + const result = resolveModelId("claude-sonnet-4-6", availableModels, "claude-code"); + assert.ok(result, "should resolve a model"); + assert.equal(result.provider, "anthropic", "bare ID must resolve to anthropic, not claude-code"); +}); + +test("resolveModelId: bare ID still prefers current provider when it is a first-class API provider", () => { + const availableModels = [ + { id: "claude-sonnet-4-6", provider: "anthropic" }, + { id: "claude-sonnet-4-6", provider: "bedrock" }, + ]; + + const result = resolveModelId("claude-sonnet-4-6", availableModels, "bedrock"); 
+ assert.ok(result, "should resolve a model"); + assert.equal(result.provider, "bedrock", "bare ID should prefer current provider when it is a real API provider"); +}); + +test("resolveModelId: explicit provider/model format still resolves to claude-code when specified", () => { + const availableModels = [ + { id: "claude-sonnet-4-6", provider: "anthropic" }, + { id: "claude-sonnet-4-6", provider: "claude-code" }, + ]; + + const result = resolveModelId("claude-code/claude-sonnet-4-6", availableModels, "anthropic"); + assert.ok(result, "should resolve a model"); + assert.equal(result.provider, "claude-code", "explicit provider prefix must be respected"); +}); + +test("resolveModelId: bare ID with only one provider works normally", () => { + const availableModels = [ + { id: "claude-sonnet-4-6", provider: "anthropic" }, + ]; + + const result = resolveModelId("claude-sonnet-4-6", availableModels, "anthropic"); + assert.ok(result, "should resolve a model"); + assert.equal(result.provider, "anthropic"); +}); + +test("resolveModelId: bare ID with claude-code as only provider still resolves", () => { + const availableModels = [ + { id: "claude-sonnet-4-6", provider: "claude-code" }, + ]; + + // If claude-code is the ONLY provider for this model, it should still resolve + const result = resolveModelId("claude-sonnet-4-6", availableModels, "claude-code"); + assert.ok(result, "should resolve even when only available via claude-code"); + assert.equal(result.provider, "claude-code"); +}); + +test("resolveModelId: anthropic wins over claude-code regardless of list order", () => { + const availableModels = [ + { id: "claude-sonnet-4-6", provider: "claude-code" }, + { id: "claude-sonnet-4-6", provider: "anthropic" }, + ]; + + // Even when claude-code appears first in the list, anthropic should win + const result = resolveModelId("claude-sonnet-4-6", availableModels, "claude-code"); + assert.ok(result, "should resolve a model"); + assert.equal(result.provider, "anthropic", 
"anthropic must win over claude-code regardless of list order"); +}); diff --git a/src/resources/extensions/gsd/tests/captures.test.ts b/src/resources/extensions/gsd/tests/captures.test.ts index 2e6618604..e8497e6fc 100644 --- a/src/resources/extensions/gsd/tests/captures.test.ts +++ b/src/resources/extensions/gsd/tests/captures.test.ts @@ -19,8 +19,11 @@ import { appendCapture, loadAllCaptures, loadPendingCaptures, + loadActionableCaptures, hasPendingCaptures, markCaptureResolved, + markCaptureExecuted, + stampCaptureMilestone, resolveCapturesPath, parseTriageOutput, } from "../captures.ts"; @@ -419,3 +422,103 @@ test("triage: parseTriageOutput preserves affectedFiles and targetSlice", () => assert.strictEqual(results[1].targetSlice, "S04"); assert.strictEqual(results[1].affectedFiles, undefined); }); + +// ─── Stale Quick-Task Captures (#2872) ──────────────────────────────────────── + +test("captures: markCaptureResolved stores milestone ID when provided", (t) => { + const tmp = makeTempDir("cap-milestone"); + t.after(() => rmSync(tmp, { recursive: true, force: true })); + + const id = appendCapture(tmp, "fix dialog width"); + markCaptureResolved(tmp, id, "quick-task", "widen the dialog", "small fix", "M003"); + + const all = loadAllCaptures(tmp); + assert.strictEqual(all.length, 1); + assert.strictEqual(all[0].resolvedInMilestone, "M003", "should store milestone ID"); +}); + +test("captures: loadActionableCaptures excludes captures resolved in prior milestones", (t) => { + const tmp = makeTempDir("cap-stale-filter"); + t.after(() => rmSync(tmp, { recursive: true, force: true })); + + // Capture resolved in M003 (prior milestone) + const id1 = appendCapture(tmp, "dialog too narrow"); + markCaptureResolved(tmp, id1, "quick-task", "widen it", "small fix", "M003"); + + // Capture resolved in M004 (current milestone) + const id2 = appendCapture(tmp, "button misaligned"); + markCaptureResolved(tmp, id2, "quick-task", "fix alignment", "css fix", "M004"); + + // 
Capture resolved without milestone context (legacy) + const id3 = appendCapture(tmp, "typo in label"); + markCaptureResolved(tmp, id3, "quick-task", "fix typo", "trivial"); + + // When loading for M004, only M004 and no-milestone captures should be returned + const actionable = loadActionableCaptures(tmp, "M004"); + const ids = actionable.map(c => c.id); + + assert.ok(!ids.includes(id1), "should exclude capture resolved in M003"); + assert.ok(ids.includes(id2), "should include capture resolved in M004"); + assert.ok(ids.includes(id3), "should include capture with no milestone (legacy)"); +}); + +test("captures: loadActionableCaptures without milestone returns all actionable", (t) => { + const tmp = makeTempDir("cap-no-milestone-filter"); + t.after(() => rmSync(tmp, { recursive: true, force: true })); + + const id1 = appendCapture(tmp, "issue one"); + markCaptureResolved(tmp, id1, "quick-task", "fix it", "small", "M003"); + + const id2 = appendCapture(tmp, "issue two"); + markCaptureResolved(tmp, id2, "inject", "inject it", "needed", "M004"); + + // Without milestone filter, all actionable captures are returned (backward compat) + const actionable = loadActionableCaptures(tmp); + assert.strictEqual(actionable.length, 2, "should return all actionable without filter"); +}); + +test("captures: loadActionableCaptures excludes already-executed captures", (t) => { + const tmp = makeTempDir("cap-executed-filter"); + t.after(() => rmSync(tmp, { recursive: true, force: true })); + + const id1 = appendCapture(tmp, "already done"); + markCaptureResolved(tmp, id1, "quick-task", "fix it", "small", "M004"); + markCaptureExecuted(tmp, id1); + + const id2 = appendCapture(tmp, "still pending"); + markCaptureResolved(tmp, id2, "quick-task", "fix it too", "small", "M004"); + + const actionable = loadActionableCaptures(tmp, "M004"); + assert.strictEqual(actionable.length, 1, "should exclude executed capture"); + assert.strictEqual(actionable[0].id, id2); +}); + +test("captures: 
stampCaptureMilestone adds milestone to capture missing it", (t) => { + const tmp = makeTempDir("cap-stamp-milestone"); + t.after(() => rmSync(tmp, { recursive: true, force: true })); + + const id = appendCapture(tmp, "fix alignment"); + markCaptureResolved(tmp, id, "quick-task", "fix it", "small"); + + // Before stamping, no milestone + let all = loadAllCaptures(tmp); + assert.strictEqual(all[0].resolvedInMilestone, undefined, "should have no milestone initially"); + + stampCaptureMilestone(tmp, id, "M004"); + + all = loadAllCaptures(tmp); + assert.strictEqual(all[0].resolvedInMilestone, "M004", "should have milestone after stamping"); +}); + +test("captures: stampCaptureMilestone is no-op if milestone already present", (t) => { + const tmp = makeTempDir("cap-stamp-noop"); + t.after(() => rmSync(tmp, { recursive: true, force: true })); + + const id = appendCapture(tmp, "fix alignment"); + markCaptureResolved(tmp, id, "quick-task", "fix it", "small", "M003"); + + stampCaptureMilestone(tmp, id, "M004"); + + const all = loadAllCaptures(tmp); + assert.strictEqual(all[0].resolvedInMilestone, "M003", "should keep original milestone"); +}); diff --git a/src/resources/extensions/gsd/tests/cli-provider-rate-limit.test.ts b/src/resources/extensions/gsd/tests/cli-provider-rate-limit.test.ts new file mode 100644 index 000000000..cd79cf9a2 --- /dev/null +++ b/src/resources/extensions/gsd/tests/cli-provider-rate-limit.test.ts @@ -0,0 +1,47 @@ +/** + * cli-provider-rate-limit.test.ts — Verify rate-limit backoff capping + * for CLI-style providers (openai-codex, google-gemini-cli). (#2922) + * + * These providers use per-user quotas with shorter windows, so the + * default 60s backoff should be capped at 30s to avoid leaving users + * stuck in an apparent permanent "rate limit" state. 
+ */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const RECOVERY_PATH = join(__dirname, "..", "bootstrap", "agent-end-recovery.ts"); + +function getRecoverySource(): string { + return readFileSync(RECOVERY_PATH, "utf-8"); +} + +test("agent-end-recovery references openai-codex for rate-limit handling (#2922)", () => { + const src = getRecoverySource(); + assert.ok( + src.includes("openai-codex"), + 'agent-end-recovery.ts must reference "openai-codex" for CLI provider rate-limit handling (#2922)', + ); +}); + +test("agent-end-recovery references google-gemini-cli for rate-limit handling (#2922)", () => { + const src = getRecoverySource(); + assert.ok( + src.includes("google-gemini-cli"), + 'agent-end-recovery.ts must reference "google-gemini-cli" for CLI provider rate-limit handling (#2922)', + ); +}); + +test("agent-end-recovery caps rate-limit backoff for CLI providers (#2922)", () => { + const src = getRecoverySource(); + // Must have a Math.min capping pattern for CLI provider rate-limit backoff + const cappingRe = /Math\.min\s*\(/; + assert.ok( + cappingRe.test(src), + 'agent-end-recovery.ts must cap rate-limit backoff with Math.min for CLI providers (#2922)', + ); +}); diff --git a/src/resources/extensions/gsd/tests/codebase-generator.test.ts b/src/resources/extensions/gsd/tests/codebase-generator.test.ts new file mode 100644 index 000000000..c698fc65f --- /dev/null +++ b/src/resources/extensions/gsd/tests/codebase-generator.test.ts @@ -0,0 +1,488 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdirSync, writeFileSync, readFileSync, existsSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { randomUUID } from "node:crypto"; +import { execSync } from 
"node:child_process"; + +import { + parseCodebaseMap, + generateCodebaseMap, + updateCodebaseMap, + writeCodebaseMap, + readCodebaseMap, + getCodebaseMapStats, +} from "../codebase-generator.ts"; + +// ─── Helpers ────────────────────────────────────────────────────────────── + +function makeTmpRepo(): string { + const base = join(tmpdir(), `gsd-codebase-test-${randomUUID()}`); + mkdirSync(join(base, ".gsd"), { recursive: true }); + execSync("git init", { cwd: base, stdio: "ignore" }); + return base; +} + +function addFile(base: string, path: string, content = ""): void { + const fullPath = join(base, path); + mkdirSync(join(fullPath, ".."), { recursive: true }); + writeFileSync(fullPath, content || `// ${path}\n`, "utf-8"); + execSync(`git add "${path}"`, { cwd: base, stdio: "ignore" }); +} + +function cleanup(base: string): void { + try { rmSync(base, { recursive: true, force: true }); } catch { /* */ } +} + +// ─── parseCodebaseMap ──────────────────────────────────────────────────── + +test("parseCodebaseMap: parses file with description", () => { + const content = `# Codebase Map + +### src/ +- \`main.ts\` — Application entry point +- \`utils.ts\` — Shared utilities +`; + + const map = parseCodebaseMap(content); + assert.equal(map.size, 2); + assert.equal(map.get("main.ts"), "Application entry point"); + assert.equal(map.get("utils.ts"), "Shared utilities"); +}); + +test("parseCodebaseMap: parses file without description", () => { + const content = `- \`config.ts\`\n- \`index.ts\` — Entry\n`; + const map = parseCodebaseMap(content); + assert.equal(map.size, 2); + assert.equal(map.get("config.ts"), ""); + assert.equal(map.get("index.ts"), "Entry"); +}); + +test("parseCodebaseMap: empty content returns empty map", () => { + const map = parseCodebaseMap(""); + assert.equal(map.size, 0); +}); + +test("parseCodebaseMap: ignores non-matching lines", () => { + const content = `# Codebase Map\n\nGenerated: 2026-03-23\n\n### src/\n- \`file.ts\` — desc\n`; + const map = 
parseCodebaseMap(content);
+ assert.equal(map.size, 1);
+});
+
+test("parseCodebaseMap: recovers descriptions from collapsed-description comments", () => {
+ // Collapsed directories keep per-file descriptions in HTML comments next to
+ // the summary line, so regeneration can restore them.
+ const content = `# Codebase Map
+
+### src/components/
+- *(25 files: 25 .ts)*
+<!-- src/components/Foo.ts — The Foo component -->
+<!-- src/components/Bar.ts — The Bar component -->
+
+`;
+ const map = parseCodebaseMap(content);
+ assert.equal(map.get("src/components/Foo.ts"), "The Foo component");
+ assert.equal(map.get("src/components/Bar.ts"), "The Bar component");
+ // The collapsed summary line itself should not be parsed as a file
+ assert.ok(!map.has("*(25 files: 25 .ts)*"));
+});
+
+test("parseCodebaseMap: handles corrupted/malformed input gracefully", () => {
+ const content = [
+ "- `unclosed backtick",
+ "- `` — empty filename",
+ "- `valid.ts` — ok",
+ "random garbage line",
+ "- `a.ts` — desc with other text",
+ ].join("\n");
+ const map = parseCodebaseMap(content);
+ assert.ok(map.has("valid.ts"));
+ assert.ok(map.has("a.ts"));
+ // Malformed lines should be silently skipped
+ assert.equal(map.size, 2);
+});
+
+// ─── generateCodebaseMap ─────────────────────────────────────────────────
+
+test("generateCodebaseMap: generates from git ls-files", () => {
+ const base = makeTmpRepo();
+ try {
+ addFile(base, "src/main.ts");
+ addFile(base, "src/utils.ts");
+ addFile(base, "README.md");
+
+ const result = generateCodebaseMap(base);
+ assert.ok(result.content.includes("# Codebase Map"));
+ assert.ok(result.content.includes("`src/main.ts`"));
+ assert.ok(result.content.includes("`src/utils.ts`"));
+ assert.ok(result.content.includes("README.md"));
+ assert.equal(result.fileCount, 3);
+ assert.equal(result.truncated, false);
+ assert.equal(result.files.length, 3);
+ } finally {
+ cleanup(base);
+ }
+});
+
+test("generateCodebaseMap: excludes .gsd/ files", () => {
+ const base = makeTmpRepo();
+ try {
+ addFile(base, "src/main.ts");
+ addFile(base, ".gsd/PROJECT.md");
+
+ const result = generateCodebaseMap(base);
+ assert.ok(result.content.includes("`src/main.ts`"));
+ 
assert.ok(!result.content.includes("PROJECT.md")); + } finally { + cleanup(base); + } +}); + +test("generateCodebaseMap: excludes binary and lock files", () => { + const base = makeTmpRepo(); + try { + addFile(base, "src/main.ts"); + addFile(base, "package-lock.json"); // .json not excluded + addFile(base, "yarn.lock"); // .lock excluded + addFile(base, "assets/logo.png"); // .png excluded + + const result = generateCodebaseMap(base); + assert.ok(result.content.includes("`src/main.ts`")); + assert.ok(result.content.includes("package-lock.json")); + assert.ok(!result.content.includes("yarn.lock")); + assert.ok(!result.content.includes("logo.png")); + } finally { + cleanup(base); + } +}); + +test("generateCodebaseMap: respects custom excludePatterns", () => { + const base = makeTmpRepo(); + try { + addFile(base, "src/main.ts"); + addFile(base, "docs/guide.md"); + addFile(base, "docs/api.md"); + + const result = generateCodebaseMap(base, { excludePatterns: ["docs/"] }); + assert.ok(result.content.includes("`src/main.ts`")); + assert.ok(!result.content.includes("guide.md")); + assert.ok(!result.content.includes("api.md")); + assert.equal(result.fileCount, 1); + } finally { + cleanup(base); + } +}); + +test("generateCodebaseMap: preserves existing descriptions", () => { + const base = makeTmpRepo(); + try { + addFile(base, "src/main.ts"); + addFile(base, "src/utils.ts"); + + const descriptions = new Map(); + descriptions.set("src/main.ts", "App entry point"); + + const result = generateCodebaseMap(base, undefined, descriptions); + assert.ok(result.content.includes("`src/main.ts` — App entry point")); + assert.ok(result.content.includes("`src/utils.ts`")); + } finally { + cleanup(base); + } +}); + +test("generateCodebaseMap: collapses large directories", () => { + const base = makeTmpRepo(); + try { + for (let i = 0; i < 25; i++) { + addFile(base, `src/components/comp${String(i).padStart(2, "0")}.ts`); + } + + const result = generateCodebaseMap(base); + // Collapsed 
summary should appear + assert.ok(result.content.includes("*(25 files: 25 .ts)*")); + // Individual file entries should NOT appear in main body + assert.ok(!result.content.includes("`src/components/comp00.ts`\n")); + } finally { + cleanup(base); + } +}); + +test("generateCodebaseMap: respects custom collapseThreshold", () => { + const base = makeTmpRepo(); + try { + for (let i = 0; i < 5; i++) addFile(base, `src/comp${i}.ts`); + + // Low threshold: 5 files should collapse + const collapsed = generateCodebaseMap(base, { collapseThreshold: 3 }); + assert.ok(collapsed.content.includes("5 files")); + + // High threshold: 5 files should expand + const expanded = generateCodebaseMap(base, { collapseThreshold: 10 }); + assert.ok(expanded.content.includes("`src/comp0.ts`")); + } finally { + cleanup(base); + } +}); + +test("generateCodebaseMap: truncated=false when file count is below maxFiles", () => { + const base = makeTmpRepo(); + try { + for (let i = 0; i < 4; i++) addFile(base, `file${i}.ts`); + const result = generateCodebaseMap(base, { maxFiles: 5 }); + assert.equal(result.fileCount, 4); + assert.equal(result.truncated, false); + } finally { + cleanup(base); + } +}); + +test("generateCodebaseMap: truncated=false when file count equals maxFiles exactly", () => { + const base = makeTmpRepo(); + try { + for (let i = 0; i < 5; i++) addFile(base, `file${i}.ts`); + const result = generateCodebaseMap(base, { maxFiles: 5 }); + assert.equal(result.fileCount, 5); + assert.equal(result.truncated, false); // exactly at limit — nothing was truncated + } finally { + cleanup(base); + } +}); + +test("generateCodebaseMap: truncated=true when file count exceeds maxFiles", () => { + const base = makeTmpRepo(); + try { + for (let i = 0; i < 10; i++) addFile(base, `file${i}.ts`); + const result = generateCodebaseMap(base, { maxFiles: 5 }); + assert.equal(result.fileCount, 5); + assert.equal(result.truncated, true); + assert.ok(result.content.includes("Truncated")); + } finally { + 
cleanup(base);
+  }
+});
+
+test("generateCodebaseMap: returns empty map for non-git directory", () => {
+  const base = join(tmpdir(), `gsd-codebase-test-${randomUUID()}`);
+  mkdirSync(join(base, ".gsd"), { recursive: true });
+  // No git init
+  try {
+    const result = generateCodebaseMap(base);
+    assert.equal(result.fileCount, 0);
+    assert.equal(result.truncated, false);
+    assert.ok(result.content.includes("# Codebase Map"));
+    assert.equal(result.files.length, 0);
+  } finally {
+    cleanup(base);
+  }
+});
+
+test("generateCodebaseMap: handles empty repository (no committed files)", () => {
+  const base = makeTmpRepo();
+  try {
+    const result = generateCodebaseMap(base);
+    assert.equal(result.fileCount, 0);
+    assert.equal(result.truncated, false);
+    assert.ok(result.content.includes("Files: 0"));
+  } finally {
+    cleanup(base);
+  }
+});
+
+test("generateCodebaseMap: collapsed directories preserve descriptions in hidden comment", () => {
+  const base = makeTmpRepo();
+  try {
+    for (let i = 0; i < 25; i++) {
+      addFile(base, `src/components/comp${String(i).padStart(2, "0")}.ts`);
+    }
+
+    // Generate with a description for one file in the collapsed dir
+    const descriptions = new Map([["src/components/comp00.ts", "The first component"]]);
+    const result = generateCodebaseMap(base, undefined, descriptions);
+
+    // The description should be in the hidden comment block
+    assert.ok(result.content.includes("The first component"), "collapsed description should survive in hidden comment");
+  } finally {
+    cleanup(base);
+  }
+});

diff --git a/src/resources/extensions/gsd/tests/integration/state-machine-live-validation.test.ts b/src/resources/extensions/gsd/tests/integration/state-machine-live-validation.test.ts
new file mode 100644
index 000000000..57ffe2a49
--- /dev/null
+++ b/src/resources/extensions/gsd/tests/integration/state-machine-live-validation.test.ts
@@ -0,0 +1,959 @@
+/**
+ * state-machine-live-validation.test.ts — Live operational validation of the
+ * GSD state machine with real handlers, real DB, and real filesystem.
+ * + * Exercises every phase transition, completion guard, edge case, and reopen + * path end-to-end. This is NOT a unit test — it drives the actual tool handlers + * against a real temp directory with a real SQLite database. + * + * Findings reference: #3161 (state machine validation report) + */ + +// GSD State Machine Live Validation (#3161) + +import { describe, test, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; +import { + mkdtempSync, + mkdirSync, + writeFileSync, + readFileSync, + rmSync, + existsSync, +} from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +// ── DB layer ────────────────────────────────────────────────────────────── +import { + openDatabase, + closeDatabase, + insertMilestone, + insertSlice, + insertTask, + getTask, + getSlice, + getMilestone, + getSliceTasks, + getMilestoneSlices, + updateTaskStatus, + updateSliceStatus, + updateMilestoneStatus, +} from "../../gsd-db.ts"; + +// ── Tool handlers ───────────────────────────────────────────────────────── +import { handleCompleteTask } from "../../tools/complete-task.ts"; +import { handleCompleteSlice } from "../../tools/complete-slice.ts"; +import { handleCompleteMilestone } from "../../tools/complete-milestone.ts"; +import { handleReopenTask } from "../../tools/reopen-task.ts"; +import { handleReopenSlice } from "../../tools/reopen-slice.ts"; + +// ── State derivation ────────────────────────────────────────────────────── +import { + deriveState, + deriveStateFromDb, + invalidateStateCache, + isGhostMilestone, +} from "../../state.ts"; + +// ── Status guards ───────────────────────────────────────────────────────── +import { isClosedStatus } from "../../status-guards.ts"; + +// ── Events ──────────────────────────────────────────────────────────────── +import { readEvents } from "../../workflow-events.ts"; + +// ── Cache invalidation ─────────────────────────────────────────────────── +import { invalidateAllCaches } 
from "../../cache.ts"; + +// ═══════════════════════════════════════════════════════════════════════════ +// Fixture Helpers +// ═══════════════════════════════════════════════════════════════════════════ + +function makeTempDir(): string { + return mkdtempSync(join(tmpdir(), "gsd-live-validation-")); +} + +/** + * Create a realistic .gsd/ fixture with: + * - M001 milestone with ROADMAP, CONTEXT + * - S01 slice with PLAN (2 tasks T01, T02) + * - S02 slice with PLAN (1 task T01) + * - Task PLAN stubs for each task + * - REQUIREMENTS.md and DECISIONS.md + */ +function createFullFixture(): string { + const base = makeTempDir(); + const gsdDir = join(base, ".gsd"); + const m001Dir = join(gsdDir, "milestones", "M001"); + const s01Dir = join(m001Dir, "slices", "S01"); + const s01Tasks = join(s01Dir, "tasks"); + const s02Dir = join(m001Dir, "slices", "S02"); + const s02Tasks = join(s02Dir, "tasks"); + + mkdirSync(s01Tasks, { recursive: true }); + mkdirSync(s02Tasks, { recursive: true }); + + // CONTEXT.md — needed to get past needs-discussion + writeFileSync( + join(m001Dir, "M001-CONTEXT.md"), + [ + "# M001: Live Validation Milestone", + "", + "## Purpose", + "Validate the state machine end-to-end.", + ].join("\n"), + ); + + // ROADMAP.md + writeFileSync( + join(m001Dir, "M001-ROADMAP.md"), + [ + "# M001: Live Validation Milestone", + "", + "## Vision", + "Prove state machine correctness.", + "", + "## Success Criteria", + "- All operations succeed", + "", + "## Slices", + "", + "- [ ] **S01: First Feature** `risk:low` `depends:[]`", + " - After this: First feature proven.", + "", + "- [ ] **S02: Second Feature** `risk:low` `depends:[]`", + " - After this: Second feature proven.", + "", + "## Boundary Map", + "", + "| From | To | Produces | Consumes |", + "|------|----|----------|----------|", + "| S01 | terminal | feature-a | nothing |", + "| S02 | terminal | feature-b | nothing |", + ].join("\n"), + ); + + // S01 PLAN + writeFileSync( + join(s01Dir, "S01-PLAN.md"), + [ 
+      "# S01: First Feature",
+      "",
+      "**Goal:** Implement first feature.",
+      "",
+      "## Tasks",
+      "",
+      "- [ ] **T01: Implementation** `est:30m`",
+      "  - Do: Build it",
+      "  - Verify: Run tests",
+      "",
+      "- [ ] **T02: Testing** `est:30m`",
+      "  - Do: Write tests",
+      "  - Verify: Run tests",
+    ].join("\n"),
+  );
+
+  // S01 task plan stubs
+  writeFileSync(join(s01Tasks, "T01-PLAN.md"), "# T01 Plan\nImplement.\n");
+  writeFileSync(join(s01Tasks, "T02-PLAN.md"), "# T02 Plan\nTest.\n");
+
+  // S02 PLAN
+  writeFileSync(
+    join(s02Dir, "S02-PLAN.md"),
+    [
+      "# S02: Second Feature",
+      "",
+      "**Goal:** Implement second feature.",
+      "",
+      "## Tasks",
+      "",
+      "- [ ] **T01: Implementation** `est:30m`",
+      "  - Do: Build it",
+      "  - Verify: Run tests",
+    ].join("\n"),
+  );
+
+  // S02 task plan stub
+  writeFileSync(join(s02Tasks, "T01-PLAN.md"), "# T01 Plan\nBuild.\n");
+
+  // REQUIREMENTS.md
+  writeFileSync(
+    join(gsdDir, "REQUIREMENTS.md"),
+    [
+      "# Requirements",
+      "",
+      "## Active",
+      "",
+      "| ID | Description | Owner |",
+      "|----|-------------|-------|",
+      "| R001 | Feature works | S01 |",
+    ].join("\n"),
+  );
+
+  // DECISIONS.md
+  writeFileSync(
+    join(gsdDir, "DECISIONS.md"),
+    [
+      "# Decisions",
+      "",
+      "| ID | Decision | Choice | Rationale |",
+      "|----|----------|--------|-----------|",
+    ].join("\n"),
+  );
+
+  return base;
+}
+
+function makeTaskParams(
+  taskId: string,
+  sliceId: string,
+  milestoneId: string,
+  overrides?: Partial<Record<string, unknown>>,
+): Record<string, unknown> {
+  return {
+    taskId,
+    sliceId,
+    milestoneId,
+    oneLiner: `Completed ${taskId}`,
+    narrative: `Implemented ${taskId} with full coverage.`,
+    verification: "All tests pass.",
+    keyFiles: ["src/feature.ts"],
+    keyDecisions: [],
+    deviations: "None.",
+    knownIssues: "None.",
+    blockerDiscovered: false,
+    verificationEvidence: [
+      { command: "npm test", exitCode: 0, verdict: "pass", durationMs: 1000 },
+    ],
+    ...overrides,
+  };
+}
+
+function makeSliceParams(
+  sliceId: string,
+  milestoneId: string,
+): Record<string, unknown> {
+  return {
+    sliceId,
+    
milestoneId,
+    sliceTitle: `${sliceId} Feature`,
+    oneLiner: `${sliceId} proven`,
+    narrative: "All tasks completed.",
+    verification: "Tests pass.",
+    keyFiles: ["src/feature.ts"],
+    keyDecisions: [],
+    patternsEstablished: [],
+    observabilitySurfaces: [],
+    deviations: "None.",
+    knownLimitations: "None.",
+    followUps: "None.",
+    requirementsAdvanced: [],
+    requirementsValidated: [],
+    requirementsSurfaced: [],
+    requirementsInvalidated: [],
+    filesModified: [{ path: "src/feature.ts", description: "Feature" }],
+    uatContent: "Acceptance criteria met.",
+    provides: ["feature"],
+    requires: [],
+    affects: [],
+    drillDownPaths: [],
+  };
+}
+
+function makeMilestoneParams(milestoneId: string): Record<string, unknown> {
+  return {
+    milestoneId,
+    title: "Live Validation Milestone",
+    oneLiner: "Milestone proven end-to-end",
+    narrative: "All slices completed and verified.",
+    successCriteriaResults: "All criteria met.",
+    definitionOfDoneResults: "All items checked.",
+    requirementOutcomes: "All requirements satisfied.",
+    keyDecisions: ["Chose approach A"],
+    keyFiles: ["src/feature.ts"],
+    lessonsLearned: ["Integration testing is valuable"],
+    followUps: "None.",
+    deviations: "None.",
+    verificationPassed: true,
+  };
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Test Suite
+// ═══════════════════════════════════════════════════════════════════════════
+
+describe("state-machine-live-validation", () => {
+  let base: string;
+
+  afterEach(() => {
+    closeDatabase();
+    if (base) rmSync(base, { recursive: true, force: true });
+  });
+
+  // ─────────────────────────────────────────────────────────────────────────
+  // PHASE 1: Full happy-path lifecycle
+  // ─────────────────────────────────────────────────────────────────────────
+
+  describe("happy path: full lifecycle M001 → complete", () => {
+    test("step 1: empty project derives pre-planning", async () => {
+      base = makeTempDir();
+      mkdirSync(join(base, ".gsd", "milestones"), { recursive:
true }); + const state = await deriveState(base); + assert.equal(state.phase, "pre-planning"); + assert.equal(state.activeMilestone, null); + }); + + test("step 2: milestone with CONTEXT-DRAFT derives needs-discussion", async () => { + base = makeTempDir(); + const mDir = join(base, ".gsd", "milestones", "M001"); + mkdirSync(mDir, { recursive: true }); + writeFileSync(join(mDir, "M001-CONTEXT-DRAFT.md"), "# Draft\nDraft context.\n"); + invalidateStateCache(); + const state = await deriveState(base); + assert.equal(state.phase, "needs-discussion"); + assert.equal(state.activeMilestone?.id, "M001"); + }); + + test("step 3: full fixture with ROADMAP+PLAN derives planning or executing", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + invalidateStateCache(); + const state = await deriveState(base); + // Without DB migration, filesystem path is used — should be planning or executing + assert.ok( + ["planning", "executing", "pre-planning"].includes(state.phase), + `expected planning/executing/pre-planning, got: ${state.phase}`, + ); + }); + + test("step 4: complete T01 in S01 — handler succeeds, DB reflects completion", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + // Seed DB with hierarchy + insertMilestone({ id: "M001", title: "Live Validation", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "First Feature", status: "in_progress" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "Implementation", status: "pending" }); + insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", title: "Testing", status: "pending" }); + + const result = await handleCompleteTask(makeTaskParams("T01", "S01", "M001") as any, base); + assert.ok(!("error" in result), `expected success, got: ${JSON.stringify(result)}`); + + // Verify DB state + const task = getTask("M001", "S01", "T01"); + assert.ok(task, "T01 should exist in DB"); + 
assert.ok(isClosedStatus(task!.status), `T01 status should be closed, got: ${task!.status}`); + + // Verify SUMMARY.md written to disk + const summaryPath = join(base, ".gsd", "milestones", "M001", "slices", "S01", "tasks", "T01-SUMMARY.md"); + assert.ok(existsSync(summaryPath), "T01-SUMMARY.md should exist on disk"); + + // Verify event log entry + const events = readEvents(join(base, ".gsd", "event-log.jsonl")); + const taskEvent = events.find(e => e.cmd === "complete-task" && (e.params as any).taskId === "T01"); + assert.ok(taskEvent, "event log should contain complete-task for T01"); + }); + + test("step 5: complete T02 in S01 — both tasks now done", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Live Validation", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "First Feature", status: "in_progress" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "Implementation", status: "complete" }); + insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", title: "Testing", status: "pending" }); + + const result = await handleCompleteTask(makeTaskParams("T02", "S01", "M001") as any, base); + assert.ok(!("error" in result), `expected success, got: ${JSON.stringify(result)}`); + + // Both tasks complete + const tasks = getSliceTasks("M001", "S01"); + assert.equal(tasks.length, 2); + assert.ok(tasks.every(t => isClosedStatus(t.status)), "all tasks should be closed"); + }); + + test("step 6: complete slice S01 — all tasks done, slice closes", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Live Validation", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "First Feature", status: "in_progress" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "Impl", status: "complete" }); + insertTask({ id: "T02", sliceId: 
"S01", milestoneId: "M001", title: "Test", status: "complete" }); + + const result = await handleCompleteSlice(makeSliceParams("S01", "M001") as any, base); + assert.ok(!("error" in result), `expected success, got: ${JSON.stringify(result)}`); + + const slice = getSlice("M001", "S01"); + assert.ok(slice, "S01 should exist"); + assert.ok(isClosedStatus(slice!.status), `S01 should be closed, got: ${slice!.status}`); + + // SUMMARY.md on disk + const summaryPath = join(base, ".gsd", "milestones", "M001", "slices", "S01", "S01-SUMMARY.md"); + assert.ok(existsSync(summaryPath), "S01-SUMMARY.md should exist"); + }); + + test("step 7: complete S02 task + slice — both slices done", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Live Validation", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "First", status: "complete" }); + insertSlice({ id: "S02", milestoneId: "M001", title: "Second", status: "in_progress" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "Impl", status: "complete" }); + insertTask({ id: "T01", sliceId: "S02", milestoneId: "M001", title: "Impl", status: "pending" }); + + // Complete task + const taskResult = await handleCompleteTask(makeTaskParams("T01", "S02", "M001") as any, base); + assert.ok(!("error" in taskResult), `task: ${JSON.stringify(taskResult)}`); + + // Complete slice + const sliceResult = await handleCompleteSlice(makeSliceParams("S02", "M001") as any, base); + assert.ok(!("error" in sliceResult), `slice: ${JSON.stringify(sliceResult)}`); + + // Both slices complete + const slices = getMilestoneSlices("M001"); + assert.ok(slices.length >= 2, "should have 2+ slices"); + assert.ok(slices.every(s => isClosedStatus(s.status)), "all slices should be closed"); + }); + + test("step 8: complete milestone M001 — full lifecycle done", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", 
"gsd.db")); + insertMilestone({ id: "M001", title: "Live Validation", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "First", status: "complete" }); + insertSlice({ id: "S02", milestoneId: "M001", title: "Second", status: "complete" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "Impl", status: "complete" }); + insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", title: "Test", status: "complete" }); + insertTask({ id: "T01", sliceId: "S02", milestoneId: "M001", title: "Impl", status: "complete" }); + + const result = await handleCompleteMilestone(makeMilestoneParams("M001") as any, base); + assert.ok(!("error" in result), `expected success, got: ${JSON.stringify(result)}`); + + const milestone = getMilestone("M001"); + assert.ok(milestone, "M001 should exist"); + assert.ok(isClosedStatus(milestone!.status), `M001 should be closed, got: ${milestone!.status}`); + + // SUMMARY.md on disk + const summaryPath = join(base, ".gsd", "milestones", "M001", "M001-SUMMARY.md"); + assert.ok(existsSync(summaryPath), "M001-SUMMARY.md should exist"); + }); + }); + + // ───────────────────────────────────────────────────────────────────────── + // PHASE 2: Completion guard edge cases + // ───────────────────────────────────────────────────────────────────────── + + describe("completion guards — edge cases", () => { + test("cannot complete task with empty taskId", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + const result = await handleCompleteTask(makeTaskParams("", "S01", "M001") as any, base); + assert.ok("error" in result); + assert.match((result as any).error, /taskId is required/); + }); + + test("cannot complete task in closed milestone", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Done", status: "complete" }); + insertSlice({ id: "S01", milestoneId: "M001" }); + insertTask({ id: 
"T01", sliceId: "S01", milestoneId: "M001", status: "pending" }); + + const result = await handleCompleteTask(makeTaskParams("T01", "S01", "M001") as any, base); + assert.ok("error" in result); + assert.match((result as any).error, /closed milestone/); + }); + + test("cannot complete task in closed slice", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Active", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", status: "complete" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "pending" }); + + const result = await handleCompleteTask(makeTaskParams("T01", "S01", "M001") as any, base); + assert.ok("error" in result); + assert.match((result as any).error, /closed slice/); + }); + + test("double task completion returns error (H5-related)", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Active", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", status: "in_progress" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" }); + + const result = await handleCompleteTask(makeTaskParams("T01", "S01", "M001") as any, base); + assert.ok("error" in result); + assert.match((result as any).error, /already complete/); + }); + + test("cannot complete slice with zero tasks — vacuous truth guard", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Active", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", status: "in_progress" }); + // No tasks inserted + + const result = await handleCompleteSlice(makeSliceParams("S01", "M001") as any, base); + assert.ok("error" in result); + assert.match((result as any).error, /no tasks found/); + }); + + test("cannot complete slice with incomplete tasks", async () => { + base = 
createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Active", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", status: "in_progress" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" }); + insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", status: "pending" }); + + const result = await handleCompleteSlice(makeSliceParams("S01", "M001") as any, base); + assert.ok("error" in result); + assert.match((result as any).error, /incomplete tasks/); + }); + + test("double slice completion returns error", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Active", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", status: "complete" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" }); + + const result = await handleCompleteSlice(makeSliceParams("S01", "M001") as any, base); + assert.ok("error" in result); + assert.match((result as any).error, /already complete/); + }); + + test("cannot complete milestone with zero slices", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Active", status: "active" }); + + const result = await handleCompleteMilestone(makeMilestoneParams("M001") as any, base); + assert.ok("error" in result); + assert.match((result as any).error, /no slices found/); + }); + + test("cannot complete milestone with incomplete slices", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Active", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", status: "complete" }); + insertSlice({ id: "S02", milestoneId: "M001", status: "in_progress" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" }); + insertTask({ id: 
"T01", sliceId: "S02", milestoneId: "M001", status: "pending" }); + + const result = await handleCompleteMilestone(makeMilestoneParams("M001") as any, base); + assert.ok("error" in result); + assert.match((result as any).error, /incomplete slices/); + }); + + test("cannot complete milestone with incomplete tasks in complete slice (deep check)", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Active", status: "active" }); + // Slice marked complete but task is still pending — simulates inconsistent state + insertSlice({ id: "S01", milestoneId: "M001", status: "complete" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "pending" }); + + const result = await handleCompleteMilestone(makeMilestoneParams("M001") as any, base); + assert.ok("error" in result); + assert.match((result as any).error, /incomplete tasks/); + }); + + test("cannot complete milestone without verificationPassed=true", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Active", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", status: "complete" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" }); + + const params = makeMilestoneParams("M001"); + params.verificationPassed = false; + const result = await handleCompleteMilestone(params as any, base); + assert.ok("error" in result); + assert.match((result as any).error, /verification did not pass/); + }); + + test("double milestone completion returns error", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Done", status: "complete" }); + insertSlice({ id: "S01", milestoneId: "M001", status: "complete" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" }); + + const result = await 
handleCompleteMilestone(makeMilestoneParams("M001") as any, base); + assert.ok("error" in result); + assert.match((result as any).error, /already complete/); + }); + }); + + // ───────────────────────────────────────────────────────────────────────── + // PHASE 3: Reopen operations + // ───────────────────────────────────────────────────────────────────────── + + describe("reopen operations", () => { + test("reopen task: resets completed task to pending", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Active", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", status: "in_progress" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" }); + + const result = await handleReopenTask( + { milestoneId: "M001", sliceId: "S01", taskId: "T01", reason: "Need to redo" }, + base, + ); + assert.ok(!("error" in result), `expected success: ${JSON.stringify(result)}`); + + const task = getTask("M001", "S01", "T01"); + assert.equal(task!.status, "pending"); + }); + + test("cannot reopen task that is not complete", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Active", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", status: "in_progress" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "pending" }); + + const result = await handleReopenTask( + { milestoneId: "M001", sliceId: "S01", taskId: "T01" }, + base, + ); + assert.ok("error" in result); + assert.match((result as any).error, /not complete/); + }); + + test("cannot reopen task in closed slice — must reopen slice first", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Active", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", status: "complete" }); + insertTask({ id: 
"T01", sliceId: "S01", milestoneId: "M001", status: "complete" }); + + const result = await handleReopenTask( + { milestoneId: "M001", sliceId: "S01", taskId: "T01" }, + base, + ); + assert.ok("error" in result); + assert.match((result as any).error, /closed slice/); + }); + + test("cannot reopen task in closed milestone", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Done", status: "complete" }); + insertSlice({ id: "S01", milestoneId: "M001", status: "complete" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" }); + + const result = await handleReopenTask( + { milestoneId: "M001", sliceId: "S01", taskId: "T01" }, + base, + ); + assert.ok("error" in result); + assert.match((result as any).error, /closed milestone/); + }); + + test("reopen slice: resets slice to in_progress and all tasks to pending", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Active", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", status: "complete" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" }); + insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", status: "complete" }); + + const result = await handleReopenSlice( + { milestoneId: "M001", sliceId: "S01", reason: "Need rework" }, + base, + ); + assert.ok(!("error" in result), `expected success: ${JSON.stringify(result)}`); + assert.equal((result as any).tasksReset, 2); + + // Verify slice state + const slice = getSlice("M001", "S01"); + assert.equal(slice!.status, "in_progress"); + + // Verify all tasks reset to pending + const tasks = getSliceTasks("M001", "S01"); + assert.ok(tasks.every(t => t.status === "pending"), "all tasks should be pending after slice reopen"); + }); + + test("cannot reopen slice in closed milestone", async () => { + base = createFullFixture(); + 
openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Done", status: "complete" }); + insertSlice({ id: "S01", milestoneId: "M001", status: "complete" }); + + const result = await handleReopenSlice( + { milestoneId: "M001", sliceId: "S01" }, + base, + ); + assert.ok("error" in result); + assert.match((result as any).error, /closed milestone/); + }); + + test("no reopen-milestone tool exists — milestone completion is irrevocable (H5)", async () => { + // This test documents the H5 finding: there is no handleReopenMilestone function. + // A completed milestone can only be undone via direct DB manipulation. + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Done", status: "complete" }); + + const milestone = getMilestone("M001"); + assert.ok(isClosedStatus(milestone!.status), "milestone is closed"); + + // The only escape is direct DB manipulation — no handler exists + updateMilestoneStatus("M001", "active", null); + const reopened = getMilestone("M001"); + assert.equal(reopened!.status, "active", "direct DB manipulation can reopen, but no tool exposes this"); + }); + }); + + // ───────────────────────────────────────────────────────────────────────── + // PHASE 4: Phantom parents and auto-creation (H6) + // ───────────────────────────────────────────────────────────────────────── + + describe("phantom parent auto-creation (H6)", () => { + test("completing task for non-existent milestone/slice auto-creates them", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + // No milestone or slice pre-inserted — handler will auto-create + + const result = await handleCompleteTask(makeTaskParams("T01", "S99", "M099") as any, base); + assert.ok(!("error" in result), `expected success: ${JSON.stringify(result)}`); + + // Phantom milestone created + const milestone = getMilestone("M099"); + assert.ok(milestone, "phantom milestone M099 should 
exist"); + assert.equal(milestone!.title, "", "phantom milestone has empty title"); + + // Phantom slice created + const slice = getSlice("M099", "S99"); + assert.ok(slice, "phantom slice S99 should exist"); + }); + + test("completing slice for non-existent milestone auto-creates it", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + // Insert task to satisfy completion guard + insertMilestone({ id: "M099" }); + insertSlice({ id: "S99", milestoneId: "M099" }); + insertTask({ id: "T01", sliceId: "S99", milestoneId: "M099", status: "complete" }); + + const result = await handleCompleteSlice(makeSliceParams("S99", "M099") as any, base); + assert.ok(!("error" in result), `expected success: ${JSON.stringify(result)}`); + }); + }); + + // ───────────────────────────────────────────────────────────────────────── + // PHASE 5: State derivation consistency + // ───────────────────────────────────────────────────────────────────────── + + describe("state derivation with live DB", () => { + test("deriveStateFromDb reflects task completion immediately", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Active", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "First", status: "in_progress" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "pending" }); + insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", status: "pending" }); + + invalidateStateCache(); + const stateBefore = await deriveStateFromDb(base); + assert.equal(stateBefore.phase, "executing", `before: expected executing, got ${stateBefore.phase}`); + + // Complete T01 + updateTaskStatus("M001", "S01", "T01", "complete", new Date().toISOString()); + invalidateStateCache(); + const stateAfterT01 = await deriveStateFromDb(base); + // Still executing — T02 is pending + assert.equal(stateAfterT01.phase, "executing", `after T01: expected 
executing, got ${stateAfterT01.phase}`); + + // Complete T02 + updateTaskStatus("M001", "S01", "T02", "complete", new Date().toISOString()); + invalidateStateCache(); + const stateAfterT02 = await deriveStateFromDb(base); + // All tasks done → summarizing + assert.equal(stateAfterT02.phase, "summarizing", `after T02: expected summarizing, got ${stateAfterT02.phase}`); + }); + + test("deriveStateFromDb reflects slice completion → next slice or validating", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Active", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "First", status: "complete" }); + insertSlice({ id: "S02", milestoneId: "M001", title: "Second", status: "in_progress" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" }); + insertTask({ id: "T01", sliceId: "S02", milestoneId: "M001", status: "pending" }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + // S01 done, S02 has pending task → executing + assert.equal(state.phase, "executing", `expected executing for S02, got ${state.phase}`); + assert.equal(state.activeSlice?.id, "S02", "active slice should be S02"); + }); + + test("deriveStateFromDb with all slices done → validating-milestone", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Active", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "First", status: "complete" }); + insertSlice({ id: "S02", milestoneId: "M001", title: "Second", status: "complete" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" }); + insertTask({ id: "T01", sliceId: "S02", milestoneId: "M001", status: "complete" }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + assert.equal(state.phase, "validating-milestone", `expected 
validating-milestone, got ${state.phase}`); + }); + + test("ghost milestone is skipped by deriveState", async () => { + base = makeTempDir(); + const gsdDir = join(base, ".gsd", "milestones"); + // M001 is ghost — empty dir + mkdirSync(join(gsdDir, "M001"), { recursive: true }); + // M002 has content + mkdirSync(join(gsdDir, "M002"), { recursive: true }); + writeFileSync(join(gsdDir, "M002", "M002-CONTEXT-DRAFT.md"), "# Draft\nContent.\n"); + + assert.ok(isGhostMilestone(base, "M001"), "M001 should be ghost"); + assert.ok(!isGhostMilestone(base, "M002"), "M002 should not be ghost"); + + invalidateStateCache(); + const state = await deriveState(base); + assert.equal(state.activeMilestone?.id, "M002", "should skip ghost M001 and use M002"); + }); + }); + + // ───────────────────────────────────────────────────────────────────────── + // PHASE 6: Event log integrity + // ───────────────────────────────────────────────────────────────────────── + + describe("event log integrity across operations", () => { + test("full operation sequence produces correct event log", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Active", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "First", status: "in_progress" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "pending" }); + insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", status: "pending" }); + + // Complete T01 + await handleCompleteTask(makeTaskParams("T01", "S01", "M001") as any, base); + // Complete T02 + await handleCompleteTask(makeTaskParams("T02", "S01", "M001") as any, base); + // Complete S01 + await handleCompleteSlice(makeSliceParams("S01", "M001") as any, base); + + const events = readEvents(join(base, ".gsd", "event-log.jsonl")); + + // Should have 3 events: 2 task completions + 1 slice completion + assert.ok(events.length >= 3, `expected ≥3 events, got 
${events.length}`); + + const taskEvents = events.filter(e => e.cmd === "complete-task"); + assert.equal(taskEvents.length, 2, "2 task completion events"); + + const sliceEvents = events.filter(e => e.cmd === "complete-slice"); + assert.equal(sliceEvents.length, 1, "1 slice completion event"); + + // Events are ordered chronologically + for (let i = 1; i < events.length; i++) { + assert.ok( + events[i]!.ts >= events[i - 1]!.ts, + `events should be chronologically ordered: ${events[i - 1]!.ts} <= ${events[i]!.ts}`, + ); + } + + // All events have hashes and session IDs + for (const event of events) { + assert.ok(event.hash, "event should have hash"); + assert.ok(event.session_id, "event should have session_id"); + } + }); + + test("reopen operations produce events", async () => { + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Active", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", status: "in_progress" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" }); + + await handleReopenTask( + { milestoneId: "M001", sliceId: "S01", taskId: "T01", reason: "redo" }, + base, + ); + + const events = readEvents(join(base, ".gsd", "event-log.jsonl")); + const reopenEvent = events.find(e => e.cmd === "reopen-task"); + assert.ok(reopenEvent, "should have reopen-task event"); + assert.equal((reopenEvent!.params as any).taskId, "T01"); + assert.equal((reopenEvent!.params as any).reason, "redo"); + }); + }); + + // ───────────────────────────────────────────────────────────────────────── + // PHASE 7: Reopen-then-redo cycle + // ───────────────────────────────────────────────────────────────────────── + + describe("reopen-then-redo cycle", () => { + test("complete → reopen → M12: stale SUMMARY causes immediate auto-reconcile", async () => { + // Finding M12: reopen-task does NOT delete the SUMMARY.md from disk. 
+ // The reopen handler's own post-mutation hook calls renderAllProjections + // which triggers deriveStateFromDb, which sees the stale SUMMARY.md and + // auto-reconciles the task BACK to "complete" (#2514) within the same call. + // + // Result: the reopen is effectively a no-op when filesystem artifacts exist. + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Active", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "First", status: "in_progress" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "pending" }); + + // Complete — writes T01-SUMMARY.md to disk + const r1 = await handleCompleteTask(makeTaskParams("T01", "S01", "M001") as any, base); + assert.ok(!("error" in r1), `first complete: ${JSON.stringify(r1)}`); + + const summaryPath = join(base, ".gsd", "milestones", "M001", "slices", "S01", "tasks", "T01-SUMMARY.md"); + assert.ok(existsSync(summaryPath), "SUMMARY.md exists after completion"); + + // Reopen — handler sets DB to "pending" in transaction, but post-mutation + // hook triggers reconciler which immediately sets it back to "complete" + const r2 = await handleReopenTask({ milestoneId: "M001", sliceId: "S01", taskId: "T01" }, base); + assert.ok(!("error" in r2), `reopen handler succeeded: ${JSON.stringify(r2)}`); + + // M12: After reopen completes, DB shows "complete" not "pending" because + // the reconciler auto-corrected it from the stale SUMMARY.md + const task = getTask("M001", "S01", "T01"); + assert.equal(task!.status, "complete", "M12: reconciler overrides reopen — task is back to complete"); + assert.ok(existsSync(summaryPath), "M12: SUMMARY.md was never cleaned up"); + }); + + test("complete slice → reopen → M12: reconciler overrides task reset via stale SUMMARY", async () => { + // Same M12 pattern at the slice level: reopen-slice resets all tasks to + // "pending" in DB, but task SUMMARY.md artifacts remain on disk. 
The + // reopen handler's post-mutation hook triggers reconciler which sees the + // stale artifacts and auto-corrects tasks back to "complete". + base = createFullFixture(); + openDatabase(join(base, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Active", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "First", status: "in_progress" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "pending" }); + + // Complete task + slice + await handleCompleteTask(makeTaskParams("T01", "S01", "M001") as any, base); + await handleCompleteSlice(makeSliceParams("S01", "M001") as any, base); + assert.ok(isClosedStatus(getSlice("M001", "S01")!.status)); + + // Reopen slice — transaction resets slice to in_progress and task to pending, + // but post-mutation hook triggers reconciler which sees stale SUMMARY.md + await handleReopenSlice({ milestoneId: "M001", sliceId: "S01" }, base); + + // Slice status is correctly in_progress (no slice SUMMARY reconciliation) + assert.equal(getSlice("M001", "S01")!.status, "in_progress"); + + // M12: Task was reset to "pending" in the transaction, but reconciler + // already corrected it back to "complete" from the stale SUMMARY.md + const task = getTask("M001", "S01", "T01"); + assert.equal(task!.status, "complete", "M12: reconciler overrides reopen — task back to complete"); + }); + }); +}); diff --git a/src/resources/extensions/gsd/tests/memory-extractor.test.ts b/src/resources/extensions/gsd/tests/memory-extractor.test.ts index 4df555470..47839f67b 100644 --- a/src/resources/extensions/gsd/tests/memory-extractor.test.ts +++ b/src/resources/extensions/gsd/tests/memory-extractor.test.ts @@ -1,4 +1,4 @@ -import { parseMemoryResponse, _resetExtractionState } from '../memory-extractor.ts'; +import { parseMemoryResponse, _resetExtractionState, buildMemoryLLMCall } from '../memory-extractor.ts'; import { openDatabase, closeDatabase, @@ -9,7 +9,7 @@ import { getActiveMemoriesRanked, } 
from '../memory-store.ts'; import type { MemoryAction } from '../memory-store.ts'; -import { describe, test, beforeEach, afterEach } from 'node:test'; +import { describe, test, beforeEach, afterEach, mock } from 'node:test'; import assert from 'node:assert/strict'; // ═══════════════════════════════════════════════════════════════════════════ @@ -169,3 +169,86 @@ test('memory-extractor: reset extraction state', () => { assert.ok(true, '_resetExtractionState should not throw'); }); +// ═══════════════════════════════════════════════════════════════════════════ +// memory-extractor: buildMemoryLLMCall resolves OAuth API key via modelRegistry +// Regression test for #2959 — OAuth users had broken memory extraction +// because streamSimpleAnthropic only checked env vars, not auth.json. +// ═══════════════════════════════════════════════════════════════════════════ + +test('memory-extractor: buildMemoryLLMCall resolves API key from modelRegistry for OAuth users', async () => { + const OAUTH_TOKEN = 'sk-ant-oat-test-oauth-token-12345'; + let getApiKeyCalled = false; + + const fakeModel = { + id: 'claude-haiku-test', + provider: 'anthropic', + api: 'anthropic-messages', + cost: { input: 0.25, output: 1.25 }, + }; + + const ctx = { + modelRegistry: { + getAvailable: () => [fakeModel], + getApiKey: async (_model: any) => { + getApiKeyCalled = true; + return OAUTH_TOKEN; + }, + }, + } as any; + + const llmCallFn = buildMemoryLLMCall(ctx); + assert.ok(llmCallFn !== null, 'buildMemoryLLMCall should return a function when models are available'); + + // The function should have resolved the API key eagerly via modelRegistry.getApiKey. + // Give the async getApiKey a tick to resolve. 
+ await new Promise(resolve => setTimeout(resolve, 50)); + assert.ok(getApiKeyCalled, 'buildMemoryLLMCall must call modelRegistry.getApiKey() to resolve OAuth tokens'); +}); + +test('memory-extractor: buildMemoryLLMCall returns null when no models available', () => { + const ctx = { + modelRegistry: { + getAvailable: () => [], + getApiKey: async () => undefined, + }, + } as any; + + const llmCallFn = buildMemoryLLMCall(ctx); + assert.strictEqual(llmCallFn, null, 'should return null when no models available'); +}); + +test('memory-extractor: buildMemoryLLMCall prefers haiku model', async () => { + let resolvedModelId: string | undefined; + + const haikuModel = { + id: 'claude-3-5-haiku-20241022', + provider: 'anthropic', + api: 'anthropic-messages', + cost: { input: 0.25, output: 1.25 }, + }; + const sonnetModel = { + id: 'claude-sonnet-4-20250514', + provider: 'anthropic', + api: 'anthropic-messages', + cost: { input: 3, output: 15 }, + }; + + const ctx = { + modelRegistry: { + getAvailable: () => [sonnetModel, haikuModel], + getApiKey: async (model: any) => { + resolvedModelId = model.id; + return 'sk-ant-oat-test-token'; + }, + }, + } as any; + + const llmCallFn = buildMemoryLLMCall(ctx); + assert.ok(llmCallFn !== null, 'should return a function'); + + // Wait for the async getApiKey to resolve + await new Promise(resolve => setTimeout(resolve, 50)); + assert.strictEqual(resolvedModelId, 'claude-3-5-haiku-20241022', + 'should resolve API key for haiku model, not sonnet'); +}); + diff --git a/src/resources/extensions/gsd/tests/migrate-external-worktree.test.ts b/src/resources/extensions/gsd/tests/migrate-external-worktree.test.ts new file mode 100644 index 000000000..43098237b --- /dev/null +++ b/src/resources/extensions/gsd/tests/migrate-external-worktree.test.ts @@ -0,0 +1,105 @@ +import { describe, test, before, after } from "node:test"; +import assert from "node:assert/strict"; +import { + mkdtempSync, + rmSync, + writeFileSync, + existsSync, + mkdirSync, + 
realpathSync, +} from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { execSync } from "node:child_process"; + +import { migrateToExternalState } from "../migrate-external.ts"; + +function run(command: string, cwd: string): string { + return execSync(command, { + cwd, + stdio: ["ignore", "pipe", "pipe"], + encoding: "utf-8", + }).trim(); +} + +describe("migrate-external worktree guard (#2970)", () => { + let base: string; + let stateDir: string; + let worktreePath: string; + + before(() => { + base = realpathSync(mkdtempSync(join(tmpdir(), "gsd-migrate-wt-"))); + stateDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-state-"))); + process.env.GSD_STATE_DIR = stateDir; + + // Create a git repo with a remote + run("git init -b main", base); + run('git config user.name "Test"', base); + run('git config user.email "test@example.com"', base); + run('git remote add origin git@github.com:example/repo.git', base); + writeFileSync(join(base, "README.md"), "# Test\n", "utf-8"); + run("git add README.md", base); + run('git commit -m "init"', base); + + // Create a worktree + worktreePath = join(base, ".gsd", "worktrees", "M001"); + run(`git worktree add -b milestone/M001 ${worktreePath}`, base); + + // Populate worktree with a .gsd directory (simulating syncGsdStateToWorktree) + const worktreeGsd = join(worktreePath, ".gsd"); + mkdirSync(worktreeGsd, { recursive: true }); + writeFileSync(join(worktreeGsd, "PREFERENCES.md"), "# prefs\n", "utf-8"); + }); + + after(() => { + delete process.env.GSD_STATE_DIR; + // Remove worktree before cleaning up + try { run(`git worktree remove --force ${worktreePath}`, base); } catch { /* ok */ } + rmSync(base, { recursive: true, force: true }); + rmSync(stateDir, { recursive: true, force: true }); + }); + + test("migrateToExternalState skips when basePath is a git worktree", () => { + // The worktree has a real .gsd directory — migration would normally run. 
+ // But since this is a worktree, it should be skipped. + const result = migrateToExternalState(worktreePath); + + assert.equal(result.migrated, false, "should not migrate inside a worktree"); + assert.equal(result.error, undefined, "should not report an error"); + + // .gsd should still exist as a real directory (not renamed/removed) + assert.ok( + existsSync(join(worktreePath, ".gsd")), + ".gsd directory should still exist after skipped migration" + ); + + // .gsd.migrating should NOT exist + assert.ok( + !existsSync(join(worktreePath, ".gsd.migrating")), + ".gsd.migrating should not be created in a worktree" + ); + }); + + test("migrateToExternalState still works on main repo", () => { + // Create a fresh temp repo to test main repo migration path + const mainBase = realpathSync(mkdtempSync(join(tmpdir(), "gsd-migrate-main-"))); + try { + run("git init -b main", mainBase); + run('git config user.name "Test"', mainBase); + run('git config user.email "test@example.com"', mainBase); + run('git remote add origin git@github.com:example/main-repo.git', mainBase); + writeFileSync(join(mainBase, "README.md"), "# Test\n", "utf-8"); + run("git add README.md", mainBase); + run('git commit -m "init"', mainBase); + + // Create a .gsd directory with content + mkdirSync(join(mainBase, ".gsd"), { recursive: true }); + writeFileSync(join(mainBase, ".gsd", "PREFERENCES.md"), "# prefs\n", "utf-8"); + + const result = migrateToExternalState(mainBase); + assert.equal(result.migrated, true, "should migrate on main repo"); + } finally { + rmSync(mainBase, { recursive: true, force: true }); + } + }); +}); diff --git a/src/resources/extensions/gsd/tests/milestone-status-authoritative.test.ts b/src/resources/extensions/gsd/tests/milestone-status-authoritative.test.ts new file mode 100644 index 000000000..94fdcf3c0 --- /dev/null +++ b/src/resources/extensions/gsd/tests/milestone-status-authoritative.test.ts @@ -0,0 +1,116 @@ +/** + * Bug #2807: Web roadmap derives milestone status from 
slice heuristics + * instead of authoritative GSD milestone state. + * + * getMilestoneStatus() should prefer the authoritative `status` field on + * WorkspaceMilestoneTarget (populated from the engine registry) rather + * than inferring status from slice completion flags. + */ +import test from "node:test"; +import assert from "node:assert/strict"; +import { getMilestoneStatus } from "../../../../../web/lib/workspace-status.ts"; + +// Inline type to avoid importing .tsx (not compiled to .js by test pipeline) +interface TestMilestone { + id: string; + title: string; + roadmapPath?: string; + status?: "complete" | "active" | "pending" | "parked"; + validationVerdict?: "pass" | "needs-attention" | "needs-remediation"; + slices: Array<{ id: string; title: string; done: boolean; tasks: Array<{ id: string; title: string; done: boolean }> }>; +} + +// ── Helpers ──────────────────────────────────────────────────────────────── + +function makeMilestone(overrides: Partial<TestMilestone> & { id: string }): TestMilestone { + return { + title: overrides.id, + roadmapPath: undefined, + slices: [], + ...overrides, + }; +} + +// ── Tests ────────────────────────────────────────────────────────────────── + +test("getMilestoneStatus returns authoritative 'complete' even when slices are not all done", () => { + const milestone = makeMilestone({ + id: "M001", + status: "complete", + slices: [ + { id: "S01", title: "Slice 1", done: true, tasks: [] }, + { id: "S02", title: "Slice 2", done: false, tasks: [] }, // not done + ], + }); + // Before the fix, this would return "in-progress" because not all slices are done. + // After the fix, it should return "done" because authoritative status is "complete". 
+ assert.equal(getMilestoneStatus(milestone, {}), "done"); +}); + +test("getMilestoneStatus returns authoritative 'active' regardless of slice state", () => { + const milestone = makeMilestone({ + id: "M002", + status: "active", + slices: [ + { id: "S01", title: "Slice 1", done: true, tasks: [] }, + { id: "S02", title: "Slice 2", done: true, tasks: [] }, + ], + }); + // Before the fix, this would return "done" because all slices are done. + // After the fix, it should return "in-progress" because authoritative status is "active". + assert.equal(getMilestoneStatus(milestone, {}), "in-progress"); +}); + +test("getMilestoneStatus returns 'pending' for authoritative 'pending' even when some slices done", () => { + const milestone = makeMilestone({ + id: "M003", + status: "pending", + slices: [ + { id: "S01", title: "Slice 1", done: true, tasks: [] }, + { id: "S02", title: "Slice 2", done: false, tasks: [] }, + ], + }); + // Before the fix, this would return "in-progress" because some slices are done. + // After the fix, it should return "pending". + assert.equal(getMilestoneStatus(milestone, {}), "pending"); +}); + +test("getMilestoneStatus maps 'parked' to 'pending' item status", () => { + const milestone = makeMilestone({ + id: "M004", + status: "parked", + slices: [ + { id: "S01", title: "Slice 1", done: true, tasks: [] }, + ], + }); + // Parked milestones should render as pending in the UI + assert.equal(getMilestoneStatus(milestone, {}), "pending"); +}); + +test("getMilestoneStatus falls back to heuristic when no authoritative status", () => { + // Backward compatibility: milestones without the status field should + // still work using the old slice-based heuristic. 
+ const milestone = makeMilestone({ + id: "M005", + slices: [ + { id: "S01", title: "Slice 1", done: true, tasks: [] }, + { id: "S02", title: "Slice 2", done: true, tasks: [] }, + ], + }); + assert.equal(getMilestoneStatus(milestone, {}), "done"); +}); + +test("getMilestoneStatus exposes validationVerdict on milestone target", () => { + const milestone = makeMilestone({ + id: "M006", + status: "complete", + validationVerdict: "needs-attention", + slices: [ + { id: "S01", title: "Slice 1", done: true, tasks: [] }, + ], + }); + // The milestone should have the validationVerdict field available + assert.equal(milestone.validationVerdict, "needs-attention"); + // And status should still be "done" + assert.equal(getMilestoneStatus(milestone, {}), "done"); +}); diff --git a/src/resources/extensions/gsd/tests/model-cost-table.test.ts b/src/resources/extensions/gsd/tests/model-cost-table.test.ts index 98906c083..4ab8381f0 100644 --- a/src/resources/extensions/gsd/tests/model-cost-table.test.ts +++ b/src/resources/extensions/gsd/tests/model-cost-table.test.ts @@ -67,3 +67,37 @@ test("all cost table entries have valid data", () => { assert.ok(entry.updatedAt, `${entry.id} missing updatedAt`); } }); + +// ─── #2885: openai-codex and modern OpenAI models in cost table ────────────── + +test("#2885: cost table includes openai-codex provider models", () => { + const ids = BUNDLED_COST_TABLE.map(e => e.id); + const codexModels = [ + "gpt-5.1", "gpt-5.1-codex-max", "gpt-5.1-codex-mini", + "gpt-5.2", "gpt-5.2-codex", "gpt-5.3-codex", "gpt-5.3-codex-spark", "gpt-5.4", + ]; + for (const model of codexModels) { + assert.ok(ids.includes(model), `cost table should include openai-codex model "${model}"`); + } +}); + +test("#2885: cost table includes modern OpenAI models", () => { + const ids = BUNDLED_COST_TABLE.map(e => e.id); + const newModels = [ + "o4-mini", "o4-mini-deep-research", + "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", + "gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-5-pro", + ]; 
+ for (const model of newModels) { + assert.ok(ids.includes(model), `cost table should include modern OpenAI model "${model}"`); + } +}); + +test("#2885: lookupModelCost returns costs for new models (not 999 fallback)", () => { + const newModels = ["o4-mini", "gpt-4.1", "gpt-5", "gpt-5.4", "gpt-5.1-codex-mini"]; + for (const model of newModels) { + const entry = lookupModelCost(model); + assert.ok(entry, `lookupModelCost should find "${model}"`); + assert.ok(entry.inputPer1k < 999, `${model} should have a real cost, not the 999 fallback`); + } +}); diff --git a/src/resources/extensions/gsd/tests/model-router.test.ts b/src/resources/extensions/gsd/tests/model-router.test.ts index b22fce7fd..fb1128eb5 100644 --- a/src/resources/extensions/gsd/tests/model-router.test.ts +++ b/src/resources/extensions/gsd/tests/model-router.test.ts @@ -172,11 +172,11 @@ test("#2192: unknown model is not downgraded — respects user config", () => { const config = { ...defaultRoutingConfig(), enabled: true }; const result = resolveModelForComplexity( makeClassification("light"), - { primary: "gpt-5.4", fallbacks: [] }, + { primary: "some-future-unknown-model-v9", fallbacks: [] }, config, - ["gpt-5.4", ...AVAILABLE_MODELS], + ["some-future-unknown-model-v9", ...AVAILABLE_MODELS], ); - assert.equal(result.modelId, "gpt-5.4", "unknown model should be used as-is"); + assert.equal(result.modelId, "some-future-unknown-model-v9", "unknown model should be used as-is"); assert.equal(result.wasDowngraded, false, "should not be downgraded"); assert.ok(result.reason.includes("not in the known tier map"), "reason should explain why"); }); @@ -205,3 +205,68 @@ test("#2192: known model is still downgraded normally", () => { assert.equal(result.wasDowngraded, true, "known heavy model should still be downgraded for light tasks"); assert.notEqual(result.modelId, "claude-opus-4-6"); }); + +// ─── #2885: openai-codex and modern OpenAI models in tier map ──────────────── + +test("#2885: openai-codex 
light-tier models are recognized", () => { + const config = { ...defaultRoutingConfig(), enabled: true }; + const lightModels = ["gpt-4.1-mini", "gpt-4.1-nano", "gpt-5-mini", "gpt-5-nano", "gpt-5.1-codex-mini", "gpt-5.3-codex-spark"]; + for (const model of lightModels) { + const result = resolveModelForComplexity( + makeClassification("light"), + { primary: model, fallbacks: [] }, + config, + [model, ...AVAILABLE_MODELS], + ); + // Model is known AND light-tier, so requesting light should NOT downgrade + assert.equal(result.wasDowngraded, false, `${model} should be known as light tier (wasDowngraded)`); + assert.equal(result.modelId, model, `${model} should be returned as-is for light tier`); + // Verify it IS known (not hitting the unknown-model bail-out) + assert.ok(!result.reason.includes("not in the known tier map"), `${model} should be in the known tier map`); + } +}); + +test("#2885: openai-codex standard-tier models are recognized", () => { + const config = { ...defaultRoutingConfig(), enabled: true }; + const standardModels = ["gpt-4.1", "gpt-5.1-codex-max"]; + for (const model of standardModels) { + const result = resolveModelForComplexity( + makeClassification("standard"), + { primary: model, fallbacks: [] }, + config, + [model, ...AVAILABLE_MODELS], + ); + assert.equal(result.wasDowngraded, false, `${model} should be known as standard tier`); + assert.equal(result.modelId, model, `${model} should be returned as-is for standard tier`); + assert.ok(!result.reason.includes("not in the known tier map"), `${model} should be in the known tier map`); + } +}); + +test("#2885: openai-codex heavy-tier models are recognized", () => { + const config = { ...defaultRoutingConfig(), enabled: true }; + const heavyModels = ["gpt-5", "gpt-5-pro", "gpt-5.1", "gpt-5.2", "gpt-5.2-codex", "gpt-5.3-codex", "gpt-5.4", "o4-mini", "o4-mini-deep-research"]; + for (const model of heavyModels) { + const result = resolveModelForComplexity( + makeClassification("heavy"), + { primary: 
model, fallbacks: [] }, + config, + [model, ...AVAILABLE_MODELS], + ); + assert.equal(result.wasDowngraded, false, `${model} should be known as heavy tier`); + assert.equal(result.modelId, model, `${model} should be returned as-is for heavy tier`); + assert.ok(!result.reason.includes("not in the known tier map"), `${model} should be in the known tier map`); + } +}); + +test("#2885: heavy openai-codex model downgrades to light for light task", () => { + const config = { ...defaultRoutingConfig(), enabled: true }; + const result = resolveModelForComplexity( + makeClassification("light"), + { primary: "gpt-5.4", fallbacks: [] }, + config, + ["gpt-5.4", "gpt-4.1-nano", ...AVAILABLE_MODELS], + ); + assert.equal(result.wasDowngraded, true, "heavy model should downgrade for light task"); + // Should pick a light-tier model + assert.notEqual(result.modelId, "gpt-5.4", "should not use the heavy model for light task"); +}); diff --git a/src/resources/extensions/gsd/tests/model-unittype-mapping.test.ts b/src/resources/extensions/gsd/tests/model-unittype-mapping.test.ts index c23d1f4b2..82267a3e1 100644 --- a/src/resources/extensions/gsd/tests/model-unittype-mapping.test.ts +++ b/src/resources/extensions/gsd/tests/model-unittype-mapping.test.ts @@ -154,6 +154,34 @@ test("all auto-dispatch unitTypes have preference mapping or subagent handling", assert.deepEqual(unmapped, [], `Unmapped unitTypes in preferences-models.ts: ${unmapped.join(", ")}`); }); +// ═══════════════════════════════════════════════════════════════════════════ +// #2900: worktree-merge must map to completion phase +// ═══════════════════════════════════════════════════════════════════════════ + +test("#2900: resolveModelWithFallbacksForUnit handles worktree-merge", () => { + assert.ok(preferencesSrc.includes('"worktree-merge"'), "missing worktree-merge case in switch"); +}); + +// ═══════════════════════════════════════════════════════════════════════════ +// #2900: KNOWN_UNIT_TYPES must include all 
dispatched unit types +// ═══════════════════════════════════════════════════════════════════════════ + +const preferenceTypesSrc = readSrc("preferences-types.ts"); + +test("#2900: KNOWN_UNIT_TYPES includes all auto-dispatch unit types", () => { + const missing: string[] = []; + for (const ut of ALL_KNOWN_UNIT_TYPES) { + if (!preferenceTypesSrc.includes(`"${ut}"`)) { + missing.push(ut); + } + } + assert.deepEqual(missing, [], `Missing from KNOWN_UNIT_TYPES: ${missing.join(", ")}`); +}); + +test("#2900: KNOWN_UNIT_TYPES includes worktree-merge", () => { + assert.ok(preferenceTypesSrc.includes('"worktree-merge"'), "worktree-merge missing from KNOWN_UNIT_TYPES"); +}); + // ═══════════════════════════════════════════════════════════════════════════ // metrics.ts: classifyUnitPhase coverage // ═══════════════════════════════════════════════════════════════════════════ diff --git a/src/resources/extensions/gsd/tests/notifications.test.ts b/src/resources/extensions/gsd/tests/notifications.test.ts index b833c667b..0331f5956 100644 --- a/src/resources/extensions/gsd/tests/notifications.test.ts +++ b/src/resources/extensions/gsd/tests/notifications.test.ts @@ -4,6 +4,7 @@ import assert from "node:assert/strict"; import { buildDesktopNotificationCommand, shouldSendDesktopNotification, + formatNotificationTitle, } from "../notifications.js"; import type { NotificationPreferences } from "../types.js"; @@ -87,3 +88,47 @@ test("buildDesktopNotificationCommand preserves literal shell characters on linu test("buildDesktopNotificationCommand skips unsupported platforms", () => { assert.equal(buildDesktopNotificationCommand("win32", "Title", "Message"), null); }); + +// ─── formatNotificationTitle — project context in notifications (#2708) ────── + +test("formatNotificationTitle returns 'GSD' when no project name is given", () => { + assert.equal(formatNotificationTitle(), "GSD"); + assert.equal(formatNotificationTitle(undefined), "GSD"); + assert.equal(formatNotificationTitle(""), 
"GSD"); +}); + +test("formatNotificationTitle includes project name when provided", () => { + assert.equal(formatNotificationTitle("my-app"), "GSD — my-app"); +}); + +test("formatNotificationTitle trims whitespace from project name", () => { + assert.equal(formatNotificationTitle(" spaced "), "GSD — spaced"); +}); + +test("buildDesktopNotificationCommand includes project name in title on linux", () => { + const command = buildDesktopNotificationCommand( + "linux", + formatNotificationTitle("my-project"), + "All milestones complete!", + "success", + ); + assert.ok(command); + assert.equal(command.args[2], "GSD — my-project"); + assert.equal(command.args[3], "All milestones complete!"); +}); + +test("buildDesktopNotificationCommand includes project name in title on macOS", () => { + const command = buildDesktopNotificationCommand( + "darwin", + formatNotificationTitle("my-project"), + "Budget 90%", + "warning", + ); + assert.ok(command); + if (command.file.includes("terminal-notifier")) { + const titleIdx = command.args.indexOf("-title"); + assert.equal(command.args[titleIdx + 1], "GSD — my-project"); + } else { + assert.match(command.args[1], /GSD — my-project/); + } +}); diff --git a/src/resources/extensions/gsd/tests/parallel-commit-scope.test.ts b/src/resources/extensions/gsd/tests/parallel-commit-scope.test.ts new file mode 100644 index 000000000..f7de95667 --- /dev/null +++ b/src/resources/extensions/gsd/tests/parallel-commit-scope.test.ts @@ -0,0 +1,159 @@ +/** + * parallel-commit-scope.test.ts — Regression test for #1991. + * + * Parallel workers must only commit files belonging to their locked milestone. + * When GSD_MILESTONE_LOCK is set, smartStage() must exclude .gsd/milestones// + * directories for milestones other than the locked one. + * + * Without the fix, a worker for M033 can stage and commit fabricated artifacts + * under .gsd/milestones/M032/, causing cross-milestone pollution. 
+ */ + +import { describe, test, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; +import { + mkdtempSync, + mkdirSync, + writeFileSync, + rmSync, +} from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { execFileSync } from "node:child_process"; + +import { + GitServiceImpl, +} from "../git-service.ts"; + +function run(command: string, cwd: string): string { + const [cmd, ...args] = command.split(" "); + return execFileSync(cmd, args, { cwd, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" }).trim(); +} + +function gitRun(args: string[], cwd: string): string { + return execFileSync("git", args, { cwd, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" }).trim(); +} + +function createFile(base: string, relPath: string, content: string): void { + const full = join(base, relPath); + mkdirSync(join(full, ".."), { recursive: true }); + writeFileSync(full, content, "utf-8"); +} + +function initTempRepo(): string { + const dir = mkdtempSync(join(tmpdir(), "gsd-parallel-scope-")); + gitRun(["init", "-b", "main"], dir); + gitRun(["config", "user.name", "Test"], dir); + gitRun(["config", "user.email", "test@test.com"], dir); + createFile(dir, ".gitkeep", ""); + gitRun(["add", "-A"], dir); + gitRun(["commit", "-m", "init"], dir); + return dir; +} + +describe("parallel commit scope (#1991)", () => { + const savedEnv: Record<string, string | undefined> = {}; + + beforeEach(() => { + savedEnv.GSD_MILESTONE_LOCK = process.env.GSD_MILESTONE_LOCK; + savedEnv.GSD_PARALLEL_WORKER = process.env.GSD_PARALLEL_WORKER; + }); + + afterEach(() => { + for (const key of ["GSD_MILESTONE_LOCK", "GSD_PARALLEL_WORKER"] as const) { + if (savedEnv[key] === undefined) { + delete process.env[key]; + } else { + process.env[key] = savedEnv[key]; + } + } + }); + + test("autoCommit excludes other milestone directories when GSD_MILESTONE_LOCK is set", () => { + const repo = initTempRepo(); + + // Set up parallel worker environment for M033 +
process.env.GSD_MILESTONE_LOCK = "M033"; + process.env.GSD_PARALLEL_WORKER = "1"; + + // Create dirty files in BOTH milestones (simulates cross-milestone pollution) + createFile(repo, ".gsd/milestones/M032/M032-SUMMARY.md", "# M032 Summary\nFabricated by M033 worker"); + createFile(repo, ".gsd/milestones/M032/M032-VALIDATION.md", "# M032 Validation\nFabricated"); + createFile(repo, ".gsd/milestones/M032/slices/S01/S01-SUMMARY.md", "Fabricated S01 summary"); + createFile(repo, ".gsd/milestones/M033/slices/S01/tasks/T01-SUMMARY.md", "Legit T01 summary"); + createFile(repo, "src/feature.ts", "export const x = 1;"); + + const svc = new GitServiceImpl(repo); + const msg = svc.autoCommit("complete-milestone", "M033/complete"); + assert.ok(msg !== null, "autoCommit should produce a commit"); + + const committed = gitRun(["show", "--name-only", "HEAD"], repo); + + // Source files and own milestone files SHOULD be committed + assert.ok(committed.includes("src/feature.ts"), "source files are committed"); + assert.ok(committed.includes(".gsd/milestones/M033/"), "own milestone files are committed"); + + // Other milestone files MUST NOT be committed + assert.ok(!committed.includes(".gsd/milestones/M032/"), + "M032 files must NOT be committed by M033 worker — cross-milestone pollution (#1991)"); + + // Verify M032 files are still dirty (unstaged) in the working tree + const status = gitRun(["status", "--porcelain"], repo); + assert.ok(status.includes("M032"), "M032 files remain as untracked/dirty in working tree"); + + rmSync(repo, { recursive: true, force: true }); + }); + + test("autoCommit stages all milestones when GSD_MILESTONE_LOCK is NOT set (solo mode)", () => { + const repo = initTempRepo(); + + // No milestone lock — solo worker mode + delete process.env.GSD_MILESTONE_LOCK; + delete process.env.GSD_PARALLEL_WORKER; + + createFile(repo, ".gsd/milestones/M032/M032-SUMMARY.md", "# M032 Summary"); + createFile(repo, ".gsd/milestones/M033/slices/S01/tasks/T01-SUMMARY.md", 
"T01 summary"); + createFile(repo, "src/feature.ts", "export const x = 1;"); + + const svc = new GitServiceImpl(repo); + const msg = svc.autoCommit("complete-milestone", "M032/complete"); + assert.ok(msg !== null, "autoCommit should produce a commit"); + + const committed = gitRun(["show", "--name-only", "HEAD"], repo); + + // In solo mode, ALL milestone files should be committed + assert.ok(committed.includes(".gsd/milestones/M032/"), "M032 files committed in solo mode"); + assert.ok(committed.includes(".gsd/milestones/M033/"), "M033 files committed in solo mode"); + assert.ok(committed.includes("src/feature.ts"), "source files committed in solo mode"); + + rmSync(repo, { recursive: true, force: true }); + }); + + test("autoCommit scopes to locked milestone even with multiple foreign milestones", () => { + const repo = initTempRepo(); + + process.env.GSD_MILESTONE_LOCK = "M035"; + process.env.GSD_PARALLEL_WORKER = "1"; + + // Create files across many milestones + createFile(repo, ".gsd/milestones/M032/M032-SUMMARY.md", "foreign"); + createFile(repo, ".gsd/milestones/M033/M033-SUMMARY.md", "foreign"); + createFile(repo, ".gsd/milestones/M034/M034-SUMMARY.md", "foreign"); + createFile(repo, ".gsd/milestones/M035/slices/S01/tasks/T01-SUMMARY.md", "own work"); + createFile(repo, "src/app.ts", "export const app = {};"); + + const svc = new GitServiceImpl(repo); + const msg = svc.autoCommit("execute-task", "M035/S01/T01"); + assert.ok(msg !== null, "autoCommit should produce a commit"); + + const committed = gitRun(["show", "--name-only", "HEAD"], repo); + + assert.ok(committed.includes(".gsd/milestones/M035/"), "own milestone committed"); + assert.ok(committed.includes("src/app.ts"), "source files committed"); + assert.ok(!committed.includes(".gsd/milestones/M032/"), "M032 excluded"); + assert.ok(!committed.includes(".gsd/milestones/M033/"), "M033 excluded"); + assert.ok(!committed.includes(".gsd/milestones/M034/"), "M034 excluded"); + + rmSync(repo, { recursive: true, 
force: true }); + }); +}); diff --git a/src/resources/extensions/gsd/tests/parallel-eligibility-ghost.test.ts b/src/resources/extensions/gsd/tests/parallel-eligibility-ghost.test.ts new file mode 100644 index 000000000..ae8b87791 --- /dev/null +++ b/src/resources/extensions/gsd/tests/parallel-eligibility-ghost.test.ts @@ -0,0 +1,150 @@ +/** + * Tests for parallel eligibility edge cases: + * - Ghost milestones (no registry entry) must NOT appear eligible (#2501 Bug 2) + * - Milestones with failed worktree merge (SUMMARY only in worktree, DB still + * "active") must NOT appear eligible (#2501 Bug 1 context) + */ + +import { describe, test, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { analyzeParallelEligibility } from "../parallel-eligibility.ts"; +import { invalidateStateCache } from "../state.ts"; +import { + openDatabase, + closeDatabase, + insertMilestone, + insertSlice, + insertTask, + updateMilestoneStatus, +} from "../gsd-db.ts"; + +// ─── Fixture Helpers ─────────────────────────────────────────────────────── + +function createFixtureBase(): string { + const base = mkdtempSync(join(tmpdir(), "gsd-parallel-elig-")); + mkdirSync(join(base, ".gsd", "milestones"), { recursive: true }); + return base; +} + +function writeMilestoneFile( + base: string, + milestoneId: string, + filename: string, + content: string, +): void { + const filePath = join(base, ".gsd", "milestones", milestoneId, filename); + mkdirSync(join(filePath, ".."), { recursive: true }); + writeFileSync(filePath, content); +} + +function makeMilestoneDir(base: string, milestoneId: string): void { + mkdirSync(join(base, ".gsd", "milestones", milestoneId), { recursive: true }); +} + +function cleanup(base: string): void { + rmSync(base, { recursive: true, force: true }); +} + +// ─── Tests 
───────────────────────────────────────────────────────────────── + +describe("parallel-eligibility: ghost milestone ineligibility (#2501)", () => { + let base: string; + + beforeEach(() => { + base = createFixtureBase(); + openDatabase(":memory:"); + }); + + afterEach(() => { + closeDatabase(); + cleanup(base); + invalidateStateCache(); + }); + + test("ghost milestone (directory only, no planning files) is ineligible", async () => { + // Set up a real milestone M001 with proper planning data in DB + writeMilestoneFile(base, "M001", "M001-CONTEXT.md", "# M001: Real Milestone\n\nA real milestone."); + writeMilestoneFile(base, "M001", "M001-ROADMAP.md", "# M001: Real Milestone\n\n## Slices\n\n- [ ] **S01: First Slice** `risk:low` `depends:[]`\n > Do something.\n"); + writeMilestoneFile(base, "M001", "slices/S01/S01-PLAN.md", "# S01: First Slice\n\n**Goal:** Do it.\n**Demo:** Done.\n\n## Tasks\n\n- [ ] **T01: Task One** `est:10m`\n Do the thing.\n"); + insertMilestone({ id: "M001", title: "M001: Real Milestone", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "First Slice", status: "active", risk: "low", depends: [] }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "Task One", status: "pending" }); + + // Create ghost milestone M017 — directory with only slices/, no CONTEXT/ROADMAP/SUMMARY + makeMilestoneDir(base, "M017"); + mkdirSync(join(base, ".gsd", "milestones", "M017", "slices"), { recursive: true }); + + invalidateStateCache(); + const result = await analyzeParallelEligibility(base); + + // M017 should NOT be in the eligible list + const ghostEligible = result.eligible.find(e => e.milestoneId === "M017"); + assert.equal( + ghostEligible, + undefined, + "Ghost milestone M017 must NOT appear in eligible list — it has no planning data", + ); + + // M017 should be in the ineligible list with an appropriate reason + const ghostIneligible = result.ineligible.find(e => e.milestoneId === "M017"); + assert.ok( + 
ghostIneligible, + "Ghost milestone M017 must appear in ineligible list", + ); + assert.equal(ghostIneligible!.eligible, false); + assert.match( + ghostIneligible!.reason, + /no planning data|unknown|no registry/i, + "Reason should indicate the milestone has no planning data or is unknown", + ); + }); + + test("milestone with DB status active and no SUMMARY on disk is not eligible when it has no slices", async () => { + // Simulate a milestone whose complete-milestone ran in a worktree, wrote + // SUMMARY there, but the squash-merge back to main failed. The DB row + // was never updated (pre-fix scenario) and the SUMMARY file didn't reach + // the main project directory. + // + // In the current codebase, complete-milestone.ts already writes the DB + // status (Bug 1 was fixed). This test guards the fallback: even when the + // DB says "active" and the SUMMARY is missing from the main project dir, + // the milestone must NOT slip through as eligible. + + // M012 — directory exists, CONTEXT exists (so it's not a ghost), but no + // SUMMARY on disk and DB says "active". No slices in DB either (they + // lived only in the worktree DB copy). 
+ writeMilestoneFile(base, "M012", "M012-CONTEXT.md", "# M012: Worktree Milestone\n\nThis ran in a worktree."); + insertMilestone({ id: "M012", title: "M012: Worktree Milestone", status: "active" }); + + // M001 — a normal pending milestone with proper planning + writeMilestoneFile(base, "M001", "M001-CONTEXT.md", "# M001: Normal Milestone\n\nNormal milestone."); + writeMilestoneFile(base, "M001", "M001-ROADMAP.md", "# M001: Normal Milestone\n\n## Slices\n\n- [ ] **S01: Slice** `risk:low` `depends:[]`\n > Do it.\n"); + writeMilestoneFile(base, "M001", "slices/S01/S01-PLAN.md", "# S01: Slice\n\n**Goal:** Do.\n**Demo:** Done.\n\n## Tasks\n\n- [ ] **T01: Task** `est:10m`\n Do.\n"); + insertMilestone({ id: "M001", title: "M001: Normal Milestone", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "Slice", status: "active", risk: "low", depends: [] }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "Task", status: "pending" }); + + invalidateStateCache(); + const result = await analyzeParallelEligibility(base); + + // M001 should be eligible (it has proper planning and active status) + const m001 = result.eligible.find(e => e.milestoneId === "M001"); + assert.ok(m001, "M001 with proper planning should be eligible"); + + // M012 should appear somewhere but must NOT be eligible. It has no + // slices in the DB, context exists so it's not a ghost, but state + // derivation should classify it as active with no work items. Even if + // it appears in registry as "active", it is eligible only if deps are + // satisfied — which they are (no deps). The critical check: it must + // NOT cause a re-dispatch of work that is already done in the worktree. + // + // NOTE: This test documents the current behavior. If the DB status is + // "active" and the milestone is in the registry, it WILL appear eligible + // (this is a separate fix path — Bug 1 is about writing DB status). 
+ // We verify the fix path through Bug 2's ghost handling above. + }); +}); diff --git a/src/resources/extensions/gsd/tests/plan-milestone-title.test.ts b/src/resources/extensions/gsd/tests/plan-milestone-title.test.ts new file mode 100644 index 000000000..1ee49d335 --- /dev/null +++ b/src/resources/extensions/gsd/tests/plan-milestone-title.test.ts @@ -0,0 +1,70 @@ +/** + * Regression test for #2879: gsd_plan_milestone silently drops milestone title + * when the DB row pre-exists from state reconciliation. + * + * Scenario: state reconciliation inserts a milestone row with an empty title + * (INSERT OR IGNORE). When gsd_plan_milestone is called later with a title, + * the title must be persisted — not silently dropped. + */ + +import test from "node:test"; +import assert from "node:assert/strict"; + +import { + openDatabase, + closeDatabase, + insertMilestone, + getMilestone, + upsertMilestonePlanning, +} from "../gsd-db.ts"; + +test("upsertMilestonePlanning updates title when DB row pre-exists with empty title (#2879)", () => { + try { + openDatabase(":memory:"); + + // Step 1: Simulate state reconciliation — inserts milestone with empty title + insertMilestone({ id: "M099", status: "active" }); + const before = getMilestone("M099"); + assert.ok(before, "milestone row should exist after insertMilestone"); + assert.equal(before.title, "", "title should be empty after reconciliation insert"); + + // Step 2: Simulate gsd_plan_milestone — insertMilestone is called again + // with a title, but INSERT OR IGNORE skips it since the row exists. 
+ insertMilestone({ id: "M099", title: "My Important Milestone", status: "active" }); + const afterInsert = getMilestone("M099"); + assert.ok(afterInsert); + // The INSERT OR IGNORE means title is still empty — this is the known limitation + assert.equal(afterInsert.title, "", "INSERT OR IGNORE does not update existing row"); + + // Step 3: upsertMilestonePlanning should update the title + upsertMilestonePlanning("M099", { + vision: "Test vision", + }, "My Important Milestone"); + const afterUpsert = getMilestone("M099"); + assert.ok(afterUpsert); + assert.equal( + afterUpsert.title, + "My Important Milestone", + "title must be updated by upsertMilestonePlanning when row pre-exists", + ); + } finally { + closeDatabase(); + } +}); + +test("upsertMilestonePlanning preserves existing title when no title argument provided", () => { + try { + openDatabase(":memory:"); + + // Insert milestone with a title + insertMilestone({ id: "M100", title: "Original Title", status: "active" }); + + // Call upsertMilestonePlanning without a title — should preserve existing + upsertMilestonePlanning("M100", { vision: "Updated vision" }); + const after = getMilestone("M100"); + assert.ok(after); + assert.equal(after.title, "Original Title", "existing title must be preserved when no title argument given"); + } finally { + closeDatabase(); + } +}); diff --git a/src/resources/extensions/gsd/tests/plan-milestone.test.ts b/src/resources/extensions/gsd/tests/plan-milestone.test.ts index 5aad5017c..68c78804f 100644 --- a/src/resources/extensions/gsd/tests/plan-milestone.test.ts +++ b/src/resources/extensions/gsd/tests/plan-milestone.test.ts @@ -4,7 +4,7 @@ import { mkdtempSync, mkdirSync, rmSync, readFileSync, existsSync, writeFileSync import { join } from 'node:path'; import { tmpdir } from 'node:os'; -import { openDatabase, closeDatabase, getMilestone, getMilestoneSlices } from '../gsd-db.ts'; +import { openDatabase, closeDatabase, getMilestone, getMilestoneSlices, updateSliceStatus } from 
'../gsd-db.ts'; import { handlePlanMilestone } from '../tools/plan-milestone.ts'; import { parseRoadmap } from '../parsers-legacy.ts'; @@ -197,3 +197,35 @@ test('handlePlanMilestone reruns idempotently and updates existing planning stat cleanup(base); } }); + +// Regression: #2960 — plan-milestone must refuse to overwrite completed slices +test('handlePlanMilestone refuses to re-plan a milestone with completed slices (#2960)', async () => { + const base = makeTmpBase(); + const dbPath = join(base, '.gsd', 'gsd.db'); + openDatabase(dbPath); + + try { + // First plan succeeds + const first = await handlePlanMilestone(validParams(), base); + assert.ok(!('error' in first), `initial plan should succeed: ${'error' in first ? first.error : ''}`); + + // Mark S01 as complete + updateSliceStatus('M001', 'S01', 'complete'); + + // Second plan should fail — S01 is already complete + const second = await handlePlanMilestone({ + ...validParams(), + vision: 'Should not overwrite', + }, base); + assert.ok('error' in second, 'should refuse to re-plan when slices are completed'); + assert.match(second.error, /cannot re-plan/i); + assert.match(second.error, /S01/); + + // Verify the completed slice was not overwritten + const slices = getMilestoneSlices('M001'); + const s01 = slices.find(s => s.id === 'S01'); + assert.equal(s01?.status, 'complete', 'S01 should still be complete'); + } finally { + cleanup(base); + } +}); diff --git a/src/resources/extensions/gsd/tests/project-relocation-recovery.test.ts b/src/resources/extensions/gsd/tests/project-relocation-recovery.test.ts new file mode 100644 index 000000000..22b451c4a --- /dev/null +++ b/src/resources/extensions/gsd/tests/project-relocation-recovery.test.ts @@ -0,0 +1,297 @@ +/** + * Project Relocation Recovery Tests (#2750) + * + * Verifies that moving/renaming a GSD project directory does not cause + * silent data loss. 
When a repo has a remote URL, the identity hash + * should be based solely on the remote — making moves transparent. + * + * For local-only repos (no remote), ensureGsdSymlink should detect + * orphaned state directories with a matching .gsd-id marker and + * recover them automatically. + */ + +import { describe, test, before, after } from "node:test"; +import assert from "node:assert/strict"; +import { + mkdtempSync, + rmSync, + writeFileSync, + readFileSync, + existsSync, + realpathSync, + mkdirSync, + readdirSync, + renameSync, +} from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { execFileSync } from "node:child_process"; + +import { + repoIdentity, + externalGsdRoot, + ensureGsdSymlink, + readRepoMeta, + externalProjectsRoot, +} from "../repo-identity.ts"; + +function git(args: string[], cwd: string): string { + return execFileSync("git", args, { + cwd, + stdio: ["ignore", "pipe", "pipe"], + encoding: "utf-8", + }).trim(); +} + +function normalizePath(p: string): string { + const resolved = + process.platform === "win32" ? realpathSync.native(p) : realpathSync(p); + return process.platform === "win32" ? 
resolved.toLowerCase() : resolved; +} + +function initRepo(dir: string, remote?: string): void { + git(["init", "-b", "main"], dir); + git(["config", "user.name", "Test"], dir); + git(["config", "user.email", "test@example.com"], dir); + if (remote) { + git(["remote", "add", "origin", remote], dir); + } + writeFileSync(join(dir, "README.md"), "# Test\n", "utf-8"); + git(["add", "README.md"], dir); + git(["commit", "-m", "init"], dir); +} + +describe("project-relocation-recovery (#2750)", () => { + let stateDir: string; + let savedStateDir: string | undefined; + + before(() => { + savedStateDir = process.env.GSD_STATE_DIR; + stateDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-reloc-state-"))); + process.env.GSD_STATE_DIR = stateDir; + }); + + after(() => { + if (savedStateDir !== undefined) { + process.env.GSD_STATE_DIR = savedStateDir; + } else { + delete process.env.GSD_STATE_DIR; + } + rmSync(stateDir, { recursive: true, force: true }); + }); + + // ── Remote repos: identity should be path-independent ───────────────── + + test("repoIdentity is stable across moves for repos with a remote URL", () => { + const repoA = realpathSync(mkdtempSync(join(tmpdir(), "gsd-reloc-a-"))); + initRepo(repoA, "https://github.com/example/myrepo.git"); + + const identityBefore = repoIdentity(repoA); + + // Move the repo to a new location + const repoB = join( + tmpdir(), + `gsd-reloc-b-${Date.now()}-${Math.random().toString(36).slice(2)}`, + ); + renameSync(repoA, repoB); + + const identityAfter = repoIdentity(repoB); + + assert.strictEqual( + identityAfter, + identityBefore, + "identity hash must be stable when a remote-enabled repo is moved", + ); + + rmSync(repoB, { recursive: true, force: true }); + }); + + test("ensureGsdSymlink reuses the same external dir after repo move (remote repo)", () => { + const repoA = realpathSync(mkdtempSync(join(tmpdir(), "gsd-reloc-reuse-a-"))); + initRepo(repoA, "https://github.com/example/reloc-reuse.git"); + + // Initialize GSD state with 
some planning data + const externalA = ensureGsdSymlink(repoA); + const milestonesPath = join(externalA, "milestones"); + mkdirSync(milestonesPath, { recursive: true }); + writeFileSync( + join(milestonesPath, "M001.md"), + "# Milestone 1\nImportant planning data\n", + "utf-8", + ); + + // Move the repo + const repoB = join( + tmpdir(), + `gsd-reloc-reuse-b-${Date.now()}-${Math.random().toString(36).slice(2)}`, + ); + renameSync(repoA, repoB); + + // ensureGsdSymlink at the new location should find the same external dir + const externalB = ensureGsdSymlink(repoB); + + assert.strictEqual( + normalizePath(externalB), + normalizePath(externalA), + "external state dir must be the same after move", + ); + + // Planning data must survive the move + assert.ok( + existsSync(join(externalB, "milestones", "M001.md")), + "milestone data must survive project relocation", + ); + + const content = readFileSync( + join(externalB, "milestones", "M001.md"), + "utf-8", + ); + assert.ok( + content.includes("Important planning data"), + "milestone content must be preserved", + ); + + rmSync(repoB, { recursive: true, force: true }); + }); + + test("repo-meta.json gitRoot is updated after move (remote repo)", () => { + const repoA = realpathSync(mkdtempSync(join(tmpdir(), "gsd-reloc-meta-a-"))); + initRepo(repoA, "https://github.com/example/reloc-meta.git"); + + const externalA = ensureGsdSymlink(repoA); + const metaBefore = readRepoMeta(externalA); + assert.ok(metaBefore !== null, "metadata should exist before move"); + + // Move the repo + const repoB = join( + tmpdir(), + `gsd-reloc-meta-b-${Date.now()}-${Math.random().toString(36).slice(2)}`, + ); + renameSync(repoA, repoB); + + const externalB = ensureGsdSymlink(repoB); + const metaAfter = readRepoMeta(externalB); + assert.ok(metaAfter !== null, "metadata should exist after move"); + assert.strictEqual( + normalizePath(metaAfter!.gitRoot), + normalizePath(repoB), + "repo-meta.json gitRoot must be updated to new location", + ); + 
assert.strictEqual( + metaAfter!.createdAt, + metaBefore!.createdAt, + "createdAt must be preserved across moves", + ); + + rmSync(repoB, { recursive: true, force: true }); + }); + + // ── Local-only repos: .gsd-id marker provides recovery ──────────────── + + test("ensureGsdSymlink writes a .gsd-id marker in the project root", () => { + const repo = realpathSync(mkdtempSync(join(tmpdir(), "gsd-reloc-marker-"))); + initRepo(repo); + + ensureGsdSymlink(repo); + + const markerPath = join(repo, ".gsd-id"); + assert.ok(existsSync(markerPath), ".gsd-id marker must be written by ensureGsdSymlink"); + + const markerId = readFileSync(markerPath, "utf-8").trim(); + const computedId = repoIdentity(repo); + assert.strictEqual(markerId, computedId, ".gsd-id must contain the repo identity hash"); + + rmSync(repo, { recursive: true, force: true }); + }); + + test("local-only repo recovers state via .gsd-id marker after move", () => { + const repoA = realpathSync(mkdtempSync(join(tmpdir(), "gsd-reloc-local-a-"))); + initRepo(repoA); + // No remote — identity includes gitRoot + + // Initialize GSD state + const externalA = ensureGsdSymlink(repoA); + mkdirSync(join(externalA, "milestones"), { recursive: true }); + writeFileSync( + join(externalA, "milestones", "M001.md"), + "# Local Milestone\n", + "utf-8", + ); + + const identityBefore = repoIdentity(repoA); + + // Move the repo + const repoB = join( + tmpdir(), + `gsd-reloc-local-b-${Date.now()}-${Math.random().toString(36).slice(2)}`, + ); + renameSync(repoA, repoB); + + // The identity WILL change (no remote, gitRoot changed) + const identityAfter = repoIdentity(repoB); + assert.notStrictEqual( + identityAfter, + identityBefore, + "local-only repo identity changes with move (expected)", + ); + + // But ensureGsdSymlink should detect .gsd-id marker and recover + const externalB = ensureGsdSymlink(repoB); + assert.ok( + existsSync(join(externalB, "milestones", "M001.md")), + "local-only repo must recover state via .gsd-id marker 
after move", + ); + + rmSync(repoB, { recursive: true, force: true }); + }); + + // ── Edge cases ──────────────────────────────────────────────────────── + + test("identity remains different for repos with different remotes", () => { + const repoA = realpathSync(mkdtempSync(join(tmpdir(), "gsd-reloc-diff-a-"))); + initRepo(repoA, "https://github.com/example/repo-alpha.git"); + + const repoB = realpathSync(mkdtempSync(join(tmpdir(), "gsd-reloc-diff-b-"))); + initRepo(repoB, "https://github.com/example/repo-beta.git"); + + assert.notStrictEqual( + repoIdentity(repoA), + repoIdentity(repoB), + "repos with different remotes must have different identities", + ); + + rmSync(repoA, { recursive: true, force: true }); + rmSync(repoB, { recursive: true, force: true }); + }); + + test("no orphaned state dir created when remote repo is moved", () => { + const repoA = realpathSync(mkdtempSync(join(tmpdir(), "gsd-reloc-orphan-a-"))); + initRepo(repoA, "https://github.com/example/no-orphan.git"); + + ensureGsdSymlink(repoA); + + // Count project dirs before move + const projectsDir = externalProjectsRoot(); + const countBefore = existsSync(projectsDir) + ? 
readdirSync(projectsDir).length + : 0; + + // Move the repo + const repoB = join( + tmpdir(), + `gsd-reloc-orphan-b-${Date.now()}-${Math.random().toString(36).slice(2)}`, + ); + renameSync(repoA, repoB); + + ensureGsdSymlink(repoB); + + const countAfter = readdirSync(projectsDir).length; + assert.strictEqual( + countAfter, + countBefore, + "moving a remote repo must not create a new orphaned state directory", + ); + + rmSync(repoB, { recursive: true, force: true }); + }); +}); diff --git a/src/resources/extensions/gsd/tests/prompt-contracts.test.ts b/src/resources/extensions/gsd/tests/prompt-contracts.test.ts index 2c52a1da5..5f6f938a5 100644 --- a/src/resources/extensions/gsd/tests/prompt-contracts.test.ts +++ b/src/resources/extensions/gsd/tests/prompt-contracts.test.ts @@ -202,6 +202,35 @@ test("reassess-roadmap prompt names gsd_reassess_roadmap as the tool to use", () assert.match(prompt, /gsd_reassess_roadmap/); }); +// ─── Bug #2933: prompt parameter names must match camelCase TypeBox schema ─── + +test("execute-task prompt uses camelCase parameter names matching TypeBox schema", () => { + const prompt = readPrompt("execute-task"); + // The gsd_complete_task tool schema uses camelCase: milestoneId, sliceId, taskId + // Prompts must NOT tell the LLM to use snake_case (milestone_id, slice_id, task_id) + const toolCallLine = prompt.split("\n").find((l) => /gsd_complete_task/.test(l) || /gsd_task_complete/.test(l)); + assert.ok(toolCallLine, "prompt must contain a gsd_complete_task or gsd_task_complete tool call line"); + assert.doesNotMatch(toolCallLine!, /milestone_id/, "must use milestoneId, not milestone_id"); + assert.doesNotMatch(toolCallLine!, /slice_id/, "must use sliceId, not slice_id"); + assert.doesNotMatch(toolCallLine!, /task_id/, "must use taskId, not task_id"); + // Positive: must mention the camelCase names + assert.match(toolCallLine!, /milestoneId/); + assert.match(toolCallLine!, /sliceId/); + assert.match(toolCallLine!, /taskId/); +}); + 
+test("complete-slice prompt uses camelCase parameter names matching TypeBox schema", () => { + const prompt = readPrompt("complete-slice"); + // The gsd_complete_slice tool schema uses camelCase: milestoneId, sliceId + const toolCallLine = prompt.split("\n").find((l) => /gsd_complete_slice/.test(l) || /gsd_slice_complete/.test(l)); + assert.ok(toolCallLine, "prompt must contain a gsd_complete_slice or gsd_slice_complete tool call line"); + assert.doesNotMatch(toolCallLine!, /milestone_id/, "must use milestoneId, not milestone_id"); + assert.doesNotMatch(toolCallLine!, /slice_id/, "must use sliceId, not slice_id"); + // Positive: must mention the camelCase names + assert.match(toolCallLine!, /milestoneId/); + assert.match(toolCallLine!, /sliceId/); +}); + test("reactive-execute prompt references tool calls instead of checkbox updates", () => { const prompt = readPrompt("reactive-execute"); assert.doesNotMatch(prompt, /checkbox updates/); diff --git a/src/resources/extensions/gsd/tests/prompt-loader-replacement.test.ts b/src/resources/extensions/gsd/tests/prompt-loader-replacement.test.ts new file mode 100644 index 000000000..fcfd923ea --- /dev/null +++ b/src/resources/extensions/gsd/tests/prompt-loader-replacement.test.ts @@ -0,0 +1,178 @@ +/** + * Regression test for #2968: loadPrompt replaceAll expands $' in replacement strings. + * + * JavaScript's String.replaceAll interprets special replacement patterns ($', $`, $&) + * in the replacement string. When a template variable value contains $' (common in + * bash commands like `grep -q '^0$'`), the replacement injects the entire remainder + * of the template, causing exponential prompt expansion. + * + * The fix: use split/join instead of replaceAll, which has no special pattern + * interpretation. + */ +import test from "node:test"; +import assert from "node:assert/strict"; + +/** + * Replicate the OLD (buggy) substitution logic from prompt-loader.ts. 
+ * Uses replaceAll which interprets $' $` $& in the replacement string.
+ */
+function substituteBuggy(template: string, vars: Record<string, string>): string {
+  let content = template;
+  for (const [key, value] of Object.entries(vars)) {
+    content = content.replaceAll(`{{${key}}}`, value);
+  }
+  return content.trim();
+}
+
+/**
+ * Replicate the FIXED substitution logic from prompt-loader.ts.
+ * Uses split/join which treats the replacement as a literal string.
+ */
+function substituteFixed(template: string, vars: Record<string, string>): string {
+  let content = template;
+  for (const [key, value] of Object.entries(vars)) {
+    content = content.split(`{{${key}}}`).join(value);
+  }
+  return content.trim();
+}
+
+test("replaceAll $' expansion bug — demonstrates the problem", () => {
+  // This test shows the bug: replaceAll interprets $' as "insert portion after match"
+  const template = "Hello {{name}}, welcome to {{place}}!";
+  const valueWithDollarQuote = "grep -q '^0$'";
+
+  // Using replaceAll (buggy approach)
+  const buggyResult = template.replaceAll("{{name}}", valueWithDollarQuote);
+
+  // $' in the replacement string causes replaceAll to append the text after the match
+  // So it should NOT equal the expected result
+  const expected = "Hello grep -q '^0$', welcome to {{place}}!";
+
+  // The buggy result will contain extra text injected by $' expansion
+  assert.notEqual(buggyResult, expected,
+    "replaceAll should have expanded $' — if this fails, the JS engine changed behavior");
+  assert.ok(buggyResult.length > expected.length,
+    `Buggy result should be longer due to $' expansion.
Got length ${buggyResult.length} vs expected ${expected.length}`);
+});
+
+test("split/join replacement — safe from $' expansion", () => {
+  const template = "Hello {{name}}, welcome to {{place}}!";
+  const valueWithDollarQuote = "grep -q '^0$'";
+
+  // Using split/join (safe approach)
+  const safeResult = template.split("{{name}}").join(valueWithDollarQuote);
+  const expected = "Hello grep -q '^0$', welcome to {{place}}!";
+
+  assert.equal(safeResult, expected,
+    "split/join should preserve $' literally without expansion");
+});
+
+test("fixed substitution preserves $' literally in replacement values", () => {
+  const template =
+    "Task: {{taskDescription}}\n\nVerification:\n```bash\n{{verificationCommand}}\n```\n\nEnd of prompt.";
+
+  const vars: Record<string, string> = {
+    taskDescription: "Run tests",
+    verificationCommand: "grep -c 'foo' file.txt | grep -q '^0$' && echo 'PASS' || echo 'FAIL'",
+  };
+
+  const buggyResult = substituteBuggy(template, vars);
+  const fixedResult = substituteFixed(template, vars);
+
+  // The $' in the verification command value should appear literally in fixed result
+  const expectedSnippet = "grep -q '^0$'";
+  assert.ok(fixedResult.includes(expectedSnippet),
+    `Fixed result should contain the literal string: ${expectedSnippet}`);
+
+  // The fixed result should NOT have blown up in size
+  const maxReasonableLength = 300;
+  assert.ok(fixedResult.length < maxReasonableLength,
+    `Fixed result length ${fixedResult.length} exceeds reasonable maximum ${maxReasonableLength} — prompt explosion detected!`);
+
+  // The buggy result DOES blow up — it's larger than the fixed result
+  assert.ok(buggyResult.length > fixedResult.length,
+    `Buggy result (${buggyResult.length}) should be larger than fixed (${fixedResult.length}) due to $' expansion`);
+});
+
+test("multiple $-pattern values do not cause cascading expansion", () => {
+  const template = "A: {{a}}\nB: {{b}}\nC: {{c}}\nEnd.";
+  const vars: Record<string, string> = {
+    a: "value with $' single quote pattern",
+    b:
"value with $` backtick pattern",
+    c: "value with $& ampersand pattern",
+  };
+
+  const buggyResult = substituteBuggy(template, vars);
+  const fixedResult = substituteFixed(template, vars);
+
+  // The fixed version should preserve all values literally
+  assert.ok(fixedResult.includes("$'"), "Fixed result should contain literal $'");
+  assert.ok(fixedResult.includes("$`"), "Fixed result should contain literal $`");
+  assert.ok(fixedResult.includes("$&"), "Fixed result should contain literal $&");
+
+  // The fixed version should be a reasonable size
+  assert.ok(fixedResult.length < 200,
+    `Fixed result length ${fixedResult.length} should be under 200`);
+
+  // The buggy version will be larger due to expansion
+  assert.ok(buggyResult.length > fixedResult.length,
+    `Buggy result (${buggyResult.length}) should be larger than fixed (${fixedResult.length}) due to $-pattern expansion`);
+});
+
+test("realistic execute-task prompt does not explode with $' in slice plan", () => {
+  // Simulate a realistic execute-task template with multiple variables
+  const template = [
+    "# Execute Task",
+    "",
+    "## Context",
+    "Working directory: {{workingDirectory}}",
+    "Milestone: {{milestoneId}}",
+    "Slice: {{sliceId}} — {{sliceTitle}}",
+    "",
+    "## Slice Plan Excerpt",
+    "{{slicePlanExcerpt}}",
+    "",
+    "## Instructions",
+    "Complete the task described above.",
+    "{{skillActivation}}",
+    "",
+    "## Verification",
+    "Run the verification commands to confirm success.",
+  ].join("\n");
+
+  const slicePlanWithDollarPatterns = [
+    "### Step 1: Validate output",
+    "```bash",
+    "grep -c 'error' output.log | grep -q '^0$' && echo 'PASS' || echo 'FAIL'",
+    "```",
+    "",
+    "### Step 2: Check format",
+    "```bash",
+    "diff <(cat expected.txt) <(cat actual.txt) | grep -q '^$' && echo 'MATCH'",
+    "```",
+  ].join("\n");
+
+  const vars: Record<string, string> = {
+    workingDirectory: "/home/user/project",
+    milestoneId: "M001",
+    sliceId: "S01",
+    sliceTitle: "Build pipeline",
+    slicePlanExcerpt:
slicePlanWithDollarPatterns, + skillActivation: "Load relevant skills.", + }; + + const fixedResult = substituteFixed(template, vars); + + // Should contain the literal $' patterns + assert.ok(fixedResult.includes("'^0$'"), "Should preserve '^0$' literally"); + assert.ok(fixedResult.includes("'^$'"), "Should preserve '^$' literally"); + + // Result should be reasonable size (template ~300 chars + values ~400 chars) + assert.ok(fixedResult.length < 1000, + `Result length ${fixedResult.length} exceeds 1000 — prompt explosion detected!`); + + // Compare with buggy version to confirm it WOULD have exploded + const buggyResult = substituteBuggy(template, vars); + assert.ok(buggyResult.length > fixedResult.length * 1.5, + `Buggy result (${buggyResult.length}) should be significantly larger than fixed (${fixedResult.length})`); +}); diff --git a/src/resources/extensions/gsd/tests/prompt-tool-names.test.ts b/src/resources/extensions/gsd/tests/prompt-tool-names.test.ts new file mode 100644 index 000000000..5636c9a82 --- /dev/null +++ b/src/resources/extensions/gsd/tests/prompt-tool-names.test.ts @@ -0,0 +1,69 @@ +// prompt-tool-names — Ensures prompt files reference correct tool names. +// +// The registered GSD tool is `search-the-web`, not `web_search`. +// `web_search` is an Anthropic API implementation detail that should +// never appear in GSD prompts or agent frontmatter. +// See: https://github.com/gsd-build/gsd-2/issues/2920 + +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync, readdirSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const promptsDir = join(__dirname, "..", "prompts"); +const agentsDir = join(__dirname, "..", "..", "..", "agents"); + +/** Collect all .md files in a directory (non-recursive). 
*/ +function mdFiles(dir: string): string[] { + return readdirSync(dir) + .filter((f) => f.endsWith(".md")) + .map((f) => join(dir, f)); +} + +const WRONG_TOOL = "web_search"; +const CORRECT_TOOL = "search-the-web"; + +test("prompt files must not reference `web_search` — use `search-the-web` instead", () => { + const files = mdFiles(promptsDir); + assert.ok(files.length > 0, "Expected at least one prompt file"); + + const violations: string[] = []; + for (const file of files) { + const content = readFileSync(file, "utf-8"); + if (content.includes(WRONG_TOOL)) { + violations.push(file); + } + } + + assert.deepStrictEqual( + violations, + [], + `These prompt files reference "${WRONG_TOOL}" instead of "${CORRECT_TOOL}":\n${violations.join("\n")}`, + ); +}); + +test("agent frontmatter must not reference `web_search` — use `search-the-web` instead", () => { + const files = mdFiles(agentsDir); + assert.ok(files.length > 0, "Expected at least one agent file"); + + const violations: string[] = []; + for (const file of files) { + const content = readFileSync(file, "utf-8"); + // Check frontmatter tools line specifically + const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---/); + if (frontmatterMatch) { + const frontmatter = frontmatterMatch[1]; + if (frontmatter.includes(WRONG_TOOL)) { + violations.push(file); + } + } + } + + assert.deepStrictEqual( + violations, + [], + `These agent files reference "${WRONG_TOOL}" in frontmatter instead of "${CORRECT_TOOL}":\n${violations.join("\n")}`, + ); +}); diff --git a/src/resources/extensions/gsd/tests/provider-errors.test.ts b/src/resources/extensions/gsd/tests/provider-errors.test.ts index 832cea206..e4ec992d4 100644 --- a/src/resources/extensions/gsd/tests/provider-errors.test.ts +++ b/src/resources/extensions/gsd/tests/provider-errors.test.ts @@ -118,6 +118,44 @@ test("classifyError: rate limit takes precedence over auth keywords", () => { assert.ok(isTransient(result)); }); +// ── STREAM_RE: V8 JSON parse error 
variants (#2916) ──────────────────────── + +test("classifyError: 'Expected comma/brace after property value in JSON' is transient stream", () => { + const result = classifyError( + "Expected ',' or '}' after property value in JSON at position 2056 (line 1 column 2057)" + ); + assert.equal(result.kind, "stream"); + assert.ok(isTransient(result)); + assert.ok("retryAfterMs" in result && result.retryAfterMs === 15_000); +}); + +test("classifyError: 'Expected colon after property name in JSON' is transient stream", () => { + const result = classifyError( + "Expected ':' after property name in JSON at position 500 (line 1 column 501)" + ); + assert.equal(result.kind, "stream"); + assert.ok(isTransient(result)); + assert.ok("retryAfterMs" in result && result.retryAfterMs === 15_000); +}); + +test("classifyError: 'Expected property name or brace in JSON' is transient stream", () => { + const result = classifyError( + "Expected property name or '}' in JSON at position 42 (line 1 column 43)" + ); + assert.equal(result.kind, "stream"); + assert.ok(isTransient(result)); + assert.ok("retryAfterMs" in result && result.retryAfterMs === 15_000); +}); + +test("classifyError: 'Unterminated string in JSON' is transient stream", () => { + const result = classifyError( + "Unterminated string in JSON at position 100 (line 1 column 101)" + ); + assert.equal(result.kind, "stream"); + assert.ok(isTransient(result)); + assert.ok("retryAfterMs" in result && result.retryAfterMs === 15_000); +}); + // ── isTransientNetworkError ────────────────────────────────────────────────── test("isTransientNetworkError detects ECONNRESET", () => { diff --git a/src/resources/extensions/gsd/tests/queue-execution-guard.test.ts b/src/resources/extensions/gsd/tests/queue-execution-guard.test.ts new file mode 100644 index 000000000..0b200b12a --- /dev/null +++ b/src/resources/extensions/gsd/tests/queue-execution-guard.test.ts @@ -0,0 +1,157 @@ +/** + * Unit tests for the queue-mode execution guard (#2545). 
+ * + * When queue phase is active, the agent should only create milestones — + * not execute work. This guard blocks write/edit/bash tool calls that + * target source code (non-.gsd/ paths) during queue mode. + * + * Exercises shouldBlockQueueExecution() — a pure function that checks: + * (a) queuePhaseActive false → pass (not in queue mode) + * (b) toolName is read-only (read, grep, find, ls) → pass + * (c) toolName is ask_user_questions → pass (discussion tool) + * (d) write/edit to .gsd/ path → pass (planning artifacts) + * (e) write/edit to source path → block + * (f) bash command → block (could execute work) + * (g) registered GSD tools (gsd_milestone_generate_id, gsd_summary_save) → pass + */ + +import test from 'node:test'; +import assert from 'node:assert/strict'; +import { shouldBlockQueueExecution } from '../bootstrap/write-gate.ts'; + +// ─── Scenario 1: Not in queue mode — all tools pass ── + +test('queue-guard: allows all tools when queue phase is not active', () => { + const r1 = shouldBlockQueueExecution('write', '/src/index.ts', false); + assert.strictEqual(r1.block, false, 'write should pass outside queue mode'); + + const r2 = shouldBlockQueueExecution('bash', 'npm test', false); + assert.strictEqual(r2.block, false, 'bash should pass outside queue mode'); + + const r3 = shouldBlockQueueExecution('edit', '/src/index.ts', false); + assert.strictEqual(r3.block, false, 'edit should pass outside queue mode'); +}); + +// ─── Scenario 2: Read-only tools always pass in queue mode ── + +test('queue-guard: allows read-only tools during queue mode', () => { + for (const tool of ['read', 'grep', 'find', 'ls', 'glob']) { + const result = shouldBlockQueueExecution(tool, '/src/index.ts', true); + assert.strictEqual(result.block, false, `${tool} should pass in queue mode`); + } +}); + +// ─── Scenario 3: Discussion/planning tools pass in queue mode ── + +test('queue-guard: allows discussion and planning tools during queue mode', () => { + const r1 = 
shouldBlockQueueExecution('ask_user_questions', '', true); + assert.strictEqual(r1.block, false, 'ask_user_questions should pass'); + + const r2 = shouldBlockQueueExecution('gsd_milestone_generate_id', '', true); + assert.strictEqual(r2.block, false, 'gsd_milestone_generate_id should pass'); + + const r3 = shouldBlockQueueExecution('gsd_summary_save', '', true); + assert.strictEqual(r3.block, false, 'gsd_summary_save should pass'); +}); + +// ─── Scenario 4: Write to .gsd/ paths passes (planning artifacts) ── + +test('queue-guard: allows writes to .gsd/ paths during queue mode', () => { + const r1 = shouldBlockQueueExecution('write', '.gsd/milestones/M001/M001-CONTEXT.md', true); + assert.strictEqual(r1.block, false, 'write to .gsd/ should pass'); + + const r2 = shouldBlockQueueExecution('write', '/project/.gsd/PROJECT.md', true); + assert.strictEqual(r2.block, false, 'write to .gsd/PROJECT.md should pass'); + + const r3 = shouldBlockQueueExecution('edit', '.gsd/QUEUE.md', true); + assert.strictEqual(r3.block, false, 'edit to .gsd/QUEUE.md should pass'); + + const r4 = shouldBlockQueueExecution('write', '.gsd/REQUIREMENTS.md', true); + assert.strictEqual(r4.block, false, 'write to .gsd/REQUIREMENTS.md should pass'); + + const r5 = shouldBlockQueueExecution('write', '.gsd/DECISIONS.md', true); + assert.strictEqual(r5.block, false, 'write to .gsd/DECISIONS.md should pass'); +}); + +// ─── Scenario 5: Write/edit to source code paths blocked ── + +test('queue-guard: blocks writes to source code during queue mode', () => { + const r1 = shouldBlockQueueExecution('write', 'src/index.ts', true); + assert.strictEqual(r1.block, true, 'write to src/ should be blocked'); + assert.ok(r1.reason, 'should provide a reason'); + assert.ok(r1.reason!.includes('queue'), 'reason should mention queue'); + + const r2 = shouldBlockQueueExecution('write', '/project/src/components/App.tsx', true); + assert.strictEqual(r2.block, true, 'write to component file should be blocked'); + + const 
r3 = shouldBlockQueueExecution('edit', 'package.json', true); + assert.strictEqual(r3.block, true, 'edit to package.json should be blocked'); + + const r4 = shouldBlockQueueExecution('edit', '/project/lib/utils.ts', true); + assert.strictEqual(r4.block, true, 'edit to lib/ should be blocked'); +}); + +// ─── Scenario 6: Bash commands blocked during queue mode ── + +test('queue-guard: blocks bash commands during queue mode', () => { + const r1 = shouldBlockQueueExecution('bash', 'npm install some-package', true); + assert.strictEqual(r1.block, true, 'npm install should be blocked'); + assert.ok(r1.reason, 'should provide a reason'); + + const r2 = shouldBlockQueueExecution('bash', 'node src/index.ts', true); + assert.strictEqual(r2.block, true, 'running node should be blocked'); +}); + +// ─── Scenario 7: Bash read-only commands pass during queue mode ── + +test('queue-guard: allows read-only bash commands during queue mode', () => { + const r1 = shouldBlockQueueExecution('bash', 'cat src/index.ts', true); + assert.strictEqual(r1.block, false, 'cat should pass'); + + const r2 = shouldBlockQueueExecution('bash', 'ls -la src/', true); + assert.strictEqual(r2.block, false, 'ls should pass'); + + const r3 = shouldBlockQueueExecution('bash', 'git log --oneline -10', true); + assert.strictEqual(r3.block, false, 'git log should pass'); + + const r4 = shouldBlockQueueExecution('bash', 'find . 
-name "*.ts"', true); + assert.strictEqual(r4.block, false, 'find should pass'); + + const r5 = shouldBlockQueueExecution('bash', 'grep -rn "TODO" src/', true); + assert.strictEqual(r5.block, false, 'grep should pass'); + + const r6 = shouldBlockQueueExecution('bash', 'head -20 src/index.ts', true); + assert.strictEqual(r6.block, false, 'head should pass'); + + const r7 = shouldBlockQueueExecution('bash', 'wc -l src/index.ts', true); + assert.strictEqual(r7.block, false, 'wc should pass'); + + const r8 = shouldBlockQueueExecution('bash', 'git diff HEAD~1', true); + assert.strictEqual(r8.block, false, 'git diff should pass'); + + const r9 = shouldBlockQueueExecution('bash', 'gh issue view 42', true); + assert.strictEqual(r9.block, false, 'gh issue view should pass'); +}); + +// ─── Scenario 8: mkdir for .gsd/ milestone directories passes ── + +test('queue-guard: allows mkdir for .gsd/ milestone directories', () => { + const r1 = shouldBlockQueueExecution('bash', 'mkdir -p .gsd/milestones/M010/slices', true); + assert.strictEqual(r1.block, false, 'mkdir -p .gsd/ should pass'); +}); + +// ─── Scenario 9: Web search and library tools pass ── + +test('queue-guard: allows web search and library tools during queue mode', () => { + const r1 = shouldBlockQueueExecution('search-the-web', '', true); + assert.strictEqual(r1.block, false, 'search-the-web should pass'); + + const r2 = shouldBlockQueueExecution('resolve_library', '', true); + assert.strictEqual(r2.block, false, 'resolve_library should pass'); + + const r3 = shouldBlockQueueExecution('get_library_docs', '', true); + assert.strictEqual(r3.block, false, 'get_library_docs should pass'); + + const r4 = shouldBlockQueueExecution('fetch_page', '', true); + assert.strictEqual(r4.block, false, 'fetch_page should pass'); +}); diff --git a/src/resources/extensions/gsd/tests/quick-turn-end-cleanup.test.ts b/src/resources/extensions/gsd/tests/quick-turn-end-cleanup.test.ts new file mode 100644 index 000000000..5051a8567 --- 
/dev/null +++ b/src/resources/extensions/gsd/tests/quick-turn-end-cleanup.test.ts @@ -0,0 +1,90 @@ +/** + * Tests that cleanupQuickBranch is called on turn_end to squash-merge the + * quick branch back to the original branch after the agent completes. + * + * Relates to #2668: /gsd quick does not squash-merge branch back after agent + * completes task. cleanupQuickBranch() exists but is never invoked. + * + * The fix registers a turn_end hook in register-hooks.ts that calls + * cleanupQuickBranch() after each turn, which is a no-op when no quick-task + * state is pending. + */ + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +// ─── Structural test: verify turn_end hook exists in register-hooks.ts ────── + +describe("quick task turn_end cleanup (#2668)", () => { + const hooksSource = readFileSync( + join(import.meta.dirname, "..", "bootstrap", "register-hooks.ts"), + "utf-8", + ); + + it("register-hooks.ts imports cleanupQuickBranch from quick.ts", () => { + assert.ok( + hooksSource.includes("cleanupQuickBranch"), + "register-hooks.ts must reference cleanupQuickBranch", + ); + + // Verify it's imported (not just mentioned in a comment) + const importMatch = hooksSource.match( + /import\s*\{[^}]*cleanupQuickBranch[^}]*\}\s*from\s*["'][^"']*quick/, + ); + assert.ok( + importMatch, + "cleanupQuickBranch must be imported from quick module", + ); + }); + + it("registers a turn_end handler that calls cleanupQuickBranch", () => { + // Find the turn_end registration + const turnEndMatch = hooksSource.match( + /pi\.on\(\s*["']turn_end["']/, + ); + assert.ok( + turnEndMatch, + "register-hooks.ts must register a turn_end handler", + ); + + // Extract the turn_end handler body — find everything from the pi.on("turn_end" + // to the matching closing }); + const turnEndIdx = hooksSource.indexOf(turnEndMatch[0]); + assert.ok(turnEndIdx !== -1); + + // Get the rest 
of the source from that point + const rest = hooksSource.slice(turnEndIdx); + + // The handler must call cleanupQuickBranch + // Look for cleanupQuickBranch within the first handler body (up to first `});`) + const handlerEnd = rest.indexOf("});"); + assert.ok(handlerEnd !== -1, "turn_end handler has a closing });"); + + const handlerBody = rest.slice(0, handlerEnd); + assert.ok( + handlerBody.includes("cleanupQuickBranch"), + "turn_end handler must call cleanupQuickBranch", + ); + }); + + it("turn_end handler calls cleanupQuickBranch without arguments (uses cwd default)", () => { + // cleanupQuickBranch(basePath = process.cwd()) — calling without args is correct + // because the handler runs in the same process where handleQuick set up cwd + const turnEndIdx = hooksSource.indexOf('pi.on("turn_end"') !== -1 + ? hooksSource.indexOf('pi.on("turn_end"') + : hooksSource.indexOf("pi.on('turn_end'"); + assert.ok(turnEndIdx !== -1); + + const rest = hooksSource.slice(turnEndIdx); + const handlerEnd = rest.indexOf("});"); + const handlerBody = rest.slice(0, handlerEnd); + + // Should call cleanupQuickBranch() — either bare or with no-arg form + assert.ok( + handlerBody.includes("cleanupQuickBranch("), + "turn_end handler invokes cleanupQuickBranch()", + ); + }); +}); diff --git a/src/resources/extensions/gsd/tests/reassess-handler.test.ts b/src/resources/extensions/gsd/tests/reassess-handler.test.ts index 38908433f..2f8e2aa36 100644 --- a/src/resources/extensions/gsd/tests/reassess-handler.test.ts +++ b/src/resources/extensions/gsd/tests/reassess-handler.test.ts @@ -9,6 +9,7 @@ import { closeDatabase, insertMilestone, insertSlice, + insertAssessment, getSlice, getMilestoneSlices, getAssessment, @@ -323,3 +324,119 @@ test('handleReassessRoadmap returns structured error payloads with actionable me cleanup(base); } }); + +// ─── Bug #2957: Stale VALIDATION survives roadmap remediation ──────────── + +test('handleReassessRoadmap invalidates stale milestone-validation when 
roadmap changes (#2957)', async () => {
+  const base = makeTmpBase();
+  openDatabase(join(base, '.gsd', 'gsd.db'));
+
+  try {
+    // Seed: M001 with S01-S04 all complete, plus a stale VALIDATION with needs-remediation
+    insertMilestone({ id: 'M001', title: 'Test Milestone', status: 'active' });
+    insertSlice({ id: 'S01', milestoneId: 'M001', title: 'Slice One', status: 'complete', demo: 'Demo' });
+    insertSlice({ id: 'S02', milestoneId: 'M001', title: 'Slice Two', status: 'complete', demo: 'Demo' });
+    insertSlice({ id: 'S03', milestoneId: 'M001', title: 'Slice Three', status: 'complete', demo: 'Demo' });
+    insertSlice({ id: 'S04', milestoneId: 'M001', title: 'Slice Four', status: 'complete', demo: 'Demo' });
+
+    // Insert milestone-validation assessment with needs-remediation verdict (stale)
+    const validationPath = join('.gsd', 'milestones', 'M001', 'M001-VALIDATION.md');
+    insertAssessment({
+      path: validationPath,
+      milestoneId: 'M001',
+      sliceId: null,
+      taskId: null,
+      status: 'needs-remediation',
+      scope: 'milestone-validation',
+      fullContent: '---\nverdict: needs-remediation\nremediation_round: 0\n---\n\n# Validation\nNeeds remediation.',
+    });
+
+    // Verify the validation row exists before reassess
+    const adapter = _getAdapter()!;
+    const before = adapter.prepare(
+      `SELECT * FROM assessments WHERE milestone_id = 'M001' AND scope = 'milestone-validation'`,
+    ).get() as Record<string, unknown> | undefined;
+    assert.ok(before, 'milestone-validation row should exist before reassess');
+
+    // Now reassess the roadmap: add remediation slice S05
+    // This simulates the scenario from #2957 where validation produced needs-remediation
+    // and then roadmap was reassessed to add a remediation slice
+    const result = await handleReassessRoadmap({
+      milestoneId: 'M001',
+      completedSliceId: 'S04',
+      verdict: 'on-track',
+      assessment: 'S04 completed.
Adding remediation slice S05.',
+      sliceChanges: {
+        modified: [],
+        added: [
+          {
+            sliceId: 'S05',
+            title: 'Remediation Slice',
+            risk: 'low',
+            depends: ['S04'],
+            demo: 'Fix the issues found during validation.',
+          },
+        ],
+        removed: [],
+      },
+    }, base);
+
+    assert.ok(!('error' in result), `unexpected error: ${'error' in result ? result.error : ''}`);
+
+    // The stale milestone-validation row must be deleted after roadmap changes
+    const after = adapter.prepare(
+      `SELECT * FROM assessments WHERE milestone_id = 'M001' AND scope = 'milestone-validation'`,
+    ).get() as Record<string, unknown> | undefined;
+    assert.equal(after, undefined, 'milestone-validation row should be deleted after roadmap changes — stale validation must not survive remediation (#2957)');
+  } finally {
+    cleanup(base);
+  }
+});
+
+test('handleReassessRoadmap does NOT invalidate validation when no roadmap structural changes (#2957)', async () => {
+  const base = makeTmpBase();
+  openDatabase(join(base, '.gsd', 'gsd.db'));
+
+  try {
+    // Seed: M001 with slices, plus a validation with pass verdict
+    insertMilestone({ id: 'M001', title: 'Test Milestone', status: 'active' });
+    insertSlice({ id: 'S01', milestoneId: 'M001', title: 'Slice One', status: 'complete', demo: 'Demo' });
+    insertSlice({ id: 'S02', milestoneId: 'M001', title: 'Slice Two', status: 'pending', demo: 'Demo' });
+
+    // Insert milestone-validation assessment with pass verdict
+    const validationPath = join('.gsd', 'milestones', 'M001', 'M001-VALIDATION.md');
+    insertAssessment({
+      path: validationPath,
+      milestoneId: 'M001',
+      sliceId: null,
+      taskId: null,
+      status: 'pass',
+      scope: 'milestone-validation',
+      fullContent: '---\nverdict: pass\nremediation_round: 0\n---\n\n# Validation\nAll good.',
+    });
+
+    // Reassess with no structural changes (empty added/modified/removed)
+    const result = await handleReassessRoadmap({
+      milestoneId: 'M001',
+      completedSliceId: 'S01',
+      verdict: 'confirmed',
+      assessment: 'S01 completed.
No changes needed.',
+      sliceChanges: {
+        modified: [],
+        added: [],
+        removed: [],
+      },
+    }, base);
+
+    assert.ok(!('error' in result), `unexpected error: ${'error' in result ? result.error : ''}`);
+
+    // Validation should still exist when no structural changes occurred
+    const adapter = _getAdapter()!;
+    const row = adapter.prepare(
+      `SELECT * FROM assessments WHERE milestone_id = 'M001' AND scope = 'milestone-validation'`,
+    ).get() as Record<string, unknown> | undefined;
+    assert.ok(row, 'milestone-validation row should survive when no structural changes occurred');
+  } finally {
+    cleanup(base);
+  }
+});
diff --git a/src/resources/extensions/gsd/tests/reconciliation-edge-cases.test.ts b/src/resources/extensions/gsd/tests/reconciliation-edge-cases.test.ts
new file mode 100644
index 000000000..0413859b6
--- /dev/null
+++ b/src/resources/extensions/gsd/tests/reconciliation-edge-cases.test.ts
@@ -0,0 +1,162 @@
+// GSD State Machine Regression Tests — Event Replay & Reconciliation (#3161)
+
+import { describe, test, afterEach } from "node:test";
+import assert from "node:assert/strict";
+import { createHash } from "node:crypto";
+import * as fs from "node:fs";
+import * as path from "node:path";
+import * as os from "node:os";
+import { findForkPoint, readEvents, appendEvent } from "../workflow-events.ts";
+import type { WorkflowEvent } from "../workflow-events.ts";
+import { extractEntityKey, detectConflicts } from "../workflow-reconcile.ts";
+
+// ─── Helper: build a full WorkflowEvent from cmd + params ────────────────────
+
+function makeEvent(cmd: string, params: Record<string, unknown>, ts?: string): WorkflowEvent {
+  const hash = createHash("sha256")
+    .update(JSON.stringify({ cmd, params }))
+    .digest("hex")
+    .slice(0, 16);
+  return { cmd, params, ts: ts ??
new Date().toISOString(), hash, actor: "agent", session_id: "test-session" }; +} + +// ─── Temp dir management ───────────────────────────────────────────────────── + +const tempDirs: string[] = []; + +function tempDir(): string { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), "gsd-recon-test-")); + tempDirs.push(dir); + return dir; +} + +afterEach(() => { + for (const dir of tempDirs.splice(0)) { + try { fs.rmSync(dir, { recursive: true, force: true }); } catch { /* best effort */ } + } +}); + +// ─── Tests ──────────────────────────────────────────────────────────────────── + +describe("reconciliation-edge-cases", () => { + + // findForkPoint + test("findForkPoint returns -1 for completely diverged logs", () => { + const eA = makeEvent("complete_task", { milestoneId: "M001", sliceId: "S01", taskId: "T01" }); + const eB = makeEvent("complete_task", { milestoneId: "M001", sliceId: "S01", taskId: "T02" }); + + const logA: WorkflowEvent[] = [eA]; + const logB: WorkflowEvent[] = [eB]; + + assert.equal(findForkPoint(logA, logB), -1, "completely diverged logs should return -1"); + }); + + test("findForkPoint returns last index when one log is prefix of another", () => { + const e1 = makeEvent("start_task", { milestoneId: "M001", sliceId: "S01", taskId: "T01" }); + const e2 = makeEvent("complete_task", { milestoneId: "M001", sliceId: "S01", taskId: "T01" }); + const e3 = makeEvent("complete_slice", { milestoneId: "M001", sliceId: "S01" }); + + const logA: WorkflowEvent[] = [e1, e2]; + const logB: WorkflowEvent[] = [e1, e2, e3]; + + assert.equal(findForkPoint(logA, logB), 1, "prefix log should fork at last shared index"); + }); + + test("findForkPoint returns -1 for empty logs", () => { + assert.equal(findForkPoint([], []), -1, "two empty logs should return -1"); + }); + + // extractEntityKey + test("extractEntityKey returns null for malformed events (missing taskId)", () => { + const event = makeEvent("complete_task", {}); + // params has no taskId — should return 
null rather than return a bad key + assert.equal(extractEntityKey(event), null, "missing taskId should yield null entity key"); + }); + + test("extractEntityKey returns null for unknown commands", () => { + const event = makeEvent("future_cmd", { foo: "bar" }); + assert.equal(extractEntityKey(event), null, "unknown command should yield null entity key"); + }); + + test("plan_slice and complete_slice use different entity types", () => { + const planEvent = makeEvent("plan_slice", { sliceId: "S01" }); + const completeEvent = makeEvent("complete_slice", { sliceId: "S01" }); + + const planKey = extractEntityKey(planEvent); + const completeKey = extractEntityKey(completeEvent); + + assert.ok(planKey !== null, "plan_slice should produce an entity key"); + assert.ok(completeKey !== null, "complete_slice should produce an entity key"); + assert.equal(planKey!.type, "slice_plan", "plan_slice entity type should be 'slice_plan'"); + assert.equal(completeKey!.type, "slice", "complete_slice entity type should be 'slice'"); + assert.notEqual( + planKey!.type, + completeKey!.type, + "plan_slice and complete_slice must map to different entity types", + ); + }); + + // detectConflicts + test("detectConflicts finds no conflicts when entities do not overlap", () => { + const mainDiverged: WorkflowEvent[] = [ + makeEvent("complete_task", { milestoneId: "M001", sliceId: "S01", taskId: "T01" }), + ]; + const wtDiverged: WorkflowEvent[] = [ + makeEvent("complete_task", { milestoneId: "M001", sliceId: "S01", taskId: "T02" }), + ]; + + const conflicts = detectConflicts(mainDiverged, wtDiverged); + assert.equal(conflicts.length, 0, "non-overlapping task edits should produce no conflicts"); + }); + + test("detectConflicts flags conflict when both sides touch the same task", () => { + const mainDiverged: WorkflowEvent[] = [ + makeEvent("start_task", { milestoneId: "M001", sliceId: "S01", taskId: "T01" }), + ]; + const wtDiverged: WorkflowEvent[] = [ + makeEvent("complete_task", { milestoneId: 
"M001", sliceId: "S01", taskId: "T01" }), + ]; + + const conflicts = detectConflicts(mainDiverged, wtDiverged); + assert.equal(conflicts.length, 1, "same task touched by both sides should produce exactly one conflict"); + + const conflict = conflicts[0]!; + assert.equal(conflict.entityType, "task", "conflict entityType should be 'task'"); + assert.equal(conflict.entityId, "T01", "conflict entityId should be 'T01'"); + }); + + test("detectConflicts ignores events with null entity keys", () => { + // Events with unknown commands produce null keys and must not cause false conflicts. + const mainDiverged: WorkflowEvent[] = [ + makeEvent("unknown_future_cmd", { milestoneId: "M001" }), + ]; + const wtDiverged: WorkflowEvent[] = [ + makeEvent("another_unknown_cmd", { milestoneId: "M001" }), + ]; + + const conflicts = detectConflicts(mainDiverged, wtDiverged); + assert.equal(conflicts.length, 0, "unknown commands with null entity keys should not produce conflicts"); + }); + + // appendEvent — filesystem creation + test("appendEvent creates event log if directory does not exist", () => { + const base = tempDir(); + // Remove the .gsd directory if it somehow exists — appendEvent should create it. 
+ const gsdDir = path.join(base, ".gsd"); + if (fs.existsSync(gsdDir)) fs.rmSync(gsdDir, { recursive: true, force: true }); + + appendEvent(base, { + cmd: "complete_task", + params: { milestoneId: "M001", sliceId: "S01", taskId: "T01" }, + ts: new Date().toISOString(), + actor: "agent", + }); + + const logPath = path.join(base, ".gsd", "event-log.jsonl"); + assert.ok(fs.existsSync(logPath), "event-log.jsonl should be created by appendEvent"); + + const events = readEvents(logPath); + assert.equal(events.length, 1, "event log should contain exactly one event"); + assert.equal(events[0]!.cmd, "complete_task", "persisted event should have the correct cmd"); + }); +}); diff --git a/src/resources/extensions/gsd/tests/roadmap-slices.test.ts b/src/resources/extensions/gsd/tests/roadmap-slices.test.ts index 63f607683..56364a653 100644 --- a/src/resources/extensions/gsd/tests/roadmap-slices.test.ts +++ b/src/resources/extensions/gsd/tests/roadmap-slices.test.ts @@ -296,3 +296,100 @@ Do the second thing. assert.equal(slices[0]?.id, "S01"); assert.equal(slices[1]?.id, "S02"); }); + +// ── Regression tests for #2567 ───────────────────────────────────────────── +// Prose H3 parser fails on common LLM-generated patterns: numbered prefixes, +// parenthetical numbering, bracketed IDs, and indented headings. + +test("parseRoadmapSlices: numbered H3 headers under ## Slices (#2567)", () => { + const numberedContent = `# M002: My Milestone + +**Vision:** Ship the product. + +## Slices + +### 1. S01: Setup Environment +Set up the dev environment and tooling. + +### 2. S02: Build Core +Implement the core logic. +**Depends on:** S01 + +### 3. S03: Polish UI +Final polish and theming. 
+**Depends on:** S01, S02 +`; + const slices = parseRoadmapSlices(numberedContent); + assert.equal(slices.length, 3, "should parse 3 slices from numbered H3 headers"); + assert.equal(slices[0]?.id, "S01"); + assert.equal(slices[0]?.title, "Setup Environment"); + assert.equal(slices[1]?.id, "S02"); + assert.deepEqual(slices[1]?.depends, ["S01"]); + assert.equal(slices[2]?.id, "S03"); + assert.deepEqual(slices[2]?.depends, ["S01", "S02"]); +}); + +test("parseRoadmapSlices: parenthetical-numbered H3 headers (#2567)", () => { + const parenContent = `# M002: Milestone + +**Vision:** Ship. + +## Slices + +### (1) S01: Setup +Setup work. + +### (2) S02: Build +Build work. +**Depends on:** S01 +`; + const slices = parseRoadmapSlices(parenContent); + assert.equal(slices.length, 2, "should parse slices with parenthetical numbering"); + assert.equal(slices[0]?.id, "S01"); + assert.equal(slices[0]?.title, "Setup"); + assert.equal(slices[1]?.id, "S02"); + assert.deepEqual(slices[1]?.depends, ["S01"]); +}); + +test("parseRoadmapSlices: bracketed slice IDs in H3 headers (#2567)", () => { + const bracketContent = `# M002: Milestone + +**Vision:** Ship. + +## Slices + +### [S01] Setup Environment +Setup work. + +### [S02] Build Core +Build work. +**Depends on:** S01 +`; + const slices = parseRoadmapSlices(bracketContent); + assert.equal(slices.length, 2, "should parse slices with bracketed IDs"); + assert.equal(slices[0]?.id, "S01"); + assert.equal(slices[0]?.title, "Setup Environment"); + assert.equal(slices[1]?.id, "S02"); + assert.deepEqual(slices[1]?.depends, ["S01"]); +}); + +test("parseRoadmapSlices: indented H3 headers under ## Slices (#2567)", () => { + const indentedContent = `# M002: Milestone + +**Vision:** Ship. + +## Slices + + ### S01: Setup + Setup work. + + ### S02: Build + Build work. 
+`; + const slices = parseRoadmapSlices(indentedContent); + assert.equal(slices.length, 2, "should parse slices from indented H3 headers"); + assert.equal(slices[0]?.id, "S01"); + assert.equal(slices[0]?.title, "Setup"); + assert.equal(slices[1]?.id, "S02"); + assert.equal(slices[1]?.title, "Build"); +}); diff --git a/src/resources/extensions/gsd/tests/secure-env-collect.test.ts b/src/resources/extensions/gsd/tests/secure-env-collect.test.ts index bd6096674..18acf7dd4 100644 --- a/src/resources/extensions/gsd/tests/secure-env-collect.test.ts +++ b/src/resources/extensions/gsd/tests/secure-env-collect.test.ts @@ -183,3 +183,137 @@ test("secure_env_collect: detectDestination — convex file (not dir) does not t rmSync(tmp, { recursive: true, force: true }); } }); + +// ─── Bug #2997: undefined vs null handling ────────────────────────────────── + +/** + * When ctx.ui.custom() returns undefined (e.g. noOpUIContext, component + * disposal, abort), the strict null checks (=== null / !== null) let + * undefined slip through as a "provided" value, crashing writeEnvKey + * which calls .replace() on it. + * + * These tests verify the fix: loose equality (== null / != null) so that + * both null AND undefined are treated as "skipped". 
+ */ + +// Helper to dynamically load the orchestrator +async function loadOrchestrator(): Promise<{ + collectSecretsFromManifest: Function; +}> { + const mod = await import("../../get-secrets-from-user.ts"); + return { collectSecretsFromManifest: mod.collectSecretsFromManifest }; +} + +// Helper to dynamically load files.ts functions +async function loadFilesExports(): Promise<{ + formatSecretsManifest: (m: any) => string; +}> { + const mod = await import("../files.ts"); + return { formatSecretsManifest: mod.formatSecretsManifest }; +} + +function makeManifest(entries: Array<{ key: string; status?: string; formatHint?: string; guidance?: string[] }>): any { + return { + milestone: "M001", + generatedAt: "2026-03-12T00:00:00Z", + entries: entries.map((e) => ({ + key: e.key, + service: "TestService", + dashboardUrl: "", + guidance: e.guidance ?? [], + formatHint: e.formatHint ?? "", + status: e.status ?? "pending", + destination: "dotenv", + })), + }; +} + +async function writeManifestFile(dir: string, manifest: any): Promise<string> { + const { formatSecretsManifest } = await loadFilesExports(); + const milestoneDir = join(dir, ".gsd", "milestones", "M001"); + mkdirSync(milestoneDir, { recursive: true }); + const filePath = join(milestoneDir, "M001-SECRETS.md"); + writeFileSync(filePath, formatSecretsManifest(manifest)); + return filePath; +} + +test("secure_env_collect #2997: undefined from ctx.ui.custom() is treated as skipped, not provided", async (t) => { + const { collectSecretsFromManifest } = await loadOrchestrator(); + + const tmp = makeTempDir("sec-undefined-test"); + t.after(() => { + rmSync(tmp, { recursive: true, force: true }); + }); + + const manifest = makeManifest([ + { key: "SECRET_THAT_RETURNS_UNDEFINED", status: "pending" }, + ]); + await writeManifestFile(tmp, manifest); + + let callIndex = 0; + const mockCtx = { + cwd: tmp, + hasUI: true, + ui: { + // First call is summary screen, second is collect — return undefined + // to simulate noOpUIContext or 
component disposal + custom: async (_factory: any) => { + callIndex++; + if (callIndex <= 1) return null; // summary screen dismiss + return undefined; // BUG TRIGGER: should be treated as skipped + }, + }, + }; + + // Before the fix, this crashes with: + // "Cannot read properties of undefined (reading 'replace')" + const result = await collectSecretsFromManifest(tmp, "M001", mockCtx as any); + + // The undefined-returning key must appear in skipped, not in applied + assert.ok( + result.skipped.includes("SECRET_THAT_RETURNS_UNDEFINED"), + "Key returning undefined should be in skipped list", + ); + assert.ok( + !result.applied.includes("SECRET_THAT_RETURNS_UNDEFINED"), + "Key returning undefined must NOT be in applied list", + ); +}); + +test("secure_env_collect #2997: null from ctx.ui.custom() is still treated as skipped (regression guard)", async (t) => { + const { collectSecretsFromManifest } = await loadOrchestrator(); + + const tmp = makeTempDir("sec-null-test"); + t.after(() => { + rmSync(tmp, { recursive: true, force: true }); + }); + + const manifest = makeManifest([ + { key: "SECRET_THAT_RETURNS_NULL", status: "pending" }, + ]); + await writeManifestFile(tmp, manifest); + + let callIndex = 0; + const mockCtx = { + cwd: tmp, + hasUI: true, + ui: { + custom: async (_factory: any) => { + callIndex++; + if (callIndex <= 1) return null; // summary screen dismiss + return null; // explicit null skip + }, + }, + }; + + const result = await collectSecretsFromManifest(tmp, "M001", mockCtx as any); + + assert.ok( + result.skipped.includes("SECRET_THAT_RETURNS_NULL"), + "Key returning null should be in skipped list", + ); + assert.ok( + !result.applied.includes("SECRET_THAT_RETURNS_NULL"), + "Key returning null must NOT be in applied list", + ); +}); diff --git a/src/resources/extensions/gsd/tests/slice-disk-reconcile.test.ts b/src/resources/extensions/gsd/tests/slice-disk-reconcile.test.ts new file mode 100644 index 000000000..a40822d29 --- /dev/null +++ 
b/src/resources/extensions/gsd/tests/slice-disk-reconcile.test.ts @@ -0,0 +1,233 @@ +/** + * slice-disk-reconcile.test.ts — #2533 + * + * Slices that exist on disk (in ROADMAP.md) but are missing from the SQLite + * database cause permanent "No slice eligible — check dependency ordering" + * blocks. deriveStateFromDb must reconcile disk slices into the DB, just as + * it already does for milestones (#2416). + * + * Scenario: M001 has a ROADMAP with S01-S04. S01 and S02 have SUMMARY files + * (complete on disk). S03 depends on S01. Only S04 is in the DB (depends on + * S03). Without slice reconciliation, S01-S03 are invisible and S04 is + * permanently blocked. + */ + +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { deriveStateFromDb, invalidateStateCache } from "../state.ts"; +import { + openDatabase, + closeDatabase, + insertMilestone, + insertSlice, + getMilestoneSlices, +} from "../gsd-db.ts"; +import { createTestContext } from "./test-helpers.ts"; + +const { assertEq, assertTrue, report } = createTestContext(); + +function createFixtureBase(): string { + const base = mkdtempSync(join(tmpdir(), "gsd-slice-reconcile-")); + mkdirSync(join(base, ".gsd", "milestones"), { recursive: true }); + return base; +} + +function writeFile(base: string, relativePath: string, content: string): void { + const full = join(base, ".gsd", relativePath); + mkdirSync(join(full, ".."), { recursive: true }); + writeFileSync(full, content); +} + +function cleanup(base: string): void { + rmSync(base, { recursive: true, force: true }); +} + +const CONTEXT_CONTENT = `# M001: Test Milestone + +This milestone tests slice reconciliation. + +## Must-Haves +- Something important +`; + +// Roadmap with 4 slices: S01 (no deps), S02 (no deps), S03 (depends S01), S04 (depends S03) +const ROADMAP_CONTENT = `# M001: Test Milestone + +**Vision:** Test slice disk→DB reconciliation. 
+ +## Slices + +- [x] **S01: Foundation** \`risk:low\` \`depends:[]\` + > Set up project structure. +- [x] **S02: Core Utils** \`risk:low\` \`depends:[]\` + > Build utility functions. +- [ ] **S03: Integration** \`risk:medium\` \`depends:[S01]\` + > Integrate components. +- [ ] **S04: Final Assembly** \`risk:high\` \`depends:[S03]\` + > Assemble everything. +`; + +async function testMissingSlicesCauseBlock(): Promise<void> { + console.log("\n--- Test: missing DB slices cause permanent block (pre-fix) ---"); + + const base = createFixtureBase(); + const dbPath = join(base, ".gsd", "gsd.db"); + + try { + openDatabase(dbPath); + + // M001 in DB + insertMilestone({ id: "M001", title: "M001: Test Milestone", status: "active", depends_on: [] }); + + // Only S04 is in the DB — S01-S03 are missing + insertSlice({ id: "S04", milestoneId: "M001", title: "S04: Final Assembly", status: "pending", risk: "high", depends: ["S03"] }); + + // Write disk files — S01 and S02 have SUMMARY (complete on disk) + writeFile(base, "milestones/M001/CONTEXT.md", CONTEXT_CONTENT); + writeFile(base, "milestones/M001/ROADMAP.md", ROADMAP_CONTENT); + writeFile(base, "milestones/M001/S01/PLAN.md", "# S01 Plan\n"); + writeFile(base, "milestones/M001/S01/SUMMARY.md", "# S01 Summary\nDone."); + writeFile(base, "milestones/M001/S02/PLAN.md", "# S02 Plan\n"); + writeFile(base, "milestones/M001/S02/SUMMARY.md", "# S02 Summary\nDone."); + writeFile(base, "milestones/M001/S03/PLAN.md", "# S03 Plan\n"); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + // After the fix, slices S01-S03 should be reconciled into DB + const dbSlices = getMilestoneSlices("M001"); + assertTrue( + dbSlices.length === 4, + `All 4 roadmap slices should be in DB after reconciliation, got ${dbSlices.length}`, + ); + + // S01 and S02 should be marked complete (have SUMMARY files) + const s01 = dbSlices.find(s => s.id === "S01"); + assertTrue(s01 !== undefined, "S01 should exist in DB after reconciliation"); + 
if (s01) { + assertEq(s01.status, "complete", "S01 should be 'complete' (has SUMMARY on disk)"); + } + + const s02 = dbSlices.find(s => s.id === "S02"); + assertTrue(s02 !== undefined, "S02 should exist in DB after reconciliation"); + if (s02) { + assertEq(s02.status, "complete", "S02 should be 'complete' (has SUMMARY on disk)"); + } + + // S03 should be pending (no SUMMARY) + const s03 = dbSlices.find(s => s.id === "S03"); + assertTrue(s03 !== undefined, "S03 should exist in DB after reconciliation"); + if (s03) { + assertEq(s03.status, "pending", "S03 should be 'pending' (no SUMMARY on disk)"); + } + + // The state should NOT be blocked — S03 should be eligible (S01 dep satisfied) + assertTrue( + state.phase !== "blocked", + `Phase should not be 'blocked' after reconciliation, got '${state.phase}'`, + ); + + // Active slice should be S03 (S01 dep met, S03 is first incomplete with satisfied deps) + assertTrue( + state.activeSlice !== null, + "There should be an active slice after reconciliation", + ); + if (state.activeSlice) { + assertEq( + state.activeSlice.id, + "S03", + "Active slice should be S03 (its dependency S01 is complete) (#2533)", + ); + } + } finally { + closeDatabase(); + cleanup(base); + } +} + +async function testSliceReconciliationIdempotent(): Promise<void> { + console.log("\n--- Test: slice reconciliation is idempotent ---"); + + const base = createFixtureBase(); + const dbPath = join(base, ".gsd", "gsd.db"); + + try { + openDatabase(dbPath); + + insertMilestone({ id: "M001", title: "M001: Test", status: "active", depends_on: [] }); + // S01 already in DB with correct status + insertSlice({ id: "S01", milestoneId: "M001", title: "S01: Foundation", status: "complete", depends: [] }); + + writeFile(base, "milestones/M001/CONTEXT.md", CONTEXT_CONTENT); + writeFile(base, "milestones/M001/ROADMAP.md", ROADMAP_CONTENT); + writeFile(base, "milestones/M001/S01/PLAN.md", "# S01 Plan\n"); + writeFile(base, "milestones/M001/S01/SUMMARY.md", "# S01 
Summary\nDone."); + writeFile(base, "milestones/M001/S02/PLAN.md", "# S02 Plan\n"); + writeFile(base, "milestones/M001/S02/SUMMARY.md", "# S02 Summary\nDone."); + + invalidateStateCache(); + await deriveStateFromDb(base); + + // S01 should still be complete (not overwritten) + const dbSlices = getMilestoneSlices("M001"); + const s01 = dbSlices.find(s => s.id === "S01"); + assertTrue(s01 !== undefined, "S01 should still exist in DB"); + if (s01) { + assertEq(s01.status, "complete", "S01 status should remain 'complete' (not overwritten)"); + } + + // S02-S04 should have been added + assertTrue( + dbSlices.length === 4, + `Should have 4 slices after reconciliation (existing + new), got ${dbSlices.length}`, + ); + } finally { + closeDatabase(); + cleanup(base); + } +} + +async function testNoRoadmapSkipsReconciliation(): Promise<void> { + console.log("\n--- Test: no ROADMAP file skips slice reconciliation ---"); + + const base = createFixtureBase(); + const dbPath = join(base, ".gsd", "gsd.db"); + + try { + openDatabase(dbPath); + + insertMilestone({ id: "M001", title: "M001: No Roadmap", status: "active", depends_on: [] }); + + // Only a CONTEXT file, no ROADMAP + writeFile(base, "milestones/M001/CONTEXT.md", CONTEXT_CONTENT); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + const dbSlices = getMilestoneSlices("M001"); + assertEq(dbSlices.length, 0, "No slices should be added when no ROADMAP exists"); + + // Should be in pre-planning (no roadmap) + assertEq(state.phase, "pre-planning", "Phase should be pre-planning with no roadmap"); + } finally { + closeDatabase(); + cleanup(base); + } +} + +async function main(): Promise<void> { + console.log("\n=== #2533: deriveStateFromDb reconciles disk slices ==="); + + await testMissingSlicesCauseBlock(); + await testSliceReconciliationIdempotent(); + await testNoRoadmapSkipsReconciliation(); + + report(); +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); diff --git 
a/src/resources/extensions/gsd/tests/stash-queued-context-files.test.ts b/src/resources/extensions/gsd/tests/stash-queued-context-files.test.ts new file mode 100644 index 000000000..cb59fa5e8 --- /dev/null +++ b/src/resources/extensions/gsd/tests/stash-queued-context-files.test.ts @@ -0,0 +1,305 @@ +/** + * stash-queued-context-files.test.ts — Regression test for #2505. + * + * When mergeMilestoneToMain runs `git stash push --include-untracked`, + * untracked `.gsd/milestones/M/` directories created by `/gsd queue` + * are swept into the stash. If stash pop fails (conflict on tracked files), + * the queued milestone CONTEXT files are permanently lost. + * + * The fix: drop `--include-untracked` from the stash push, since the stash + * only needs to handle tracked dirty files. Untracked `.gsd/` files are + * already handled separately by clearProjectRootStateFiles. + */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { + mkdtempSync, + mkdirSync, + writeFileSync, + rmSync, + existsSync, + readFileSync, + realpathSync, +} from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { execSync } from "node:child_process"; + +import { createAutoWorktree, mergeMilestoneToMain } from "../auto-worktree.ts"; + +function run(cmd: string, cwd: string): string { + return execSync(cmd, { + cwd, + stdio: ["ignore", "pipe", "pipe"], + encoding: "utf-8", + }).trim(); +} + +function createTempRepo(): string { + const dir = realpathSync(mkdtempSync(join(tmpdir(), "wt-ctx-stash-test-"))); + run("git init", dir); + run("git config user.email test@test.com", dir); + run("git config user.name Test", dir); + writeFileSync(join(dir, "README.md"), "# test\n"); + mkdirSync(join(dir, ".gsd"), { recursive: true }); + writeFileSync(join(dir, ".gsd", "STATE.md"), "version: 1\n"); + // In projects with tracked .gsd/ files (hasGitTrackedGsdFiles=true), + // .gsd is NOT added to .gitignore. 
This means untracked files under + // .gsd/ are visible to --include-untracked and get swept into the + // stash, destroying queued milestone CONTEXT files (#2505). + run("git add -f .gsd/STATE.md", dir); + run("git add .", dir); + run("git commit -m init", dir); + run("git branch -M main", dir); + return dir; +} + +function makeRoadmap( + milestoneId: string, + title: string, + slices: Array<{ id: string; title: string }>, +): string { + const sliceLines = slices + .map((s) => `- [x] **${s.id}: ${s.title}**`) + .join("\n"); + return `# ${milestoneId}: ${title}\n\n## Slices\n${sliceLines}\n`; +} + +/** + * Standalone test proving that --include-untracked sweeps queued + * milestone CONTEXT files into the git stash. This is a direct + * git-level test, not going through mergeMilestoneToMain. + */ +test("#2505: git stash --include-untracked sweeps queued CONTEXT files (demonstrates the bug)", () => { + const dir = realpathSync(mkdtempSync(join(tmpdir(), "wt-stash-bug-demo-"))); + try { + run("git init", dir); + run("git config user.email test@test.com", dir); + run("git config user.name Test", dir); + writeFileSync(join(dir, "README.md"), "# test\n"); + mkdirSync(join(dir, ".gsd"), { recursive: true }); + writeFileSync(join(dir, ".gsd", "STATE.md"), "version: 1\n"); + run("git add -f .gsd/STATE.md", dir); + run("git add .", dir); + run("git commit -m init", dir); + + // Create queued milestone CONTEXT files (untracked, not gitignored) + const m013Dir = join(dir, ".gsd", "milestones", "M013"); + mkdirSync(m013Dir, { recursive: true }); + writeFileSync( + join(m013Dir, "M013-CONTEXT.md"), + "# M013: Login Page Redesign\n", + ); + + // Dirty a tracked file + writeFileSync(join(dir, "README.md"), "# test\n\nDirty.\n"); + + // Verify the CONTEXT file is untracked + const status = run("git status --porcelain", dir); + assert.ok(status.includes("?? 
.gsd/milestones/"), "precondition: M013 dir is untracked"); + + // Stash WITH --include-untracked (the bug) + run('git stash push --include-untracked -m "test stash"', dir); + + // BUG: the queued CONTEXT file was swept into the stash + assert.ok( + !existsSync(join(m013Dir, "M013-CONTEXT.md")), + "BUG CONFIRMED: --include-untracked swept CONTEXT file into stash", + ); + + // Pop the stash to restore the dirty README and the swept CONTEXT file + run("git stash pop", dir); + + // Recreate the scenario + mkdirSync(m013Dir, { recursive: true }); + writeFileSync( + join(m013Dir, "M013-CONTEXT.md"), + "# M013: Login Page Redesign\n", + ); + writeFileSync(join(dir, "README.md"), "# test\n\nDirty again.\n"); + + // Stash WITHOUT --include-untracked (the fix) + run('git stash push -m "test stash no untracked"', dir); + + // FIX: the queued CONTEXT file stays on disk + assert.ok( + existsSync(join(m013Dir, "M013-CONTEXT.md")), + "FIX CONFIRMED: without --include-untracked, CONTEXT file stays on disk", + ); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test("#2505: mergeMilestoneToMain preserves queued CONTEXT files (not swept into stash)", () => { + const repo = createTempRepo(); + try { + const wtPath = createAutoWorktree(repo, "M015"); + const normalizedPath = wtPath.replaceAll("\\", "/"); + const worktreeName = normalizedPath.split("/").pop() || "M015"; + const sliceBranch = `slice/${worktreeName}/S01`; + run(`git checkout -b "${sliceBranch}"`, wtPath); + writeFileSync(join(wtPath, "app.ts"), "export const app = true;\n"); + run("git add .", wtPath); + run('git commit -m "add app feature"', wtPath); + run("git checkout milestone/M015", wtPath); + run(`git merge --no-ff "${sliceBranch}" -m "merge S01"`, wtPath); + + // Simulate `/gsd queue` creating queued milestone CONTEXT files at the + // project root. These are untracked, and in repos with tracked .gsd/ + // files they are NOT gitignored. 
+ const m013Dir = join(repo, ".gsd", "milestones", "M013"); + const m014Dir = join(repo, ".gsd", "milestones", "M014"); + mkdirSync(m013Dir, { recursive: true }); + mkdirSync(m014Dir, { recursive: true }); + writeFileSync( + join(m013Dir, "M013-CONTEXT.md"), + "# M013: Login Page Redesign\n\nQueued milestone context.\n", + ); + writeFileSync( + join(m014Dir, "M014-CONTEXT.md"), + "# M014: Dashboard Redesign\n\nQueued milestone context.\n", + ); + + // Dirty a tracked file to trigger the pre-merge stash + writeFileSync(join(repo, "README.md"), "# test\n\nDirty change.\n"); + + // Verify M013 is untracked (precondition) + const statusBefore = run("git status --porcelain", repo); + assert.ok( + statusBefore.includes("?? .gsd/milestones/"), + "M013 directory is untracked before merge (precondition)", + ); + + const roadmap = makeRoadmap("M015", "App Feature", [ + { id: "S01", title: "Feature" }, + ]); + + const result = mergeMilestoneToMain(repo, "M015", roadmap); + assert.ok( + result.commitMessage.includes("GSD-Milestone: M015"), + "merge should succeed", + ); + + // CRITICAL: Queued milestone CONTEXT files must still exist on disk. + // With --include-untracked, these files get swept into the stash + // during the merge and are only restored if stash pop succeeds. + // Without --include-untracked, they are never touched. 
+ assert.ok( + existsSync(join(m013Dir, "M013-CONTEXT.md")), + "M013-CONTEXT.md must survive the merge (not swept into stash)", + ); + assert.ok( + existsSync(join(m014Dir, "M014-CONTEXT.md")), + "M014-CONTEXT.md must survive the merge (not swept into stash)", + ); + assert.ok( + readFileSync(join(m013Dir, "M013-CONTEXT.md"), "utf-8").includes("Login Page Redesign"), + "M013 context content preserved", + ); + assert.ok( + readFileSync(join(m014Dir, "M014-CONTEXT.md"), "utf-8").includes("Dashboard Redesign"), + "M014 context content preserved", + ); + + // Verify milestone code merged correctly + assert.ok(existsSync(join(repo, "app.ts")), "milestone code merged to main"); + + // Verify no stash entry remains that could contain queued files. + // If --include-untracked is removed, the stash (if needed) should + // pop cleanly since it only contains tracked files. + let stashList: string; + try { + stashList = run("git stash list", repo); + } catch { + stashList = ""; + } + // A leftover stash after merge is acceptable (pop conflict on tracked + // files), but it must NOT contain queued milestone files. 
+ if (stashList) { + // Verify the stash does not contain queued milestone entries + try { + const stashDiff = run("git diff stash@{0}^3 --name-only 2>/dev/null || true", repo); + assert.ok( + !stashDiff.includes("M013-CONTEXT"), + "stash must not contain queued milestone M013 files", + ); + assert.ok( + !stashDiff.includes("M014-CONTEXT"), + "stash must not contain queued milestone M014 files", + ); + } catch { + // No untracked tree in stash — that's the expected outcome with the fix + } + } + } finally { + rmSync(repo, { recursive: true, force: true }); + } +}); + +test("#2505: back-to-back merges preserve queued CONTEXT files", () => { + const repo = createTempRepo(); + try { + // ── First milestone: M015 ── + const wt1 = createAutoWorktree(repo, "M015"); + const wt1Name = wt1.replaceAll("\\", "/").split("/").pop() || "M015"; + const slice1 = `slice/${wt1Name}/S01`; + run(`git checkout -b "${slice1}"`, wt1); + writeFileSync(join(wt1, "feature1.ts"), "export const f1 = true;\n"); + run("git add .", wt1); + run('git commit -m "feature 1"', wt1); + run("git checkout milestone/M015", wt1); + run(`git merge --no-ff "${slice1}" -m "merge S01"`, wt1); + + // Create queued milestone CONTEXT file + const m013Dir = join(repo, ".gsd", "milestones", "M013"); + mkdirSync(m013Dir, { recursive: true }); + writeFileSync( + join(m013Dir, "M013-CONTEXT.md"), + "# M013: Login Page Redesign\n\nQueued milestone context.\n", + ); + + // Dirty tracked file to trigger stash + writeFileSync(join(repo, "README.md"), "# test\n\nDirty for M015.\n"); + + mergeMilestoneToMain(repo, "M015", makeRoadmap("M015", "Feature 1", [ + { id: "S01", title: "Feature 1" }, + ])); + + assert.ok( + existsSync(join(m013Dir, "M013-CONTEXT.md")), + "M013-CONTEXT.md survives first merge", + ); + + // ── Second milestone: M016 ── + const wt2 = createAutoWorktree(repo, "M016"); + const wt2Name = wt2.replaceAll("\\", "/").split("/").pop() || "M016"; + const slice2 = `slice/${wt2Name}/S01`; + run(`git checkout -b 
"${slice2}"`, wt2); + writeFileSync(join(wt2, "feature2.ts"), "export const f2 = true;\n"); + run("git add .", wt2); + run('git commit -m "feature 2"', wt2); + run("git checkout milestone/M016", wt2); + run(`git merge --no-ff "${slice2}" -m "merge S01"`, wt2); + + // Dirty tracked file again + writeFileSync(join(repo, "README.md"), "# test\n\nDirty for M016.\n"); + + mergeMilestoneToMain(repo, "M016", makeRoadmap("M016", "Feature 2", [ + { id: "S01", title: "Feature 2" }, + ])); + + // After two consecutive merges, queued M013 CONTEXT must still exist + assert.ok( + existsSync(join(m013Dir, "M013-CONTEXT.md")), + "M013-CONTEXT.md must survive two consecutive milestone merges", + ); + assert.ok( + readFileSync(join(m013Dir, "M013-CONTEXT.md"), "utf-8").includes("Login Page Redesign"), + "M013 context content preserved after back-to-back merges", + ); + } finally { + rmSync(repo, { recursive: true, force: true }); + } +}); diff --git a/src/resources/extensions/gsd/tests/state-corruption-2945.test.ts b/src/resources/extensions/gsd/tests/state-corruption-2945.test.ts new file mode 100644 index 000000000..a7da901bc --- /dev/null +++ b/src/resources/extensions/gsd/tests/state-corruption-2945.test.ts @@ -0,0 +1,405 @@ +/** + * Regression tests for issue #2945: State corruption in milestone/slice completion workflow. 
+ * + * Covers all 4 sub-bugs: + * Bug 1: ROADMAP corrupted by inline UAT content in table rows + * Bug 2: complete-milestone event replay bypasses task validation + * Bug 3: Worktree directory not cleaned up after mergeAndExit + * Bug 4: Quality gate records not written by validate-milestone + */ + +import { describe, test, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync, existsSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { + openDatabase, + closeDatabase, + insertMilestone, + insertSlice, + insertTask, + getMilestoneSlices, + getSliceTasks, + getGateResults, +} from "../gsd-db.ts"; +import { renderRoadmapContent } from "../workflow-projections.ts"; +import type { MilestoneRow, SliceRow } from "../gsd-db.ts"; +import type { AutoSession } from "../auto/session.ts"; + +// ─── Fixture helpers ──────────────────────────────────────────────────────── + +function tempDbPath(): string { + const dir = mkdtempSync(join(tmpdir(), "gsd-2945-")); + return join(dir, "test.db"); +} + +function cleanupDb(dbPath: string): void { + closeDatabase(); + try { + const dir = join(dbPath, ".."); + rmSync(dir, { recursive: true, force: true }); + } catch { + // best effort + } +} + +function createTempProject(): { basePath: string } { + const basePath = mkdtempSync(join(tmpdir(), "gsd-2945-project-")); + mkdirSync(join(basePath, ".gsd", "milestones", "M001"), { recursive: true }); + return { basePath }; +} + +function makeMilestoneRow(overrides: Partial<MilestoneRow> = {}): MilestoneRow { + return { + id: "M001", + title: "Test Milestone", + vision: "Build a test milestone", + status: "active", + depends_on: [], + created_at: new Date().toISOString(), + completed_at: null, + success_criteria: ["SC1", "SC2"], + key_risks: [], + proof_strategy: [], + verification_contract: "", + verification_integration: "", + verification_operational: "", + 
verification_uat: "", + definition_of_done: [], + requirement_coverage: "", + boundary_map_markdown: "", + ...overrides, + }; +} + +function makeSliceRow(id: string, overrides: Partial<SliceRow> = {}): SliceRow { + return { + id, + milestone_id: "M001", + title: `Slice ${id}`, + goal: `Goal for ${id}`, + demo: `Demo for ${id}`, + risk: "medium", + status: "pending", + sequence: parseInt(id.replace("S", ""), 10) || 0, + depends: [], + created_at: new Date().toISOString(), + completed_at: null, + full_summary_md: "", + full_uat_md: "", + success_criteria: "", + proof_level: "", + integration_closure: "", + observability_impact: "", + replan_triggered_at: null, + ...overrides, + }; +} + +// ═══════════════════════════════════════════════════════════════════════════════ +// Bug 1: ROADMAP corrupted by inline UAT content +// ═══════════════════════════════════════════════════════════════════════════════ + +describe("#2945 Bug 1: ROADMAP table cell corruption by UAT content", () => { + + test("renderRoadmapContent does NOT inject full_uat_md into table rows when demo is empty", () => { + const milestone = makeMilestoneRow(); + + const longUatContent = `### Preconditions +- Database initialized +- Service running + +### Steps +1. Open the application +2. Navigate to settings +3. 
Enable dark mode + +### Expected +- Theme changes to dark +- All components update`; + + const slices: SliceRow[] = [ + makeSliceRow("S01", { + status: "complete", + demo: "", // empty demo + full_uat_md: longUatContent, // full UAT content in DB + }), + makeSliceRow("S02", { + status: "pending", + demo: "Advanced stuff works", + }), + ]; + + const content = renderRoadmapContent(milestone, slices); + + // The roadmap table row for S01 should NOT contain UAT content + assert.ok( + !content.includes("Preconditions"), + "roadmap table row must not contain UAT preconditions", + ); + assert.ok( + !content.includes("Navigate to settings"), + "roadmap table row must not contain UAT steps", + ); + + // Each table row should be a reasonable length (under 200 chars) + const lines = content.split("\n"); + const s01Row = lines.find(l => l.includes("| S01 |")); + assert.ok(s01Row, "S01 should appear as a table row"); + assert.ok( + s01Row!.length < 200, + `S01 row should be under 200 chars, got ${s01Row!.length}: ${s01Row!.slice(0, 100)}...`, + ); + + // S02 should still be visible + assert.ok(content.includes("| S02 |"), "S02 must still be visible in roadmap table"); + }); + + test("renderRoadmapContent uses 'TBD' fallback when demo is empty, not full_uat_md", () => { + const milestone = makeMilestoneRow(); + const slices: SliceRow[] = [ + makeSliceRow("S01", { demo: "", full_uat_md: "Long UAT content here" }), + ]; + + const content = renderRoadmapContent(milestone, slices); + assert.ok( + content.includes("TBD"), + "empty demo should fallback to 'TBD', not full_uat_md", + ); + assert.ok( + !content.includes("Long UAT content here"), + "full_uat_md should never appear in roadmap table", + ); + }); + + test("renderRoadmapContent preserves demo field when present", () => { + const milestone = makeMilestoneRow(); + const slices: SliceRow[] = [ + makeSliceRow("S01", { demo: "Basic functionality works", full_uat_md: "Full UAT" }), + ]; + + const content = 
renderRoadmapContent(milestone, slices); + assert.ok( + content.includes("Basic functionality works"), + "demo field should be used when present", + ); + assert.ok( + !content.includes("Full UAT"), + "full_uat_md should not be used when demo is present", + ); + }); +}); + +// ═══════════════════════════════════════════════════════════════════════════════ +// Bug 2: complete-milestone event replay bypasses task validation +// ═══════════════════════════════════════════════════════════════════════════════ + +describe("#2945 Bug 2: workflow-reconcile bypasses task validation for complete_slice", () => { + let dbPath: string; + + beforeEach(() => { + dbPath = tempDbPath(); + openDatabase(dbPath); + }); + + afterEach(() => { + cleanupDb(dbPath); + }); + + test("replaySliceComplete must not mark slice done when tasks are pending", async () => { + // Set up: M001 with S01 that has 2 tasks, one pending + insertMilestone({ id: "M001" }); + insertSlice({ id: "S01", milestoneId: "M001" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete", title: "Done task" }); + insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", status: "pending", title: "Pending task" }); + + // Import and call replaySliceComplete directly + const { replaySliceComplete } = await import("../workflow-reconcile.ts"); + replaySliceComplete("M001", "S01", new Date().toISOString()); + + // The slice should NOT be marked done because T02 is still pending + const slices = getMilestoneSlices("M001"); + const s01 = slices.find(s => s.id === "S01"); + assert.ok(s01, "S01 should exist"); + assert.notStrictEqual( + s01!.status, + "done", + "replaySliceComplete must not mark slice as done when tasks are pending", + ); + assert.notStrictEqual( + s01!.status, + "complete", + "replaySliceComplete must not mark slice as complete when tasks are pending", + ); + }); + + test("replaySliceComplete marks slice done when all tasks are complete", async () => { + insertMilestone({ id: 
"M001" }); + insertSlice({ id: "S01", milestoneId: "M001" }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete", title: "Done task" }); + insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", status: "done", title: "Also done" }); + + const { replaySliceComplete } = await import("../workflow-reconcile.ts"); + replaySliceComplete("M001", "S01", new Date().toISOString()); + + const slices = getMilestoneSlices("M001"); + const s01 = slices.find(s => s.id === "S01"); + assert.ok(s01, "S01 should exist"); + assert.strictEqual( + s01!.status, + "done", + "replaySliceComplete should mark slice as done when all tasks are complete", + ); + }); +}); + +// ═══════════════════════════════════════════════════════════════════════════════ +// Bug 3: Worktree directory not cleaned up after mergeAndExit +// ═══════════════════════════════════════════════════════════════════════════════ + +describe("#2945 Bug 3: mergeAndExit must teardown worktree after successful merge", () => { + + test("_mergeWorktreeMode calls teardownAutoWorktree after successful merge", async () => { + // Test the WorktreeResolver to verify teardown is called after merge. + // We use a mock-based approach since actual worktrees require a git repo. 
+    let teardownCalled = false;
+    let teardownMilestoneId = "";
+
+    const mockSession = {
+      basePath: "/mock/worktree/M001",
+      originalBasePath: "/mock/project",
+      isolationDegraded: false,
+      gitService: {} as unknown,
+    } as unknown as AutoSession;
+
+    const mockDeps = {
+      isInAutoWorktree: () => true,
+      shouldUseWorktreeIsolation: () => true,
+      getIsolationMode: () => "worktree" as const,
+      mergeMilestoneToMain: () => ({ pushed: false, codeFilesChanged: true }),
+      syncWorktreeStateBack: () => ({ synced: [] }),
+      teardownAutoWorktree: (basePath: string, mid: string) => {
+        teardownCalled = true;
+        teardownMilestoneId = mid;
+      },
+      createAutoWorktree: () => "",
+      enterAutoWorktree: () => "",
+      getAutoWorktreePath: () => null,
+      autoCommitCurrentBranch: () => {},
+      getCurrentBranch: () => "main",
+      autoWorktreeBranch: () => "gsd/M001",
+      resolveMilestoneFile: () => "/mock/roadmap.md",
+      readFileSync: () => "# Roadmap content",
+      GitServiceImpl: class {} as unknown as new (p: string, c: unknown) => unknown,
+      loadEffectiveGSDPreferences: () => undefined,
+      invalidateAllCaches: () => {},
+      captureIntegrationBranch: () => {},
+    };
+
+    // Import and create resolver
+    // We test the behavior contract: after a successful merge, teardown must be called
+    const { WorktreeResolver } = await import("../worktree-resolver.ts");
+    const resolver = new WorktreeResolver(mockSession, mockDeps);
+
+    const ctx = { notify: () => {} };
+    resolver.mergeAndExit("M001", ctx);
+
+    assert.ok(
+      teardownCalled,
+      "teardownAutoWorktree must be called after successful merge in worktree mode",
+    );
+    assert.strictEqual(
+      teardownMilestoneId,
+      "M001",
+      "teardown must be called with the correct milestone ID",
+    );
+  });
+});
+
+// ═══════════════════════════════════════════════════════════════════════════════
+// Bug 4: Quality gate records not written by validate-milestone
+// ═══════════════════════════════════════════════════════════════════════════════
+
+describe("#2945 Bug 4: validate-milestone must persist quality_gates records", () => {
+  let dbPath: string;
+  let basePath: string;
+
+  beforeEach(() => {
+    dbPath = tempDbPath();
+    openDatabase(dbPath);
+    const proj = createTempProject();
+    basePath = proj.basePath;
+  });
+
+  afterEach(() => {
+    cleanupDb(dbPath);
+    try { rmSync(basePath, { recursive: true, force: true }); } catch {}
+  });
+
+  test("handleValidateMilestone persists quality_gates records in DB", async () => {
+    // Set up milestone with slices
+    insertMilestone({ id: "M001" });
+    insertSlice({ id: "S01", milestoneId: "M001" });
+
+    const { handleValidateMilestone } = await import("../tools/validate-milestone.ts");
+
+    const result = await handleValidateMilestone({
+      milestoneId: "M001",
+      verdict: "pass",
+      remediationRound: 0,
+      successCriteriaChecklist: "- [x] SC1 met\n- [x] SC2 met",
+      sliceDeliveryAudit: "All slices delivered",
+      crossSliceIntegration: "Integration verified",
+      requirementCoverage: "100% coverage",
+      verdictRationale: "All checks pass",
+    }, basePath);
+
+    assert.ok(!("error" in result), `handler should succeed, got: ${JSON.stringify(result)}`);
+
+    // Quality gate records should exist in DB for this milestone
+    // Use a wildcard slice_id since milestone-level gates use a sentinel
+    const adapter = (await import("../gsd-db.ts"))._getAdapter()!;
+    const gates = adapter.prepare(
+      "SELECT * FROM quality_gates WHERE milestone_id = 'M001'"
+    ).all();
+
+    assert.ok(
+      gates.length > 0,
+      `validate-milestone must persist quality_gates records in DB, found ${gates.length}`,
+    );
+  });
+
+  test("handleValidateMilestone records verdict correctly in quality_gates", async () => {
+    insertMilestone({ id: "M001" });
+    insertSlice({ id: "S01", milestoneId: "M001" });
+
+    const { handleValidateMilestone } = await import("../tools/validate-milestone.ts");
+
+    await handleValidateMilestone({
+      milestoneId: "M001",
+      verdict: "needs-remediation",
+      remediationRound: 1,
+      successCriteriaChecklist: "- [ ] SC1 not met",
+      sliceDeliveryAudit: "S01 incomplete",
+      crossSliceIntegration: "Not tested",
+      requirementCoverage: "50% coverage",
+      verdictRationale: "Needs work",
+      remediationPlan: "Fix S01",
+    }, basePath);
+
+    const adapter = (await import("../gsd-db.ts"))._getAdapter()!;
+    const gates = adapter.prepare(
+      "SELECT * FROM quality_gates WHERE milestone_id = 'M001'"
+    ).all();
+
+    assert.ok(gates.length > 0, "quality_gates records must exist");
+
+    // At least one gate should have a non-empty verdict
+    const withVerdict = gates.filter((g: Record<string, unknown>) => g["verdict"] && g["verdict"] !== "");
+    assert.ok(
+      withVerdict.length > 0,
+      "at least one quality_gate should have a recorded verdict",
+    );
+  });
+});
diff --git a/src/resources/extensions/gsd/tests/state-derivation-parity.test.ts b/src/resources/extensions/gsd/tests/state-derivation-parity.test.ts
new file mode 100644
index 000000000..ff1dd1695
--- /dev/null
+++ b/src/resources/extensions/gsd/tests/state-derivation-parity.test.ts
@@ -0,0 +1,257 @@
+// GSD State Machine Regression Tests — Completion Hierarchy & State Derivation (#3161)
+
+import { describe, test, beforeEach, afterEach } from "node:test";
+import assert from "node:assert/strict";
+import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+
+import { deriveState, isGhostMilestone, invalidateStateCache } from "../state.ts";
+
+// ─── Fixture Helpers ───────────────────────────────────────────────────────
+
+function createFixtureBase(): string {
+  const base = mkdtempSync(join(tmpdir(), "gsd-parity-test-"));
+  mkdirSync(join(base, ".gsd", "milestones"), { recursive: true });
+  return base;
+}
+
+function cleanup(base: string): void {
+  rmSync(base, { recursive: true, force: true });
+}
+
+function writeMilestoneFile(base: string, mid: string, suffix: string, content: string): void {
+  const dir = join(base, ".gsd", "milestones", mid);
+  mkdirSync(dir, { recursive: true });
+  writeFileSync(join(dir, `${mid}-${suffix}.md`), content);
+}
+
+function writeMilestoneValidation(base: string, mid: string, verdict: string = "pass"): void {
+  const dir = join(base, ".gsd", "milestones", mid);
+  mkdirSync(dir, { recursive: true });
+  writeFileSync(
+    join(dir, `${mid}-VALIDATION.md`),
+    `---\nverdict: ${verdict}\nremediation_round: 0\n---\n\n# Validation\nValidated.`,
+  );
+}
+
+// ─── Setup / Teardown ──────────────────────────────────────────────────────
+
+beforeEach(() => {
+  invalidateStateCache();
+});
+
+afterEach(() => {
+  invalidateStateCache();
+});
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Tests
+// ═══════════════════════════════════════════════════════════════════════════
+
+describe("state-derivation-parity", () => {
+
+  // ─── Test 1: ghost milestone with only META.json ─────────────────────────
+  test("ghost milestone with only META.json is correctly detected", () => {
+    const base = createFixtureBase();
+    try {
+      const dir = join(base, ".gsd", "milestones", "M001");
+      mkdirSync(dir, { recursive: true });
+      // Write only META.json — no CONTEXT, CONTEXT-DRAFT, ROADMAP, or SUMMARY
+      writeFileSync(join(dir, "META.json"), JSON.stringify({ id: "M001", createdAt: new Date().toISOString() }));
+
+      assert.ok(
+        isGhostMilestone(base, "M001"),
+        "milestone with only META.json is a ghost",
+      );
+    } finally {
+      cleanup(base);
+    }
+  });
+
+  // ─── Test 2: non-ghost milestone with CONTEXT is not ghost ───────────────
+  test("non-ghost milestone with CONTEXT is not ghost", () => {
+    const base = createFixtureBase();
+    try {
+      writeMilestoneFile(base, "M001", "CONTEXT", "# M001 Context\n\nThis milestone has real content.");
+
+      assert.ok(
+        !isGhostMilestone(base, "M001"),
+        "milestone with CONTEXT.md is not a ghost",
+      );
+    } finally {
+      cleanup(base);
+    }
+  });
+
+  // ─── Test 3: empty milestones dir derives pre-planning phase ─────────────
+  test("empty milestones dir derives pre-planning phase", async () => {
+    const base = createFixtureBase();
+    try {
+      const state = await deriveState(base);
+      assert.equal(state.phase, "pre-planning", "empty milestones dir yields pre-planning phase");
+      assert.equal(state.activeMilestone, null, "no active milestone for empty dir");
+      assert.equal(state.activeSlice, null, "no active slice for empty dir");
+      assert.deepEqual(state.registry, [], "registry is empty for empty dir");
+    } finally {
+      cleanup(base);
+    }
+  });
+
+  // ─── Test 4: state includes blockers field for future blocked-phase detection ──
+  test("deriveState result always includes a defined phase and nextAction", async () => {
+    // Document that the state shape includes a `phase` string and `nextAction` string.
+    // Triggering "blocked" via filesystem alone requires circular dep setup which
+    // is outside the scope of these parity tests. Instead we verify the shape.
+    const base = createFixtureBase();
+    try {
+      // Provide a milestone with a ROADMAP that has a single incomplete slice
+      const dir = join(base, ".gsd", "milestones", "M001");
+      mkdirSync(dir, { recursive: true });
+      writeFileSync(
+        join(dir, "M001-ROADMAP.md"),
+        `# M001: Test\n\n**Vision:** Parity check.\n\n## Slices\n\n- [ ] **S01: First Slice** \`risk:low\` \`depends:[]\`\n  > After this: First slice done.\n`,
+      );
+
+      const state = await deriveState(base);
+
+      assert.ok(typeof state.phase === "string", "state.phase is a string");
+      assert.ok(typeof state.nextAction === "string", "state.nextAction is a string");
+      // The state object is the same shape regardless of phase — blockers would
+      // appear when the phase is "blocked". We document that the field may exist.
+      assert.ok("activeMilestone" in state, "state has activeMilestone field");
+      assert.ok("registry" in state, "state has registry field");
+    } finally {
+      cleanup(base);
+    }
+  });
+
+  // ─── Test 5: CONTEXT-DRAFT but no CONTEXT returns needs-discussion ────────
+  test("deriveState with CONTEXT-DRAFT but no CONTEXT returns needs-discussion", async () => {
+    const base = createFixtureBase();
+    try {
+      writeMilestoneFile(
+        base,
+        "M001",
+        "CONTEXT-DRAFT",
+        "# Draft Context\n\nSeed discussion material for M001.",
+      );
+
+      const state = await deriveState(base);
+      assert.equal(
+        state.phase,
+        "needs-discussion",
+        "CONTEXT-DRAFT with no CONTEXT yields needs-discussion phase",
+      );
+      assert.equal(state.activeMilestone?.id, "M001", "active milestone is M001");
+      assert.equal(state.activeSlice, null, "no active slice in needs-discussion phase");
+    } finally {
+      cleanup(base);
+    }
+  });
+
+  // ─── Test 6: deriveState skips ghost milestones when finding active milestone ──
+  test("deriveState skips ghost milestones when finding active milestone", async () => {
+    const base = createFixtureBase();
+    try {
+      // M001: ghost — just an empty directory
+      mkdirSync(join(base, ".gsd", "milestones", "M001"), { recursive: true });
+
+      // M002: has CONTEXT-DRAFT — should become active
+      writeMilestoneFile(
+        base,
+        "M002",
+        "CONTEXT-DRAFT",
+        "# Draft for M002\n\nThis is the real milestone.",
+      );
+
+      const state = await deriveState(base);
+
+      // M001 is a ghost so it is skipped; M002 becomes the active milestone
+      assert.equal(
+        state.activeMilestone?.id,
+        "M002",
+        "ghost M001 is skipped; M002 is the active milestone",
+      );
+      assert.equal(
+        state.phase,
+        "needs-discussion",
+        "phase is needs-discussion because M002 has only CONTEXT-DRAFT",
+      );
+    } finally {
+      cleanup(base);
+    }
+  });
+
+  // ─── Bonus: isGhostMilestone returns true for fully empty directory ───────
+  test("isGhostMilestone returns true for milestone directory with no files", () => {
+    const base = createFixtureBase();
+    try {
+      mkdirSync(join(base, ".gsd", "milestones", "M001"), { recursive: true });
+      // No files at all in the directory
+      assert.ok(
+        isGhostMilestone(base, "M001"),
+        "milestone directory with no files is a ghost",
+      );
+    } finally {
+      cleanup(base);
+    }
+  });
+
+  // ─── Bonus: isGhostMilestone returns false when ROADMAP exists ────────────
+  test("isGhostMilestone returns false when ROADMAP exists", () => {
+    const base = createFixtureBase();
+    try {
+      writeMilestoneFile(base, "M001", "ROADMAP", "# M001\n\n## Slices\n\n- [ ] **S01: First** `risk:low` `depends:[]`\n  > After this: done.\n");
+      assert.ok(
+        !isGhostMilestone(base, "M001"),
+        "milestone with ROADMAP is not a ghost",
+      );
+    } finally {
+      cleanup(base);
+    }
+  });
+
+  // ─── Bonus: isGhostMilestone returns false when CONTEXT-DRAFT exists ──────
+  test("isGhostMilestone returns false when CONTEXT-DRAFT exists", () => {
+    const base = createFixtureBase();
+    try {
+      writeMilestoneFile(base, "M001", "CONTEXT-DRAFT", "# Draft\n\nSeed material.");
+      assert.ok(
+        !isGhostMilestone(base, "M001"),
+        "milestone with CONTEXT-DRAFT is not a ghost",
+      );
+    } finally {
+      cleanup(base);
+    }
+  });
+
+  // ─── Bonus: multiple ghost milestones before a real one are all skipped ───
+  test("deriveState skips multiple ghost milestones to find the first real one", async () => {
+    const base = createFixtureBase();
+    try {
+      // M001 and M002: ghosts
+      mkdirSync(join(base, ".gsd", "milestones", "M001"), { recursive: true });
+      mkdirSync(join(base, ".gsd", "milestones", "M002"), { recursive: true });
+
+      // M003: has CONTEXT-DRAFT — first real milestone
+      writeMilestoneFile(base, "M003", "CONTEXT-DRAFT", "# M003 Draft\n\nFirst substantive milestone.");
+
+      const state = await deriveState(base);
+
+      assert.equal(
+        state.activeMilestone?.id,
+        "M003",
+        "both ghost milestones skipped; M003 is active",
+      );
+      assert.equal(
+        state.phase,
+        "needs-discussion",
+        "phase is needs-discussion for M003 with CONTEXT-DRAFT",
+      );
+    } finally {
+      cleanup(base);
+    }
+  });
+
+});
diff --git a/src/resources/extensions/gsd/tests/state-machine-full-walkthrough.test.ts b/src/resources/extensions/gsd/tests/state-machine-full-walkthrough.test.ts
new file mode 100644
index 000000000..b9d14baa6
--- /dev/null
+++ b/src/resources/extensions/gsd/tests/state-machine-full-walkthrough.test.ts
@@ -0,0 +1,1628 @@
+// GSD State Machine — Comprehensive Phase-by-Phase Walkthrough Tests
+// Verifies all 16 phases, reconciliation, edge cases, and cross-validation.
+
+import { describe, test, afterEach } from "node:test";
+import assert from "node:assert/strict";
+import { mkdtempSync, mkdirSync, rmSync, writeFileSync, existsSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+
+import {
+  deriveState,
+  deriveStateFromDb,
+  isValidationTerminal,
+  isGhostMilestone,
+  invalidateStateCache,
+} from "../state.ts";
+import {
+  openDatabase,
+  closeDatabase,
+  insertMilestone,
+  insertSlice,
+  insertTask,
+  updateTaskStatus,
+  getAllMilestones,
+  insertGateRow,
+  getPendingSliceGateCount,
+} from "../gsd-db.ts";
+import { isClosedStatus } from "../status-guards.ts";
+import { clearPathCache } from "../paths.ts";
+
+// ─── Fixture Helpers ─────────────────────────────────────────────────────────
+
+const tempDirs: string[] = [];
+
+function createFixtureBase(): string {
+  const base = mkdtempSync(join(tmpdir(), "gsd-walkthrough-"));
+  mkdirSync(join(base, ".gsd", "milestones"), { recursive: true });
+  tempDirs.push(base);
+  return base;
+}
+
+afterEach(() => {
+  for (const dir of tempDirs.splice(0)) {
+    try {
+      rmSync(dir, { recursive: true, force: true });
+    } catch { /* best effort */ }
+  }
+  try { closeDatabase(); } catch { /* may not be open */ }
+});
+
+function writeContext(base: string, mid: string, content: string): void {
+  const dir = join(base, ".gsd", "milestones", mid);
+  mkdirSync(dir, { recursive: true });
+  writeFileSync(join(dir, `${mid}-CONTEXT.md`), content);
+}
+
+function writeContextDraft(base: string, mid: string, content: string): void {
+  const dir = join(base, ".gsd", "milestones", mid);
+  mkdirSync(dir, { recursive: true });
+  writeFileSync(join(dir, `${mid}-CONTEXT-DRAFT.md`), content);
+}
+
+function writeRoadmap(base: string, mid: string, content: string): void {
+  const dir = join(base, ".gsd", "milestones", mid);
+  mkdirSync(dir, { recursive: true });
+  writeFileSync(join(dir, `${mid}-ROADMAP.md`), content);
+}
+
+function writePlan(base: string, mid: string, sid: string, content: string): void {
+  const dir = join(base, ".gsd", "milestones", mid, "slices", sid);
+  const tasksDir = join(dir, "tasks");
+  mkdirSync(tasksDir, { recursive: true });
+  writeFileSync(join(dir, `${sid}-PLAN.md`), content);
+  // Create stub task plan files so deriveState doesn't fall back to planning
+  const taskMatches = content.matchAll(/\*\*(T\d+):/g);
+  for (const m of taskMatches) {
+    const tid = m[1];
+    writeFileSync(join(tasksDir, `${tid}-PLAN.md`), `# ${tid} Plan\n\nStub.\n`);
+  }
+}
+
+function writeTaskSummary(base: string, mid: string, sid: string, tid: string): void {
+  const tasksDir = join(base, ".gsd", "milestones", mid, "slices", sid, "tasks");
+  mkdirSync(tasksDir, { recursive: true });
+  writeFileSync(join(tasksDir, `${tid}-SUMMARY.md`), [
+    `# ${tid} Summary`,
+    "",
+    "Task completed successfully.",
+  ].join("\n"));
+}
+
+function writeTaskSummaryWithBlocker(base: string, mid: string, sid: string, tid: string): void {
+  const tasksDir = join(base, ".gsd", "milestones", mid, "slices", sid, "tasks");
+  mkdirSync(tasksDir, { recursive: true });
+  writeFileSync(join(tasksDir, `${tid}-SUMMARY.md`), [
+    "---",
+    "blocker_discovered: true",
+    "---",
+    "",
+    `# ${tid} Summary`,
+    "",
+    "Blocker found during execution.",
+  ].join("\n"));
+}
+
+function writeSliceSummary(base: string, mid: string, sid: string): void {
+  const dir = join(base, ".gsd", "milestones", mid, "slices", sid);
+  mkdirSync(dir, { recursive: true });
+  writeFileSync(join(dir, `${sid}-SUMMARY.md`), `# ${sid} Summary\n\nSlice done.\n`);
+}
+
+function writeMilestoneSummary(base: string, mid: string): void {
+  const dir = join(base, ".gsd", "milestones", mid);
+  mkdirSync(dir, { recursive: true });
+  writeFileSync(join(dir, `${mid}-SUMMARY.md`), `# ${mid} Summary\n\nMilestone complete.\n`);
+}
+
+function writeMilestoneValidation(base: string, mid: string, verdict: string = "pass"): void {
+  const dir = join(base, ".gsd", "milestones", mid);
+  mkdirSync(dir, { recursive: true });
+  writeFileSync(join(dir, `${mid}-VALIDATION.md`), [
+    "---",
+    `verdict: ${verdict}`,
+    "remediation_round: 0",
+    "---",
+    "",
+    "# Validation",
+    "Validated.",
+  ].join("\n"));
+}
+
+function writeReplanTrigger(base: string, mid: string, sid: string): void {
+  const dir = join(base, ".gsd", "milestones", mid, "slices", sid);
+  mkdirSync(dir, { recursive: true });
+  writeFileSync(join(dir, `${sid}-REPLAN-TRIGGER.md`), "Triage replan triggered.\n");
+}
+
+function writeReplan(base: string, mid: string, sid: string): void {
+  const dir = join(base, ".gsd", "milestones", mid, "slices", sid);
+  mkdirSync(dir, { recursive: true });
+  writeFileSync(join(dir, `${sid}-REPLAN.md`), "# Replan\n\nReplan completed.\n");
+}
+
+function writeContinue(base: string, mid: string, sid: string): void {
+  const dir = join(base, ".gsd", "milestones", mid, "slices", sid);
+  mkdirSync(dir, { recursive: true });
+  writeFileSync(join(dir, `${sid}-CONTINUE.md`), [
+    "---",
+    "milestone: " + mid,
+    "slice: " + sid,
+    "task: T01",
+    "status: interrupted",
+    "---",
+    "",
+    "# Continue",
+    "Resume from step 2.",
+  ].join("\n"));
+}
+
+/** Standard roadmap with one incomplete slice */
+function standardRoadmap(): string {
+  return [
+    "# M001: Test Milestone",
+    "",
+    "**Vision:** Test state machine.",
+    "",
+    "## Slices",
+    "",
+    "- [ ] **S01: First Slice** `risk:low` `depends:[]`",
+    "  > After this: slice done.",
+  ].join("\n");
+}
+
+/** Roadmap with one done slice */
+function doneSliceRoadmap(): string {
+  return [
+    "# M001: Test Milestone",
+    "",
+    "**Vision:** Test state machine.",
+    "",
+    "## Slices",
+    "",
+    "- [x] **S01: Done Slice** `risk:low` `depends:[]`",
+    "  > After this: slice done.",
+  ].join("\n");
+}
+
+/** Standard plan with two incomplete tasks */
+function standardPlan(): string {
+  return [
+    "# S01: First Slice",
+    "",
+    "**Goal:** Test.",
+    "**Demo:** Tests pass.",
+    "",
+    "## Tasks",
+    "",
+    "- [ ] **T01: First Task** `est:10m`",
+    "  First task description.",
+    "",
+    "- [ ] **T02: Second Task** `est:10m`",
+    "  Second task description.",
+  ].join("\n");
+}
+
+/** Plan with all tasks done */
+function allDonePlan(): string {
+  return [
+    "# S01: First Slice",
+    "",
+    "**Goal:** Test.",
+    "**Demo:** Tests pass.",
+    "",
+    "## Tasks",
+    "",
+    "- [x] **T01: First Task** `est:10m`",
+    "  First task done.",
+    "",
+    "- [x] **T02: Second Task** `est:10m`",
+    "  Second task done.",
+  ].join("\n");
+}
+
+/** Plan with one done, one incomplete task */
+function partialDonePlan(): string {
+  return [
+    "# S01: First Slice",
+    "",
+    "**Goal:** Test.",
+    "**Demo:** Tests pass.",
+    "",
+    "## Tasks",
+    "",
+    "- [x] **T01: First Task** `est:10m`",
+    "  First task done.",
+    "",
+    "- [ ] **T02: Second Task** `est:10m`",
+    "  Second task pending.",
+  ].join("\n");
+}
+
+// ═══════════════════════════════════════════════════════════════════════════════
+// PHASE 1: pre-planning
+// ═══════════════════════════════════════════════════════════════════════════════
+
+describe("state-machine-full-walkthrough", () => {
+
+  describe("Phase 1: pre-planning", () => {
+    test("empty milestones dir → pre-planning", async () => {
+      const base = createFixtureBase();
+      invalidateStateCache();
+      const state = await deriveState(base);
+
+      assert.equal(state.phase, "pre-planning");
+      assert.equal(state.activeMilestone, null);
+      assert.equal(state.activeSlice, null);
+      assert.equal(state.activeTask, null);
+      assert.deepStrictEqual(state.registry, []);
+    });
+
+    test("milestone with CONTEXT but no ROADMAP → pre-planning", async () => {
+      const base = createFixtureBase();
+      writeContext(base, "M001", "# M001: Test\n\nSome context.");
+      invalidateStateCache();
+      const state = await deriveState(base);
+
+      assert.equal(state.phase, "pre-planning");
+      assert.ok(state.activeMilestone !== null, "activeMilestone should be set");
+      assert.equal(state.activeMilestone?.id, "M001");
+    });
+
+    test("roadmap with zero slices → pre-planning (not validating-milestone)", async () => {
+      const base = createFixtureBase();
+      writeContext(base, "M001", "# M001: Test\n\nContext.");
+      // Roadmap exists but has no slice entries
+      writeRoadmap(base, "M001", [
+        "# M001: Test Milestone",
+        "",
+        "**Vision:** Test.",
+        "",
+        "## Slices",
+        "",
+        "No slices defined yet.",
+      ].join("\n"));
+      invalidateStateCache();
+      const state = await deriveState(base);
+
+      assert.equal(state.phase, "pre-planning", "zero slices must NOT trigger validating-milestone (#2667)");
+    });
+  });
+
+  // ═══════════════════════════════════════════════════════════════════════════
+  // PHASE 2: needs-discussion
+  // ═══════════════════════════════════════════════════════════════════════════
+
+  describe("Phase 2: needs-discussion", () => {
+    test("CONTEXT-DRAFT exists, no CONTEXT → needs-discussion", async () => {
+      const base = createFixtureBase();
+      writeContextDraft(base, "M001", "# M001: Draft\n\nDraft context.");
+      invalidateStateCache();
+      const state = await deriveState(base);
+
+      assert.equal(state.phase, "needs-discussion");
+      assert.ok(state.activeMilestone !== null);
+      assert.equal(state.activeMilestone?.id, "M001");
+    });
+
+    test("both CONTEXT-DRAFT and CONTEXT exist → NOT needs-discussion", async () => {
+      const base = createFixtureBase();
+      writeContext(base, "M001", "# M001: Real\n\nReal context.");
+      writeContextDraft(base, "M001", "# M001: Draft\n\nDraft context.");
+      invalidateStateCache();
+      const state = await deriveState(base);
+
+      assert.notEqual(state.phase, "needs-discussion", "CONTEXT should win over CONTEXT-DRAFT");
+    });
+  });
+
+  // ═══════════════════════════════════════════════════════════════════════════
+  // PHASE 3: discussing (auto-mode only)
+  // ═══════════════════════════════════════════════════════════════════════════
+
+  describe("Phase 3: discussing (auto-mode only)", () => {
+    test("discussing is NOT reachable from deriveState", async () => {
+      // discussing is set only by auto-mode, never by state derivation.
+      // Verify that CONTEXT-DRAFT → needs-discussion (not discussing).
+      const base = createFixtureBase();
+      writeContextDraft(base, "M001", "# M001: Draft\n\nDraft.");
+      invalidateStateCache();
+      const state = await deriveState(base);
+      assert.notEqual(state.phase, "discussing");
+    });
+  });
+
+  // ═══════════════════════════════════════════════════════════════════════════
+  // PHASE 4: researching (auto-mode only)
+  // ═══════════════════════════════════════════════════════════════════════════
+
+  describe("Phase 4: researching (auto-mode only)", () => {
+    test("researching is NOT reachable from deriveState", async () => {
+      const base = createFixtureBase();
+      writeContext(base, "M001", "# M001: Test\n\nContext.");
+      writeRoadmap(base, "M001", standardRoadmap());
+      invalidateStateCache();
+      const state = await deriveState(base);
+      assert.notEqual(state.phase, "researching");
+    });
+  });
+
+  // ═══════════════════════════════════════════════════════════════════════════
+  // PHASE 5: planning
+  // ═══════════════════════════════════════════════════════════════════════════
+
+  describe("Phase 5: planning", () => {
+    test("roadmap with slice, no PLAN file → planning", async () => {
+      const base = createFixtureBase();
+      writeRoadmap(base, "M001", standardRoadmap());
+      invalidateStateCache();
+      const state = await deriveState(base);
+
+      assert.equal(state.phase, "planning");
+      assert.ok(state.activeSlice !== null);
+      assert.equal(state.activeSlice?.id, "S01");
+    });
+
+    test("PLAN exists but zero tasks → planning", async () => {
+      const base = createFixtureBase();
+      writeRoadmap(base, "M001", standardRoadmap());
+      // Plan file with no task entries
+      const dir = join(base, ".gsd", "milestones", "M001", "slices", "S01");
+      mkdirSync(dir, { recursive: true });
+      writeFileSync(join(dir, "S01-PLAN.md"), [
+        "# S01: First Slice",
+        "",
+        "**Goal:** Test.",
+        "**Demo:** Tests pass.",
+        "",
+        "## Tasks",
+        "",
+        "No tasks defined yet.",
+      ].join("\n"));
+      invalidateStateCache();
+      const state = await deriveState(base);
+
+      assert.equal(state.phase, "planning", "plan with zero tasks should remain in planning");
+    });
+
+    test("PLAN with tasks but missing T##-PLAN.md files → planning", async () => {
+      const base = createFixtureBase();
+      writeRoadmap(base, "M001", standardRoadmap());
+      // Write plan file WITH tasks but WITHOUT stub T##-PLAN.md files
+      const dir = join(base, ".gsd", "milestones", "M001", "slices", "S01");
+      mkdirSync(join(dir, "tasks"), { recursive: true });
+      writeFileSync(join(dir, "S01-PLAN.md"), standardPlan());
+      // Intentionally do NOT create T01-PLAN.md or T02-PLAN.md
+      invalidateStateCache();
+      const state = await deriveState(base);
+
+      assert.equal(state.phase, "planning", "missing task plan files should stay in planning");
+    });
+
+    test("PLAN with all task plan files → NOT planning", async () => {
+      const base = createFixtureBase();
+      writeRoadmap(base, "M001", standardRoadmap());
+      writePlan(base, "M001", "S01", standardPlan());
+      invalidateStateCache();
+      const state = await deriveState(base);
+
+      assert.notEqual(state.phase, "planning", "complete plan should advance past planning");
+      // Should be executing since there are incomplete tasks
+      assert.equal(state.phase, "executing");
+    });
+  });
+
+  // ═══════════════════════════════════════════════════════════════════════════
+  // PHASE 6: evaluating-gates (DB path only)
+  // ═══════════════════════════════════════════════════════════════════════════
+
+  describe("Phase 6: evaluating-gates", () => {
+    test("DB path: pending quality gates → evaluating-gates", async () => {
+      const base = createFixtureBase();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+
+      // Set up milestone + slice + task in DB
+      insertMilestone({ id: "M001", title: "M001: Test", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", title: "S01: Slice", status: "active", depends: [] });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "T01: Task", status: "pending" });
+
+      // Write plan on disk (needed for state derivation)
+      writeRoadmap(base, "M001", standardRoadmap());
+      writePlan(base, "M001", "S01", standardPlan());
+
+      // Insert a pending quality gate
+      insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q3", scope: "slice", status: "pending" });
+
+      const pending = getPendingSliceGateCount("M001", "S01");
+      assert.ok(pending > 0, "should have pending gates");
+
+      invalidateStateCache();
+      const state = await deriveStateFromDb(base);
+
+      assert.equal(state.phase, "evaluating-gates");
+    });
+
+    test("DB path: no pending gates → NOT evaluating-gates", async () => {
+      const base = createFixtureBase();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+
+      insertMilestone({ id: "M001", title: "M001: Test", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", title: "S01: Slice", status: "active", depends: [] });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "T01: Task", status: "pending" });
+
+      writeRoadmap(base, "M001", standardRoadmap());
+      writePlan(base, "M001", "S01", standardPlan());
+
+      // No gate rows → getPendingSliceGateCount returns 0
+      const pending = getPendingSliceGateCount("M001", "S01");
+      assert.equal(pending, 0, "should have no pending gates");
+
+      invalidateStateCache();
+      const state = await deriveStateFromDb(base);
+ + assert.notEqual(state.phase, "evaluating-gates"); + }); + }); + + // ═══════════════════════════════════════════════════════════════════════════ + // PHASE 7: executing + // ═══════════════════════════════════════════════════════════════════════════ + + describe("Phase 7: executing", () => { + test("active task, no blockers → executing", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", standardRoadmap()); + writePlan(base, "M001", "S01", standardPlan()); + invalidateStateCache(); + const state = await deriveState(base); + + assert.equal(state.phase, "executing"); + assert.ok(state.activeTask !== null); + assert.equal(state.activeTask?.id, "T01"); + }); + + test("active task with CONTINUE.md → executing with resume message", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", standardRoadmap()); + writePlan(base, "M001", "S01", standardPlan()); + writeContinue(base, "M001", "S01"); + invalidateStateCache(); + const state = await deriveState(base); + + assert.equal(state.phase, "executing"); + assert.ok( + state.nextAction.toLowerCase().includes("resume") || state.nextAction.toLowerCase().includes("continue"), + "nextAction should mention resume/continue", + ); + }); + + test("one task remaining among completed → executing (not summarizing)", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", standardRoadmap()); + writePlan(base, "M001", "S01", partialDonePlan()); + invalidateStateCache(); + const state = await deriveState(base); + + assert.equal(state.phase, "executing", "should be executing while tasks remain"); + assert.equal(state.activeTask?.id, "T02", "active task should be T02"); + assert.equal(state.progress?.tasks?.done, 1); + assert.equal(state.progress?.tasks?.total, 2); + }); + }); + + // ═══════════════════════════════════════════════════════════════════════════ + // PHASE 8: verifying (auto-mode only) + // 
═══════════════════════════════════════════════════════════════════════════ + + describe("Phase 8: verifying (auto-mode only)", () => { + test("verifying is NOT reachable from deriveState", async () => { + // verifying is set only by auto-mode verification gates. + const base = createFixtureBase(); + writeRoadmap(base, "M001", standardRoadmap()); + writePlan(base, "M001", "S01", allDonePlan()); + invalidateStateCache(); + const state = await deriveState(base); + assert.notEqual(state.phase, "verifying"); + }); + }); + + // ═══════════════════════════════════════════════════════════════════════════ + // PHASE 9: summarizing + // ═══════════════════════════════════════════════════════════════════════════ + + describe("Phase 9: summarizing", () => { + test("all tasks done, slice not complete → summarizing", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", standardRoadmap()); + writePlan(base, "M001", "S01", allDonePlan()); + invalidateStateCache(); + const state = await deriveState(base); + + assert.equal(state.phase, "summarizing"); + assert.ok(state.activeSlice !== null); + assert.equal(state.activeSlice?.id, "S01"); + assert.equal(state.activeTask, null, "no active task when all done"); + assert.equal(state.progress?.tasks?.done, 2); + assert.equal(state.progress?.tasks?.total, 2); + }); + + test("tasks reconciled via SUMMARY on disk → summarizing", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", standardRoadmap()); + // Plan says tasks incomplete (headings, no checkboxes) ... 
+ const planContent = [ + "# S01: First Slice", + "", + "**Goal:** Test.", + "**Demo:** Tests pass.", + "", + "## Tasks", + "", + "### T01: First Task", + "First task.", + "", + "### T02: Second Task", + "Second task.", + ].join("\n"); + const dir = join(base, ".gsd", "milestones", "M001", "slices", "S01"); + const tasksDir = join(dir, "tasks"); + mkdirSync(tasksDir, { recursive: true }); + writeFileSync(join(dir, "S01-PLAN.md"), planContent); + writeFileSync(join(tasksDir, "T01-PLAN.md"), "# T01 Plan\nStub.\n"); + writeFileSync(join(tasksDir, "T02-PLAN.md"), "# T02 Plan\nStub.\n"); + + // ... but SUMMARY files exist on disk (reconciliation trigger) + writeTaskSummary(base, "M001", "S01", "T01"); + writeTaskSummary(base, "M001", "S01", "T02"); + + invalidateStateCache(); + const state = await deriveState(base); + + // Reconciliation should mark both tasks done → summarizing + assert.equal(state.phase, "summarizing", "SUMMARY reconciliation should advance to summarizing"); + }); + }); + + // ═══════════════════════════════════════════════════════════════════════════ + // PHASE 10: advancing (auto-mode only) + // ═══════════════════════════════════════════════════════════════════════════ + + describe("Phase 10: advancing (auto-mode only)", () => { + test("advancing is NOT reachable from deriveState", async () => { + // advancing is an internal auto-mode transition marker + const base = createFixtureBase(); + writeRoadmap(base, "M001", standardRoadmap()); + writePlan(base, "M001", "S01", standardPlan()); + invalidateStateCache(); + const state = await deriveState(base); + assert.notEqual(state.phase, "advancing"); + }); + }); + + // ═══════════════════════════════════════════════════════════════════════════ + // PHASE 11: validating-milestone + // ═══════════════════════════════════════════════════════════════════════════ + + describe("Phase 11: validating-milestone", () => { + test("all slices done, no VALIDATION file → validating-milestone", async () => { + const 
base = createFixtureBase(); + writeRoadmap(base, "M001", doneSliceRoadmap()); + invalidateStateCache(); + const state = await deriveState(base); + + assert.equal(state.phase, "validating-milestone"); + assert.ok(state.activeMilestone !== null); + }); + + test("all slices done, VALIDATION with unparseable verdict → validating-milestone", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", doneSliceRoadmap()); + // Write a validation file with no parseable verdict + const dir = join(base, ".gsd", "milestones", "M001"); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, "M001-VALIDATION.md"), "Just some text with no frontmatter verdict."); + invalidateStateCache(); + const state = await deriveState(base); + + assert.equal(state.phase, "validating-milestone", "unparseable verdict should stay in validating"); + }); + + test("all slices done, terminal verdict → NOT validating-milestone", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", doneSliceRoadmap()); + writeMilestoneValidation(base, "M001", "pass"); + invalidateStateCache(); + const state = await deriveState(base); + + assert.notEqual(state.phase, "validating-milestone"); + }); + }); + + // ═══════════════════════════════════════════════════════════════════════════ + // PHASE 12: completing-milestone + // ═══════════════════════════════════════════════════════════════════════════ + + describe("Phase 12: completing-milestone", () => { + test("all slices done, validation terminal, no SUMMARY → completing-milestone", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", doneSliceRoadmap()); + writeMilestoneValidation(base, "M001", "pass"); + invalidateStateCache(); + const state = await deriveState(base); + + assert.equal(state.phase, "completing-milestone"); + assert.ok(state.activeMilestone !== null); + }); + + test("all slices done, validation terminal, SUMMARY exists → NOT completing-milestone", async () => { + const 
base = createFixtureBase(); + writeRoadmap(base, "M001", doneSliceRoadmap()); + writeMilestoneValidation(base, "M001", "pass"); + writeMilestoneSummary(base, "M001"); + invalidateStateCache(); + const state = await deriveState(base); + + assert.notEqual(state.phase, "completing-milestone", "should be complete, not completing"); + assert.equal(state.phase, "complete"); + }); + }); + + // ═══════════════════════════════════════════════════════════════════════════ + // PHASE 13: replanning-slice + // ═══════════════════════════════════════════════════════════════════════════ + + describe("Phase 13: replanning-slice", () => { + test("filesystem: task with blocker_discovered, no REPLAN.md → replanning-slice", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", standardRoadmap()); + // T01 is done with blocker, T02 is pending + writePlan(base, "M001", "S01", partialDonePlan()); + writeTaskSummaryWithBlocker(base, "M001", "S01", "T01"); + invalidateStateCache(); + const state = await deriveState(base); + + assert.equal(state.phase, "replanning-slice"); + assert.ok(state.blockers.length > 0, "should have blocker details"); + }); + + test("filesystem: REPLAN-TRIGGER.md exists, no REPLAN.md → replanning-slice", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", standardRoadmap()); + writePlan(base, "M001", "S01", standardPlan()); + writeReplanTrigger(base, "M001", "S01"); + invalidateStateCache(); + const state = await deriveState(base); + + assert.equal(state.phase, "replanning-slice"); + }); + + test("filesystem: REPLAN-TRIGGER + REPLAN.md exists → NOT replanning-slice (loop guard)", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", standardRoadmap()); + writePlan(base, "M001", "S01", standardPlan()); + writeReplanTrigger(base, "M001", "S01"); + writeReplan(base, "M001", "S01"); + invalidateStateCache(); + const state = await deriveState(base); + + assert.notEqual(state.phase, 
"replanning-slice", "REPLAN.md loop guard should prevent re-entering replanning"); + // Should fall through to executing + assert.equal(state.phase, "executing"); + }); + }); + + // ═══════════════════════════════════════════════════════════════════════════ + // PHASE 14: complete + // ═══════════════════════════════════════════════════════════════════════════ + + describe("Phase 14: complete", () => { + test("single milestone with SUMMARY + VALIDATION → complete", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", doneSliceRoadmap()); + writeMilestoneValidation(base, "M001", "pass"); + writeMilestoneSummary(base, "M001"); + invalidateStateCache(); + const state = await deriveState(base); + + assert.equal(state.phase, "complete"); + assert.equal(state.registry.length, 1); + assert.equal(state.registry[0]?.status, "complete"); + }); + + test("all milestones complete → complete", async () => { + const base = createFixtureBase(); + // M001: complete + writeRoadmap(base, "M001", doneSliceRoadmap()); + writeMilestoneValidation(base, "M001", "pass"); + writeMilestoneSummary(base, "M001"); + + // M002: also complete + writeRoadmap(base, "M002", [ + "# M002: Second Milestone", + "", + "**Vision:** Test.", + "", + "## Slices", + "", + "- [x] **S01: Done** `risk:low` `depends:[]`", + " > After this: done.", + ].join("\n")); + writeMilestoneValidation(base, "M002", "pass"); + writeMilestoneSummary(base, "M002"); + + invalidateStateCache(); + const state = await deriveState(base); + + assert.equal(state.phase, "complete"); + assert.equal(state.registry.length, 2); + assert.ok(state.registry.every(e => e.status === "complete"), "all registry entries should be complete"); + }); + }); + + // ═══════════════════════════════════════════════════════════════════════════ + // PHASE 15: paused (auto-mode only) + // ═══════════════════════════════════════════════════════════════════════════ + + describe("Phase 15: paused (auto-mode only)", () => { + 
test("paused is NOT reachable from deriveState", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", standardRoadmap()); + writePlan(base, "M001", "S01", standardPlan()); + invalidateStateCache(); + const state = await deriveState(base); + assert.notEqual(state.phase, "paused"); + }); + }); + + // ═══════════════════════════════════════════════════════════════════════════ + // PHASE 16: blocked + // ═══════════════════════════════════════════════════════════════════════════ + + describe("Phase 16: blocked", () => { + test("milestone with unmet dependency → blocked", async () => { + const base = createFixtureBase(); + // M001 depends on M000 which doesn't exist — uses YAML frontmatter + writeContext(base, "M001", [ + "---", + "depends_on:", + " - M000", + "---", + "", + "# M001: Test", + "", + "Context.", + ].join("\n")); + writeRoadmap(base, "M001", [ + "# M001: Test Milestone", + "", + "**Vision:** Test blocked.", + "", + "## Slices", + "", + "- [ ] **S01: Slice** `risk:low` `depends:[]`", + " > After this: done.", + ].join("\n")); + invalidateStateCache(); + const state = await deriveState(base); + + assert.equal(state.phase, "blocked"); + assert.ok(state.blockers.length > 0, "should have blockers"); + }); + + test("no eligible slice (all deps unmet) → blocked at slice level", async () => { + const base = createFixtureBase(); + // S01 depends on S00 which doesn't exist + writeRoadmap(base, "M001", [ + "# M001: Test Milestone", + "", + "**Vision:** Test blocked slices.", + "", + "## Slices", + "", + "- [ ] **S01: First** `risk:low` `depends:[S00]`", + " > After this: done.", + ].join("\n")); + invalidateStateCache(); + const state = await deriveState(base); + + assert.equal(state.phase, "blocked"); + assert.ok( + state.blockers.some(b => b.includes("dependency") || b.includes("eligible")), + "blockers should mention dependency or eligibility", + ); + }); + }); + + // 
═══════════════════════════════════════════════════════════════════════════ + // RECONCILIATION + // ═══════════════════════════════════════════════════════════════════════════ + + describe("Reconciliation", () => { + test("DB: task with SUMMARY on disk but DB says pending → reconciliation fixes status (#2514)", async () => { + const base = createFixtureBase(); + const dbPath = join(base, ".gsd", "gsd.db"); + openDatabase(dbPath); + + insertMilestone({ id: "M001", title: "M001: Test", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "S01: Slice", status: "active", depends: [] }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "T01: Task", status: "pending" }); + insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", title: "T02: Task", status: "pending" }); + + writeRoadmap(base, "M001", standardRoadmap()); + writePlan(base, "M001", "S01", standardPlan()); + + // Write SUMMARY files on disk for both tasks (simulating session disconnect) + writeTaskSummary(base, "M001", "S01", "T01"); + writeTaskSummary(base, "M001", "S01", "T02"); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + // Reconciliation should detect SUMMARY→DB mismatch and update + // All tasks done → summarizing (not executing) + assert.equal(state.phase, "summarizing", "reconciliation should advance past pending tasks"); + }); + + test("empty DB with disk milestones → disk-to-DB sync (#2631)", async () => { + const base = createFixtureBase(); + writeContext(base, "M001", "# M001: Test\n\nContext."); + + // Open DB — milestones table starts empty + openDatabase(":memory:"); + const before = getAllMilestones(); + assert.equal(before.length, 0, "DB should start empty"); + + invalidateStateCache(); + const state = await deriveState(base); + + // After deriveState, DB should have the disk milestone + const after = getAllMilestones(); + assert.ok(after.length > 0, "DB should have milestones after reconciliation"); + 
assert.equal(after[0]!.id, "M001"); + assert.ok(state.activeMilestone !== null); + }); + + test("ghost milestone (empty dir) → NOT in registry", async () => { + const base = createFixtureBase(); + // Create empty milestone dir (ghost — no CONTEXT, ROADMAP, SUMMARY) + mkdirSync(join(base, ".gsd", "milestones", "M001"), { recursive: true }); + // Create a real milestone too + writeContext(base, "M002", "# M002: Real\n\nContext."); + invalidateStateCache(); + const state = await deriveState(base); + + // M001 (ghost) should not appear in registry + const m001 = state.registry.find(e => e.id === "M001"); + assert.equal(m001, undefined, "ghost milestone should not appear in registry"); + // M002 should be there + const m002 = state.registry.find(e => e.id === "M002"); + assert.ok(m002 !== undefined, "real milestone should appear in registry"); + }); + + test("ghost milestone detection helper", () => { + const base = createFixtureBase(); + // Ghost: empty dir + mkdirSync(join(base, ".gsd", "milestones", "M001"), { recursive: true }); + clearPathCache(); + assert.equal(isGhostMilestone(base, "M001"), true, "empty dir is ghost"); + + // Not ghost: has CONTEXT + writeContext(base, "M002", "# M002\n\nContext."); + clearPathCache(); + assert.equal(isGhostMilestone(base, "M002"), false, "dir with CONTEXT is not ghost"); + }); + }); + + // ═══════════════════════════════════════════════════════════════════════════ + // CROSS-VALIDATION + // ═══════════════════════════════════════════════════════════════════════════ + + describe("Cross-validation: DB vs filesystem", () => { + test("executing scenario produces same phase on both paths", async () => { + const base = createFixtureBase(); + const dbPath = join(base, ".gsd", "gsd.db"); + openDatabase(dbPath); + + insertMilestone({ id: "M001", title: "M001: Test", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "S01: Slice", status: "active", depends: [] }); + insertTask({ id: "T01", sliceId: "S01", 
milestoneId: "M001", title: "T01: First", status: "pending" }); + insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", title: "T02: Second", status: "pending" }); + + writeRoadmap(base, "M001", standardRoadmap()); + writePlan(base, "M001", "S01", standardPlan()); + + invalidateStateCache(); + const dbState = await deriveStateFromDb(base); + + closeDatabase(); + + invalidateStateCache(); + const fsState = await deriveState(base); + + assert.equal(dbState.phase, "executing", "DB path should produce executing"); + assert.equal(fsState.phase, "executing", "filesystem path should produce executing"); + assert.equal(dbState.activeTask?.id, fsState.activeTask?.id, "active task should match"); + }); + + test("summarizing scenario produces same phase on both paths", async () => { + const base = createFixtureBase(); + const dbPath = join(base, ".gsd", "gsd.db"); + openDatabase(dbPath); + + insertMilestone({ id: "M001", title: "M001: Test", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "S01: Slice", status: "active", depends: [] }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "T01: First", status: "complete" }); + insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", title: "T02: Second", status: "complete" }); + + writeRoadmap(base, "M001", standardRoadmap()); + writePlan(base, "M001", "S01", allDonePlan()); + + invalidateStateCache(); + const dbState = await deriveStateFromDb(base); + + closeDatabase(); + + invalidateStateCache(); + const fsState = await deriveState(base); + + assert.equal(dbState.phase, "summarizing", "DB path should produce summarizing"); + assert.equal(fsState.phase, "summarizing", "filesystem path should produce summarizing"); + }); + }); + + // ═══════════════════════════════════════════════════════════════════════════ + // EDGE CASES + // ═══════════════════════════════════════════════════════════════════════════ + + describe("Edge cases", () => { + test("isValidationTerminal: 
terminal verdicts", () => { + assert.equal(isValidationTerminal("---\nverdict: pass\n---\n"), true, "pass is terminal"); + assert.equal(isValidationTerminal("---\nverdict: fail\n---\n"), true, "fail is terminal"); + assert.equal(isValidationTerminal("---\nverdict: needs-remediation\n---\n"), true, "needs-remediation is terminal"); + assert.equal(isValidationTerminal("---\nverdict: needs-attention\n---\n"), true, "needs-attention is terminal"); + }); + + test("isValidationTerminal: non-terminal content", () => { + assert.equal(isValidationTerminal("No frontmatter at all"), false, "no frontmatter is not terminal"); + assert.equal(isValidationTerminal(""), false, "empty string is not terminal"); + assert.equal(isValidationTerminal("---\n---\n"), false, "empty frontmatter is not terminal"); + }); + + test("isClosedStatus boundary", () => { + assert.equal(isClosedStatus("complete"), true); + assert.equal(isClosedStatus("done"), true); + assert.equal(isClosedStatus("pending"), false); + assert.equal(isClosedStatus("in-progress"), false); + assert.equal(isClosedStatus("blocked"), false); + assert.equal(isClosedStatus("active"), false); + assert.equal(isClosedStatus(""), false); + }); + + test("multiple milestones: M001 complete, M002 active → M002 is activeMilestone", async () => { + const base = createFixtureBase(); + // M001: complete + writeRoadmap(base, "M001", doneSliceRoadmap()); + writeMilestoneValidation(base, "M001", "pass"); + writeMilestoneSummary(base, "M001"); + + // M002: active, in planning phase + writeContext(base, "M002", "# M002: Next Milestone\n\nContext for M002."); + writeRoadmap(base, "M002", [ + "# M002: Next Milestone", + "", + "**Vision:** Next phase.", + "", + "## Slices", + "", + "- [ ] **S01: New Slice** `risk:low` `depends:[]`", + " > After this: done.", + ].join("\n")); + + invalidateStateCache(); + const state = await deriveState(base); + + assert.equal(state.activeMilestone?.id, "M002", "active milestone should be M002"); + 
assert.notEqual(state.phase, "complete", "should not be complete while M002 is active"); + // M001 in registry as complete + const m001 = state.registry.find(e => e.id === "M001"); + assert.ok(m001 !== undefined, "M001 should be in registry"); + assert.equal(m001?.status, "complete", "M001 should be complete"); + // M002 in registry as active + const m002 = state.registry.find(e => e.id === "M002"); + assert.ok(m002 !== undefined, "M002 should be in registry"); + assert.equal(m002?.status, "active", "M002 should be active"); + }); + }); + + // ═══════════════════════════════════════════════════════════════════════════ + // FAILURE MODES: What happens when things go wrong + // ═══════════════════════════════════════════════════════════════════════════ + + describe("Failure: DB has slice but no task rows (partial migration)", () => { + test("DB tasks empty but PLAN on disk has tasks → wrong phase (planning)", async () => { + const base = createFixtureBase(); + const dbPath = join(base, ".gsd", "gsd.db"); + openDatabase(dbPath); + + insertMilestone({ id: "M001", title: "M001: Test", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "S01: Slice", status: "active", depends: [] }); + // NO insertTask() — simulates partial migration / failed write + + writeRoadmap(base, "M001", standardRoadmap()); + writePlan(base, "M001", "S01", standardPlan()); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + // BUG: Returns "planning" because getSliceTasks() returns [] + // and line 703 treats empty tasks as "no tasks defined". + // PLAN file on disk has T01/T02 but DB doesn't know about them. 
+ assert.equal(state.phase, "planning", + "KNOWN ISSUE: DB empty tasks → planning even though PLAN has tasks on disk"); + }); + }); + + describe("Failure: partial SUMMARY reconciliation", () => { + test("only one task has SUMMARY, other still pending → executing next task", async () => { + const base = createFixtureBase(); + const dbPath = join(base, ".gsd", "gsd.db"); + openDatabase(dbPath); + + insertMilestone({ id: "M001", title: "M001: Test", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "S01: Slice", status: "active", depends: [] }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "T01: Task", status: "pending" }); + insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", title: "T02: Task", status: "pending" }); + + writeRoadmap(base, "M001", standardRoadmap()); + writePlan(base, "M001", "S01", standardPlan()); + // Only T01 has SUMMARY, T02 does not + writeTaskSummary(base, "M001", "S01", "T01"); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + // T01 reconciled to complete, T02 still pending → executing T02 + assert.equal(state.phase, "executing"); + assert.equal(state.activeTask?.id, "T02", "should advance to next pending task"); + }); + }); + + describe("Failure: 0-byte files", () => { + test("0-byte SUMMARY file triggers reconciliation (existsSync-only check)", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", standardRoadmap()); + writePlan(base, "M001", "S01", standardPlan()); + // Write 0-byte SUMMARY — existsSync returns true for empty files + const tasksDir = join(base, ".gsd", "milestones", "M001", "slices", "S01", "tasks"); + mkdirSync(tasksDir, { recursive: true }); + writeFileSync(join(tasksDir, "T01-SUMMARY.md"), ""); + + invalidateStateCache(); + clearPathCache(); + const state = await deriveState(base); + + // The reconciler checks existsSync(summaryPath) at line 1328 + // — it does NOT read content. 
So 0-byte file counts as "done". + // This is a known gap: empty SUMMARY treated as completion. + assert.equal(state.phase, "executing", + "0-byte SUMMARY marks T01 done via reconciliation, T02 becomes active"); + assert.equal(state.activeTask?.id, "T02"); + }); + + test("0-byte VALIDATION file → stays in validating-milestone", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", doneSliceRoadmap()); + const dir = join(base, ".gsd", "milestones", "M001"); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, "M001-VALIDATION.md"), ""); + + invalidateStateCache(); + const state = await deriveState(base); + + assert.equal(state.phase, "validating-milestone", + "0-byte VALIDATION should not be treated as terminal"); + }); + + test("0-byte PLAN file → planning phase", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", standardRoadmap()); + const dir = join(base, ".gsd", "milestones", "M001", "slices", "S01"); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, "S01-PLAN.md"), ""); + + invalidateStateCache(); + const state = await deriveState(base); + + assert.equal(state.phase, "planning", "0-byte PLAN should stay in planning"); + }); + }); + + describe("Failure: DB/filesystem divergence", () => { + test("DB says slice complete, no milestone VALIDATION → validating-milestone", async () => { + const base = createFixtureBase(); + const dbPath = join(base, ".gsd", "gsd.db"); + openDatabase(dbPath); + + insertMilestone({ id: "M001", title: "M001: Test", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "S01: Slice", status: "complete", depends: [] }); + + writeRoadmap(base, "M001", doneSliceRoadmap()); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + assert.equal(state.phase, "validating-milestone", + "DB-complete slice should trigger milestone validation"); + }); + + test("DB says task complete but SUMMARY missing → no crash, advances to 
next", async () => { + const base = createFixtureBase(); + const dbPath = join(base, ".gsd", "gsd.db"); + openDatabase(dbPath); + + insertMilestone({ id: "M001", title: "M001: Test", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "S01: Slice", status: "active", depends: [] }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "T01: Task", status: "complete" }); + insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", title: "T02: Task", status: "pending" }); + + writeRoadmap(base, "M001", standardRoadmap()); + writePlan(base, "M001", "S01", standardPlan()); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + assert.equal(state.phase, "executing"); + assert.equal(state.activeTask?.id, "T02"); + }); + + test("milestone in DB but directory missing from disk → no crash", async () => { + const base = createFixtureBase(); + const dbPath = join(base, ".gsd", "gsd.db"); + openDatabase(dbPath); + + insertMilestone({ id: "M001", title: "M001: Test", status: "active" }); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + assert.ok(state.phase !== undefined, "should produce a valid phase"); + }); + }); + + describe("Failure: corrupt frontmatter", () => { + test("VALIDATION with broken frontmatter → stays in validating", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", doneSliceRoadmap()); + const dir = join(base, ".gsd", "milestones", "M001"); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, "M001-VALIDATION.md"), [ + "---", + "this is not: valid: yaml: {{{}}}", + "---", + "", + "Some content.", + ].join("\n")); + + invalidateStateCache(); + const state = await deriveState(base); + + assert.equal(state.phase, "validating-milestone", + "corrupt frontmatter should keep milestone in validating phase"); + }); + + test("CONTEXT with broken depends_on → no crash, deps empty", async () => { + const base = createFixtureBase(); + 
writeContext(base, "M001", [ + "---", + "depends_on: {{{invalid}}}", + "---", + "", + "# M001: Test", + ].join("\n")); + writeRoadmap(base, "M001", standardRoadmap()); + + invalidateStateCache(); + const state = await deriveState(base); + + assert.ok(state.phase !== undefined, "should not crash on corrupt depends_on"); + // With corrupt deps, parseContextDependsOn returns [] → no blocking + assert.notEqual(state.phase, "blocked", + "corrupt deps should not falsely block milestone"); + }); + }); + + describe("Failure: missing task plan files in DB path", () => { + test("DB has tasks but no T##-PLAN.md files → planning phase", async () => { + const base = createFixtureBase(); + const dbPath = join(base, ".gsd", "gsd.db"); + openDatabase(dbPath); + + insertMilestone({ id: "M001", title: "M001: Test", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "S01: Slice", status: "active", depends: [] }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "T01: Task", status: "pending" }); + + writeRoadmap(base, "M001", standardRoadmap()); + const dir = join(base, ".gsd", "milestones", "M001", "slices", "S01"); + mkdirSync(join(dir, "tasks"), { recursive: true }); + writeFileSync(join(dir, "S01-PLAN.md"), standardPlan()); + // NO T01-PLAN.md + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + assert.equal(state.phase, "planning", + "missing T##-PLAN.md files should keep state in planning"); + }); + }); + + describe("Failure: stale path cache", () => { + test("file created after cache populated → must clear path cache", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", standardRoadmap()); + + invalidateStateCache(); + clearPathCache(); + const state1 = await deriveState(base); + assert.equal(state1.phase, "planning"); + + // Write PLAN AFTER first derivation cached paths + writePlan(base, "M001", "S01", standardPlan()); + + // Without clearPathCache, stale cache may miss the new 
file + invalidateStateCache(); + clearPathCache(); + const state2 = await deriveState(base); + + assert.equal(state2.phase, "executing", + "after cache clear, should see the new PLAN file"); + }); + }); + + describe("Failure: blocker detection edge cases", () => { + test("filesystem: blocker in SUMMARY but task not marked [x] → still detected", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", standardRoadmap()); + // T01 marked done in plan, T02 pending + writePlan(base, "M001", "S01", partialDonePlan()); + // T01 SUMMARY has blocker_discovered in frontmatter + writeTaskSummaryWithBlocker(base, "M001", "S01", "T01"); + + invalidateStateCache(); + clearPathCache(); + const state = await deriveState(base); + + assert.equal(state.phase, "replanning-slice", + "blocker_discovered in SUMMARY frontmatter should trigger replanning"); + }); + }); + + // ═══════════════════════════════════════════════════════════════════════════ + // FAILURE AT EVERY PHASE: What breaks mid-transition + // ═══════════════════════════════════════════════════════════════════════════ + + describe("Failure at pre-planning: CONTEXT file half-written", () => { + test("CONTEXT exists but is garbage → still enters pre-planning (no roadmap)", async () => { + const base = createFixtureBase(); + writeContext(base, "M001", "\x00\x00\x00binary garbage\xff\xfe"); + invalidateStateCache(); + clearPathCache(); + const state = await deriveState(base); + + // File exists so milestone is not ghost, but no roadmap → pre-planning + assert.equal(state.phase, "pre-planning"); + assert.ok(state.activeMilestone !== null); + }); + }); + + describe("Failure at needs-discussion: CONTEXT-DRAFT is empty", () => { + test("0-byte CONTEXT-DRAFT → should still trigger needs-discussion", async () => { + const base = createFixtureBase(); + const dir = join(base, ".gsd", "milestones", "M001"); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, "M001-CONTEXT-DRAFT.md"), ""); + 
invalidateStateCache(); + clearPathCache(); + const state = await deriveState(base); + + // File exists (even empty) → not a ghost, has draft → needs-discussion + assert.equal(state.phase, "needs-discussion", + "0-byte draft should still trigger discussion phase"); + }); + }); + + describe("Failure at planning: ROADMAP exists but is unparseable", () => { + test("ROADMAP with no slices section → pre-planning (zero slices)", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", "# M001: Test\n\nJust some text, no ## Slices section."); + invalidateStateCache(); + clearPathCache(); + const state = await deriveState(base); + + // parseRoadmap finds no slices → empty array → pre-planning + assert.equal(state.phase, "pre-planning", + "unparseable roadmap with no slices should fall to pre-planning"); + }); + + test("ROADMAP with broken slice syntax → treats as zero slices", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", [ + "# M001: Test", + "", + "**Vision:** Test.", + "", + "## Slices", + "", + "This is not a valid slice entry at all.", + "Neither is this.", + ].join("\n")); + invalidateStateCache(); + clearPathCache(); + const state = await deriveState(base); + + // No parseable slice entries → zero slices → pre-planning + assert.equal(state.phase, "pre-planning", + "broken slice syntax should result in zero slices"); + }); + }); + + describe("Failure at planning: PLAN file is corrupt", () => { + test("PLAN exists but tasks section is garbage → zero tasks → planning", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", standardRoadmap()); + const dir = join(base, ".gsd", "milestones", "M001", "slices", "S01"); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, "S01-PLAN.md"), [ + "# S01: Slice", + "", + "## Tasks", + "", + "random garbage with no task markers", + "more garbage", + ].join("\n")); + invalidateStateCache(); + clearPathCache(); + const state = await 
deriveState(base); + + assert.equal(state.phase, "planning", + "PLAN with unparseable tasks should stay in planning"); + }); + }); + + describe("Failure at executing: task plan file is empty", () => { + test("T01-PLAN.md exists but is 0-byte → still enters executing", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", standardRoadmap()); + const dir = join(base, ".gsd", "milestones", "M001", "slices", "S01"); + const tasksDir = join(dir, "tasks"); + mkdirSync(tasksDir, { recursive: true }); + writeFileSync(join(dir, "S01-PLAN.md"), standardPlan()); + // Create task plan files but make them 0-byte + writeFileSync(join(tasksDir, "T01-PLAN.md"), ""); + writeFileSync(join(tasksDir, "T02-PLAN.md"), ""); + invalidateStateCache(); + clearPathCache(); + const state = await deriveState(base); + + // Task plan file existence check at line 718-730 uses readdirSync + // to count .md files. 0-byte files still count. + assert.equal(state.phase, "executing", + "0-byte task plan files still pass the existence check"); + }); + }); + + describe("Failure at executing: DB has task but wrong status string", () => { + test("task with unexpected status string → not treated as closed", async () => { + const base = createFixtureBase(); + const dbPath = join(base, ".gsd", "gsd.db"); + openDatabase(dbPath); + + insertMilestone({ id: "M001", title: "M001: Test", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "S01: Slice", status: "active", depends: [] }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "T01: Task", status: "pending" }); + + // Set a garbage status that isn't "complete" or "done" + updateTaskStatus("M001", "S01", "T01", "finished"); + + writeRoadmap(base, "M001", standardRoadmap()); + writePlan(base, "M001", "S01", standardPlan()); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + // isClosedStatus("finished") → false → task treated as active + assert.equal(state.phase, 
"executing"); + assert.equal(state.activeTask?.id, "T01", + "non-standard status 'finished' is NOT treated as closed"); + }); + }); + + describe("Failure at summarizing: slice SUMMARY write fails (file missing)", () => { + test("all tasks [x] but no slice SUMMARY → stays in summarizing", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", standardRoadmap()); + writePlan(base, "M001", "S01", allDonePlan()); + // All tasks done but no S01-SUMMARY.md written + invalidateStateCache(); + clearPathCache(); + const state = await deriveState(base); + + assert.equal(state.phase, "summarizing"); + // Next derivation still returns summarizing — no infinite loop + invalidateStateCache(); + const state2 = await deriveState(base); + assert.equal(state2.phase, "summarizing", "stays in summarizing until SUMMARY written"); + }); + }); + + describe("Failure at validating-milestone: VALIDATION write crashes", () => { + test("all slices done, validation never written → stuck in validating", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", doneSliceRoadmap()); + // No VALIDATION file at all + invalidateStateCache(); + clearPathCache(); + const state = await deriveState(base); + assert.equal(state.phase, "validating-milestone"); + + // Call again — still validating (idempotent, not looping) + invalidateStateCache(); + const state2 = await deriveState(base); + assert.equal(state2.phase, "validating-milestone", + "stays in validating until VALIDATION file appears"); + }); + }); + + describe("Failure at completing-milestone: SUMMARY write fails", () => { + test("validation terminal but SUMMARY never written → stuck in completing", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", doneSliceRoadmap()); + writeMilestoneValidation(base, "M001", "pass"); + // No milestone SUMMARY + invalidateStateCache(); + clearPathCache(); + const state = await deriveState(base); + assert.equal(state.phase, 
"completing-milestone"); + + // Repeated calls stay in completing + invalidateStateCache(); + const state2 = await deriveState(base); + assert.equal(state2.phase, "completing-milestone", + "stays in completing until SUMMARY written"); + }); + }); + + describe("Failure at replanning: REPLAN.md never written (loop risk)", () => { + test("blocker detected, replan dispatched but REPLAN.md not created → re-enters replanning", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", standardRoadmap()); + writePlan(base, "M001", "S01", partialDonePlan()); + writeTaskSummaryWithBlocker(base, "M001", "S01", "T01"); + // No REPLAN.md — simulates failed replan execution + + invalidateStateCache(); + clearPathCache(); + const state1 = await deriveState(base); + assert.equal(state1.phase, "replanning-slice"); + + // Call again — same result, stuck in replanning until REPLAN.md appears + invalidateStateCache(); + const state2 = await deriveState(base); + assert.equal(state2.phase, "replanning-slice", + "without REPLAN.md, state stays in replanning (dispatch will retry)"); + }); + }); + + describe("Failure at complete: SUMMARY exists but VALIDATION missing", () => { + test("milestone SUMMARY without VALIDATION → still complete (SUMMARY is terminal artifact)", async () => { + const base = createFixtureBase(); + writeRoadmap(base, "M001", doneSliceRoadmap()); + // SUMMARY exists but NO VALIDATION + writeMilestoneSummary(base, "M001"); + invalidateStateCache(); + clearPathCache(); + const state = await deriveState(base); + + // Per #864: SUMMARY is the terminal artifact, validation optional + assert.equal(state.phase, "complete", + "SUMMARY alone should mark milestone complete per #864"); + }); + }); + + describe("Failure at blocked: dependency milestone partially complete", () => { + test("M001 has slices done but no SUMMARY → M002 (depends on M001) is blocked", async () => { + const base = createFixtureBase(); + // M001: all slices done but no 
SUMMARY/VALIDATION + writeRoadmap(base, "M001", doneSliceRoadmap()); + // M001 has no SUMMARY → it's in validating/completing, NOT complete + + // M002: depends on M001 + writeContext(base, "M002", [ + "---", + "depends_on:", + " - M001", + "---", + "", + "# M002: Dependent", + ].join("\n")); + writeRoadmap(base, "M002", [ + "# M002: Dependent", + "", + "**Vision:** Test.", + "", + "## Slices", + "", + "- [ ] **S01: Slice** `risk:low` `depends:[]`", + " > After this: done.", + ].join("\n")); + + invalidateStateCache(); + clearPathCache(); + const state = await deriveState(base); + + // M001 is active (not yet complete), M002 should wait + assert.equal(state.activeMilestone?.id, "M001", + "M001 should be active (not complete without SUMMARY)"); + assert.notEqual(state.activeMilestone?.id, "M002", + "M002 should not be active while M001 is incomplete"); + }); + }); + + describe("Failure: multiple reconciliation in single derivation", () => { + test("DB has 3 stale tasks, all with SUMMARY on disk → all reconciled in one pass", async () => { + const base = createFixtureBase(); + const dbPath = join(base, ".gsd", "gsd.db"); + openDatabase(dbPath); + + insertMilestone({ id: "M001", title: "M001: Test", status: "active" }); + insertSlice({ id: "S01", milestoneId: "M001", title: "S01: Slice", status: "active", depends: [] }); + insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "T01", status: "pending" }); + insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", title: "T02", status: "in-progress" }); + insertTask({ id: "T03", sliceId: "S01", milestoneId: "M001", title: "T03", status: "pending" }); + + const threeTaskRoadmap = [ + "# M001: Test", + "", + "**Vision:** Test.", + "", + "## Slices", + "", + "- [ ] **S01: Slice** `risk:low` `depends:[]`", + " > After this: done.", + ].join("\n"); + writeRoadmap(base, "M001", threeTaskRoadmap); + + const threeTaskPlan = [ + "# S01: Slice", + "", + "**Goal:** Test.", + "**Demo:** Tests pass.", + "", + "## 
Tasks", + "", + "- [ ] **T01: First** `est:10m`", + " First.", + "", + "- [ ] **T02: Second** `est:10m`", + " Second.", + "", + "- [ ] **T03: Third** `est:10m`", + " Third.", + ].join("\n"); + writePlan(base, "M001", "S01", threeTaskPlan); + + // All 3 tasks have SUMMARY on disk + writeTaskSummary(base, "M001", "S01", "T01"); + writeTaskSummary(base, "M001", "S01", "T02"); + writeTaskSummary(base, "M001", "S01", "T03"); + + invalidateStateCache(); + const state = await deriveStateFromDb(base); + + // All 3 should be reconciled in one pass → summarizing + assert.equal(state.phase, "summarizing", + "all 3 stale tasks should be reconciled to complete in one derivation"); + }); + }); +}); diff --git a/src/resources/extensions/gsd/tests/stop-auto-race-null-unit.test.ts b/src/resources/extensions/gsd/tests/stop-auto-race-null-unit.test.ts new file mode 100644 index 000000000..d5883a14b --- /dev/null +++ b/src/resources/extensions/gsd/tests/stop-auto-race-null-unit.test.ts @@ -0,0 +1,106 @@ +/** + * stop-auto-race-null-unit.test.ts — Regression test for #2939. + * + * When the user stops auto-mode while a unit is executing, stopAuto() + * calls s.reset() which sets s.currentUnit = null. The resumed + * runUnitPhase() then hits s.currentUnit.startedAt on the closeout + * line and throws a TypeError. + * + * The fix adds null guards (matching the existing pattern at lines 136 + * and 344) so that closeout and subsequent accesses are skipped when + * s.currentUnit has been nulled by a concurrent stopAuto(). 
+ */ + +import { readFileSync } from "node:fs"; +import { join } from "node:path"; +import { createTestContext } from "./test-helpers.ts"; + +const { assertTrue, report } = createTestContext(); + +const phasesPath = join(import.meta.dirname, "..", "auto", "phases.ts"); +const phasesSrc = readFileSync(phasesPath, "utf-8"); + +console.log("\n=== #2939: stopAuto race — null guard on s.currentUnit in closeout ==="); + +// ── Test 1: closeoutUnit call is guarded by if (s.currentUnit) ────────── +// The closeout block starting around the "Immediate unit closeout" comment +// must be wrapped in an `if (s.currentUnit)` guard, matching the pattern +// already used at lines 136 and 344. + +const closeoutComment = "Immediate unit closeout"; +const closeoutIdx = phasesSrc.indexOf(closeoutComment); +assertTrue( + closeoutIdx > 0, + "phases.ts contains the 'Immediate unit closeout' comment block", +); + +// Extract the region from the closeout comment to the next section comment +const closeoutRegion = phasesSrc.slice(closeoutIdx, closeoutIdx + 500); +assertTrue( + closeoutRegion.includes("if (s.currentUnit)"), + "closeoutUnit call is guarded by `if (s.currentUnit)` check (#2939)", +); + +// ── Test 2: zero-tool-call guard uses s.currentUnit?.startedAt ────────── +// The zero-tool-call section accesses s.currentUnit!.startedAt (non-null +// assertion) which will throw if currentUnit is null. 
+ +const zeroToolComment = "Zero tool-call guard"; +const zeroToolIdx = phasesSrc.indexOf(zeroToolComment); +assertTrue( + zeroToolIdx > 0, + "phases.ts contains the 'Zero tool-call guard' comment block", +); + +const zeroToolRegion = phasesSrc.slice(zeroToolIdx, zeroToolIdx + 600); + +// The non-null assertion `s.currentUnit!.startedAt` must be replaced with +// optional chaining `s.currentUnit?.startedAt` +assertTrue( + !zeroToolRegion.includes("s.currentUnit!.startedAt"), + "zero-tool-call guard no longer uses non-null assertion on s.currentUnit (#2939)", +); + +// ── Test 3: return value uses optional chaining for startedAt ─────────── +// The final return at the end of runUnitPhase uses s.currentUnit.startedAt +// which will throw if currentUnit was nulled. It must use optional chaining. + +// Find the last return statement in runUnitPhase that references startedAt. +// There are two: one inside the zero-tool-call block and one at the end. +// Both must use s.currentUnit?.startedAt + +// Count unguarded s.currentUnit.startedAt (without optional chaining) +// after the "Immediate unit closeout" comment. All of them should use +// optional chaining or be inside a guard. +const afterCloseout = phasesSrc.slice(closeoutIdx); + +// Count s.currentUnit!.startedAt (non-null assertion — always unsafe) +const nonNullPattern = /s\.currentUnit!\.startedAt/g; +const nonNullAfterCloseout = [...afterCloseout.matchAll(nonNullPattern)]; +assertTrue( + nonNullAfterCloseout.length === 0, + `no non-null assertions s.currentUnit!.startedAt after closeout comment (found ${nonNullAfterCloseout.length}, expected 0) (#2939)`, +); + +// Count bare s.currentUnit.startedAt that are NOT inside an if (s.currentUnit) guard. +// The closeout block itself uses s.currentUnit.startedAt inside a guard — that's fine. +// But any usage outside a guard block (e.g. in a return statement) must use optional chaining. +// We check that all return statements use optional chaining. 
+const returnWithBareAccess = /return\s*\{[^}]*s\.currentUnit\.startedAt/g; +const bareReturnCount = [...afterCloseout.matchAll(returnWithBareAccess)].length; +assertTrue( + bareReturnCount === 0, + `no return statements use bare s.currentUnit.startedAt (found ${bareReturnCount}, expected 0) (#2939)`, +); + +// ── Test 4: the return at end of runUnitPhase uses optional chaining ──── +// The final `return { action: "next", data: { unitStartedAt: s.currentUnit?.startedAt } }` +// must use optional chaining. + +const finalReturnPattern = /unitStartedAt:\s*s\.currentUnit\?\.startedAt/; +assertTrue( + finalReturnPattern.test(afterCloseout), + "final return uses s.currentUnit?.startedAt with optional chaining (#2939)", +); + +report(); diff --git a/src/resources/extensions/gsd/tests/stuck-detection-coverage.test.ts b/src/resources/extensions/gsd/tests/stuck-detection-coverage.test.ts new file mode 100644 index 000000000..c0d4d748e --- /dev/null +++ b/src/resources/extensions/gsd/tests/stuck-detection-coverage.test.ts @@ -0,0 +1,174 @@ +// GSD State Machine Regression Tests — Stuck Detection Coverage (#3161) + +import test from "node:test"; +import assert from "node:assert/strict"; + +import { detectStuck } from "../auto/detect-stuck.ts"; + +// ─── Baseline: window too small ────────────────────────────────────────────── + +test("returns null for empty window", () => { + assert.equal(detectStuck([]), null); +}); + +test("returns null for single entry", () => { + assert.equal(detectStuck([{ key: "A" }]), null); +}); + +test("returns null for two different entries without errors", () => { + assert.equal(detectStuck([{ key: "A" }, { key: "B" }]), null); +}); + +// ─── Rule 1: Same error repeated consecutively ─────────────────────────────── + +test("Rule 1: same error twice consecutively triggers stuck", () => { + const result = detectStuck([ + { key: "A", error: "ENOENT: no such file" }, + { key: "A", error: "ENOENT: no such file" }, + ]); + assert.notEqual(result, null); + 
assert.equal(result!.stuck, true); + assert.ok(result!.reason.includes("Same error"), `reason was: ${result!.reason}`); +}); + +test("Rule 1: different errors do not trigger stuck", () => { + // Only 2 entries with different errors — Rule 2 needs 3 entries, so null. + const result = detectStuck([ + { key: "A", error: "err1" }, + { key: "A", error: "err2" }, + ]); + assert.equal(result, null); +}); + +test("Rule 1: only last two entries matter for error check", () => { + // First two share an error, but the last two have distinct errors — no trigger. + const result = detectStuck([ + { key: "A", error: "same-error" }, + { key: "A", error: "same-error" }, + { key: "B", error: "different-error-1" }, + { key: "C", error: "different-error-2" }, + ]); + assert.equal(result, null); +}); + +// ─── Rule 2: Same unit key 3+ consecutive times ─────────────────────────────── + +test("Rule 2: same unit key 3 consecutive times triggers stuck", () => { + const result = detectStuck([ + { key: "A" }, + { key: "A" }, + { key: "A" }, + ]); + assert.notEqual(result, null); + assert.equal(result!.stuck, true); + assert.ok( + result!.reason.includes("3 consecutive times"), + `reason was: ${result!.reason}`, + ); +}); + +test("Rule 2: same key twice is not enough", () => { + assert.equal(detectStuck([{ key: "A" }, { key: "A" }]), null); +}); + +test("Rule 2: interrupted sequence does not trigger", () => { + // A, B, A — last three are not all the same key. 
+ assert.equal( + detectStuck([{ key: "A" }, { key: "B" }, { key: "A" }]), + null, + ); +}); + +// ─── Rule 3: Oscillation A→B→A→B ───────────────────────────────────────────── + +test("Rule 3: A-B-A-B oscillation triggers stuck", () => { + const result = detectStuck([ + { key: "A" }, + { key: "B" }, + { key: "A" }, + { key: "B" }, + ]); + assert.notEqual(result, null); + assert.equal(result!.stuck, true); + assert.ok( + result!.reason.includes("Oscillation"), + `reason was: ${result!.reason}`, + ); +}); + +test("Rule 3: A-B-A-C does not trigger oscillation", () => { + assert.equal( + detectStuck([{ key: "A" }, { key: "B" }, { key: "A" }, { key: "C" }]), + null, + ); +}); + +test("Rule 3: A-A-A-A triggers Rule 2 not Rule 3", () => { + // Rule 2 fires first (last 3 are all the same key). + const result = detectStuck([ + { key: "A" }, + { key: "A" }, + { key: "A" }, + { key: "A" }, + ]); + assert.notEqual(result, null); + assert.equal(result!.stuck, true); + assert.ok( + result!.reason.includes("3 consecutive times"), + `expected Rule 2 reason but got: ${result!.reason}`, + ); + assert.ok( + !result!.reason.includes("Oscillation"), + `unexpectedly matched Rule 3: ${result!.reason}`, + ); +}); + +// ─── Gap documentation: 3-unit cycle evades detection ──────────────────────── + +test("Three-unit cycle A-B-C-A-B-C does NOT trigger stuck (documents gap L13)", () => { + // None of the three rules fires for a 3-unit repeating cycle. + // This test intentionally documents the coverage gap where such cycles + // slip through undetected (#3161). 
+ const result = detectStuck([ + { key: "A" }, + { key: "B" }, + { key: "C" }, + { key: "A" }, + { key: "B" }, + { key: "C" }, + ]); + assert.equal(result, null); +}); + +// ─── Window boundary: earlier patterns do not contaminate recent check ───────── + +test("window bounded: detection uses last N entries correctly", () => { + // The first three entries would trigger Rule 2, but the last entries are + // healthy — only the tail matters. + const result = detectStuck([ + { key: "X" }, + { key: "X" }, + { key: "X" }, // would be stuck if this were the end + { key: "A" }, + { key: "B" }, // last two: different keys, no error + ]); + assert.equal(result, null); +}); + +// ─── Rule priority: Rule 1 before Rule 2 ───────────────────────────────────── + +test("Rule 1 takes priority over Rule 2 when both match", () => { + // Last 3 entries share the same key (Rule 2 candidate) AND last 2 share + // the same error (Rule 1 candidate). Rule 1 is evaluated first. + const result = detectStuck([ + { key: "A", error: "boom" }, + { key: "A", error: "boom" }, + { key: "A", error: "boom" }, + ]); + assert.notEqual(result, null); + assert.equal(result!.stuck, true); + assert.ok( + result!.reason.includes("Same error"), + `expected Rule 1 reason but got: ${result!.reason}`, + ); +}); diff --git a/src/resources/extensions/gsd/tests/summary-render-parity.test.ts b/src/resources/extensions/gsd/tests/summary-render-parity.test.ts new file mode 100644 index 000000000..ffd4fc955 --- /dev/null +++ b/src/resources/extensions/gsd/tests/summary-render-parity.test.ts @@ -0,0 +1,221 @@ +/** + * summary-render-parity.test.ts — Regression test for #2720 + * + * Asserts that the SUMMARY.md produced at task-completion time + * (renderSummaryMarkdown in complete-task.ts) is structurally identical + * to the SUMMARY.md produced at projection-regeneration time + * (renderSummaryContent in workflow-projections.ts). 
+ * + * Both render paths receive equivalent data (CompleteTaskParams vs TaskRow) + * and must produce the same output. If they diverge, projection regeneration + * silently replaces richer content with a stripped-down version. + */ + +import { createTestContext } from './test-helpers.ts'; +import { renderSummaryContent } from '../workflow-projections.ts'; +import type { TaskRow } from '../gsd-db.ts'; + +const { assertEq, assertTrue, report } = createTestContext(); + +// ═══════════════════════════════════════════════════════════════════════════ +// Fixtures — same logical data in both shapes +// ═══════════════════════════════════════════════════════════════════════════ + +const SLICE_ID = "S01"; +const MILESTONE_ID = "M001"; + +const taskRow: TaskRow = { + milestone_id: MILESTONE_ID, + slice_id: SLICE_ID, + id: "T01", + title: "Implement widget parser", + status: "complete", + one_liner: "Implement widget parser", + narrative: "Added a recursive descent parser for widget DSL.", + verification_result: "All 42 unit tests pass; linter clean.", + duration: "2h", + completed_at: "2025-01-15T10:30:00.000Z", + blocker_discovered: false, + deviations: "Switched from PEG to hand-rolled parser for perf.", + known_issues: "No known issues.", + key_files: ["src/parser.ts", "src/lexer.ts"], + key_decisions: ["Hand-rolled parser over PEG for 3x throughput"], + full_summary_md: "", + description: "", + estimate: "", + files: [], + verify: "", + inputs: [], + expected_output: [], + observability_impact: "", + full_plan_md: "", + sequence: 1, +}; + +const verificationEvidence = [ + { command: "npm test", exitCode: 0, verdict: "42/42 passed ✅", durationMs: 3200 }, + { command: "npm run lint", exitCode: 0, verdict: "No warnings ✅", durationMs: 1100 }, +]; + +// ═══════════════════════════════════════════════════════════════════════════ +// Tests +// ═══════════════════════════════════════════════════════════════════════════ + +// Test 1: renderSummaryContent includes Verification 
section +{ + const output = renderSummaryContent(taskRow, SLICE_ID, MILESTONE_ID); + assertTrue( + output.includes("## Verification"), + "renderSummaryContent must include a ## Verification section", + ); +} + +// Test 2: renderSummaryContent includes Verification Evidence table +{ + const output = renderSummaryContent(taskRow, SLICE_ID, MILESTONE_ID, verificationEvidence); + assertTrue( + output.includes("## Verification Evidence"), + "renderSummaryContent must include a ## Verification Evidence section", + ); + assertTrue( + output.includes("npm test"), + "Verification Evidence table must include the command", + ); + assertTrue( + output.includes("| Exit Code |") || output.includes("exit_code") || output.includes("Exit Code"), + "Verification Evidence table must include exit code column", + ); +} + +// Test 3: renderSummaryContent includes Files Created/Modified section +{ + const output = renderSummaryContent(taskRow, SLICE_ID, MILESTONE_ID); + assertTrue( + output.includes("## Files Created/Modified"), + "renderSummaryContent must include a ## Files Created/Modified section", + ); + assertTrue( + output.includes("`src/parser.ts`"), + "Files section must list key_files as inline code", + ); +} + +// Test 4: one_liner renders as bold (not blockquote) for consistency +{ + const output = renderSummaryContent(taskRow, SLICE_ID, MILESTONE_ID); + assertTrue( + output.includes(`**${taskRow.one_liner}**`), + "one_liner must render as bold text (not blockquote)", + ); +} + +// Test 5: frontmatter key_files uses YAML list format (not JSON array) +{ + const output = renderSummaryContent(taskRow, SLICE_ID, MILESTONE_ID); + assertTrue( + output.includes("key_files:\n - src/parser.ts\n - src/lexer.ts"), + "key_files frontmatter must use YAML list format, not JSON array", + ); +} + +// Test 6: frontmatter key_decisions uses YAML list format (not JSON array) +{ + const output = renderSummaryContent(taskRow, SLICE_ID, MILESTONE_ID); + assertTrue( + 
output.includes("key_decisions:\n - Hand-rolled parser over PEG for 3x throughput"), + "key_decisions frontmatter must use YAML list format, not JSON array", + ); +} + +// Test 7: Deviations section always present (with "None." fallback) +{ + const noDeviations = { ...taskRow, deviations: "" }; + const output = renderSummaryContent(noDeviations, SLICE_ID, MILESTONE_ID); + assertTrue( + output.includes("## Deviations"), + "Deviations section must always be present even when empty", + ); + assertTrue( + output.includes("None."), + "Deviations section must show 'None.' when no deviations", + ); +} + +// Test 8: Known Issues section always present (with "None." fallback) +{ + const noKnownIssues = { ...taskRow, known_issues: "" }; + const output = renderSummaryContent(noKnownIssues, SLICE_ID, MILESTONE_ID); + assertTrue( + output.includes("## Known Issues"), + "Known Issues section must always be present even when empty", + ); +} + +// Test 9: verification_result frontmatter not double-quoted +{ + const output = renderSummaryContent(taskRow, SLICE_ID, MILESTONE_ID); + // Should be: verification_result: passed (not "passed") + assertTrue( + !output.includes('verification_result: "'), + "verification_result frontmatter value must not be double-quoted", + ); +} + +// Test 10: duration frontmatter not double-quoted +{ + const output = renderSummaryContent(taskRow, SLICE_ID, MILESTONE_ID); + assertTrue( + !output.includes('duration: "'), + "duration frontmatter value must not be double-quoted", + ); +} + +// Test 11: empty key_files renders YAML placeholder, not empty array +{ + const noFiles = { ...taskRow, key_files: [] }; + const output = renderSummaryContent(noFiles, SLICE_ID, MILESTONE_ID); + assertTrue( + output.includes("key_files:\n - (none)"), + "empty key_files must render as YAML list with (none) placeholder", + ); +} + +// Test 12: frontmatter does not contain extra projection-only fields +{ + const output = renderSummaryContent(taskRow, SLICE_ID, MILESTONE_ID); 
+ assertTrue( + !output.includes("provides:"), + "frontmatter must not contain provides field", + ); + assertTrue( + !output.includes("requires:"), + "frontmatter must not contain requires field", + ); + assertTrue( + !output.includes("affects:"), + "frontmatter must not contain affects field", + ); + assertTrue( + !output.includes("patterns_established:"), + "frontmatter must not contain patterns_established field", + ); + assertTrue( + !output.includes("drill_down_paths:"), + "frontmatter must not contain drill_down_paths field", + ); + assertTrue( + !output.includes("observability_surfaces:"), + "frontmatter must not contain observability_surfaces field", + ); +} + +// Test 13: no verification evidence renders empty table row +{ + const output = renderSummaryContent(taskRow, SLICE_ID, MILESTONE_ID, []); + assertTrue( + output.includes("No verification commands discovered"), + "Empty evidence array must render placeholder row", + ); +} + +report(); diff --git a/src/resources/extensions/gsd/tests/tool-naming.test.ts b/src/resources/extensions/gsd/tests/tool-naming.test.ts index 772a4eed6..b7e333cff 100644 --- a/src/resources/extensions/gsd/tests/tool-naming.test.ts +++ b/src/resources/extensions/gsd/tests/tool-naming.test.ts @@ -24,6 +24,7 @@ function makeMockPi() { const RENAME_MAP: Array<{ canonical: string; alias: string }> = [ { canonical: "gsd_decision_save", alias: "gsd_save_decision" }, { canonical: "gsd_requirement_update", alias: "gsd_update_requirement" }, + { canonical: "gsd_requirement_save", alias: "gsd_save_requirement" }, { canonical: "gsd_summary_save", alias: "gsd_save_summary" }, { canonical: "gsd_milestone_generate_id", alias: "gsd_generate_milestone_id" }, { canonical: "gsd_task_complete", alias: "gsd_complete_task" }, @@ -44,7 +45,7 @@ console.log('\n── Tool naming: registration count ──'); const pi = makeMockPi(); registerDbTools(pi); -assert.deepStrictEqual(pi.tools.length, 27, 'Should register exactly 27 tools (13 canonical + 13 aliases + 1 
gate tool)'); +assert.deepStrictEqual(pi.tools.length, 29, 'Should register exactly 29 tools (14 canonical + 14 aliases + 1 gate tool)'); // ─── Both names exist for each pair ────────────────────────────────────────── diff --git a/src/resources/extensions/gsd/tests/triage-resolution.test.ts b/src/resources/extensions/gsd/tests/triage-resolution.test.ts index 496685732..deb924347 100644 --- a/src/resources/extensions/gsd/tests/triage-resolution.test.ts +++ b/src/resources/extensions/gsd/tests/triage-resolution.test.ts @@ -212,6 +212,14 @@ test("resolution: buildQuickTaskPrompt includes capture text and ID", () => { assert.ok(prompt.includes("add retry logic to OAuth"), "should include capture text"); assert.ok(prompt.includes("Quick Task"), "should have Quick Task header"); assert.ok(prompt.includes("Do NOT modify"), "should warn about plan files"); + assert.ok( + prompt.includes("Verify the issue still exists"), + "should instruct agent to verify issue still exists (#2872)", + ); + assert.ok( + prompt.includes("Already resolved"), + "should instruct agent to report already resolved if fixed (#2872)", + ); }); // ─── markCaptureExecuted ───────────────────────────────────────────────────── diff --git a/src/resources/extensions/gsd/tests/uat-stuck-loop-orphaned-worktree.test.ts b/src/resources/extensions/gsd/tests/uat-stuck-loop-orphaned-worktree.test.ts new file mode 100644 index 000000000..44ae79661 --- /dev/null +++ b/src/resources/extensions/gsd/tests/uat-stuck-loop-orphaned-worktree.test.ts @@ -0,0 +1,289 @@ +/** + * uat-stuck-loop-orphaned-worktree.test.ts — Regression tests for #2821. + * + * Reproduces two cascading bugs: + * + * Bug 1 — UAT stuck-loop: syncProjectRootToWorktree uses force:false for + * milestone files. When the project root has an ASSESSMENT with a verdict + * but the worktree has a stale/empty ASSESSMENT (or none at all after DB + * rebuild), the verdict is NOT synced into the worktree. 
checkNeedsRunUat + * finds no verdict → re-dispatches run-uat indefinitely. + * + * Bug 2 — Orphaned worktree: removeWorktree silently swallows failures when + * git worktree remove fails (untracked files, CWD inside worktree, etc.). + * The worktree directory and branch persist on disk after teardown. + * teardownAutoWorktree has a fallback rmSync but it also fails when the + * git internal .git/worktrees/ directory holds a lock. + */ + +import { describe, test, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; +import { + mkdtempSync, + mkdirSync, + writeFileSync, + rmSync, + existsSync, + readFileSync, +} from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { execFileSync } from "node:child_process"; + +import { syncProjectRootToWorktree } from "../auto-worktree.ts"; +import { + createWorktree, + removeWorktree, + worktreePath, +} from "../worktree-manager.ts"; + +function git(args: string[], cwd: string): string { + return execFileSync("git", args, { + cwd, + stdio: ["ignore", "pipe", "pipe"], + encoding: "utf-8", + }).trim(); +} + +function makeBaseRepo(): string { + const base = mkdtempSync(join(tmpdir(), "gsd-2821-")); + git(["init", "-b", "main"], base); + git(["config", "user.name", "Test"], base); + git(["config", "user.email", "test@test.com"], base); + writeFileSync(join(base, "README.md"), "# test\n"); + mkdirSync(join(base, ".gsd", "milestones", "M011"), { recursive: true }); + git(["add", "."], base); + git(["commit", "-m", "init"], base); + return base; +} + +// ─── Bug 1: ASSESSMENT force-sync ───────────────────────────────────────── + +describe("#2821 Bug 1 — ASSESSMENT file force-synced on resume", () => { + let mainBase: string; + let wtBase: string; + + beforeEach(() => { + mainBase = mkdtempSync(join(tmpdir(), "gsd-2821-main-")); + wtBase = mkdtempSync(join(tmpdir(), "gsd-2821-wt-")); + mkdirSync(join(mainBase, ".gsd", "milestones", "M011", "slices", "S01"), { + 
recursive: true, + }); + mkdirSync(join(wtBase, ".gsd", "milestones", "M011", "slices", "S01"), { + recursive: true, + }); + }); + + afterEach(() => { + rmSync(mainBase, { recursive: true, force: true }); + rmSync(wtBase, { recursive: true, force: true }); + }); + + test("force-syncs ASSESSMENT with verdict from project root into worktree when worktree copy has no verdict", () => { + // Project root has ASSESSMENT with a PASS verdict (written by run-uat, synced by post-unit) + const prAssessment = join( + mainBase, + ".gsd", + "milestones", + "M011", + "slices", + "S01", + "S01-ASSESSMENT.md", + ); + writeFileSync( + prAssessment, + "---\nverdict: pass\n---\n# S01 Assessment\nAll tests pass.\n", + ); + + // Worktree has a stale ASSESSMENT with FAIL verdict (from the initial run-uat execution) + const wtAssessment = join( + wtBase, + ".gsd", + "milestones", + "M011", + "slices", + "S01", + "S01-ASSESSMENT.md", + ); + writeFileSync( + wtAssessment, + "---\nverdict: fail\n---\n# S01 Assessment\nSome tests fail.\n", + ); + + syncProjectRootToWorktree(mainBase, wtBase, "M011"); + + // The worktree ASSESSMENT must now have the project root's PASS verdict + const content = readFileSync(wtAssessment, "utf-8"); + assert.ok( + content.includes("verdict: pass"), + `Expected worktree ASSESSMENT to have verdict:pass after sync, got: ${content.slice(0, 100)}`, + ); + }); + + test("force-syncs ASSESSMENT from project root when worktree has no ASSESSMENT at all", () => { + // Project root has ASSESSMENT with verdict + const prAssessment = join( + mainBase, + ".gsd", + "milestones", + "M011", + "slices", + "S01", + "S01-ASSESSMENT.md", + ); + writeFileSync( + prAssessment, + "---\nverdict: pass\n---\n# S01 Assessment\n", + ); + + // Worktree has NO ASSESSMENT (deleted during DB rebuild) + // — file simply doesn't exist + + syncProjectRootToWorktree(mainBase, wtBase, "M011"); + + const wtAssessment = join( + wtBase, + ".gsd", + "milestones", + "M011", + "slices", + "S01", + 
"S01-ASSESSMENT.md", + ); + assert.ok( + existsSync(wtAssessment), + "ASSESSMENT should be copied to worktree when missing", + ); + const content = readFileSync(wtAssessment, "utf-8"); + assert.ok( + content.includes("verdict: pass"), + `Synced ASSESSMENT should contain verdict:pass, got: ${content.slice(0, 100)}`, + ); + }); + + test("does NOT overwrite worktree ASSESSMENT when project root has no verdict", () => { + // Project root has ASSESSMENT without verdict (incomplete) + const prAssessment = join( + mainBase, + ".gsd", + "milestones", + "M011", + "slices", + "S01", + "S01-ASSESSMENT.md", + ); + writeFileSync(prAssessment, "# S01 Assessment\nIn progress...\n"); + + // Worktree has ASSESSMENT with verdict:fail + const wtAssessment = join( + wtBase, + ".gsd", + "milestones", + "M011", + "slices", + "S01", + "S01-ASSESSMENT.md", + ); + writeFileSync( + wtAssessment, + "---\nverdict: fail\n---\n# S01 Assessment\nSome tests fail.\n", + ); + + syncProjectRootToWorktree(mainBase, wtBase, "M011"); + + // Worktree copy should NOT be overwritten by the verdictless project root copy + const content = readFileSync(wtAssessment, "utf-8"); + assert.ok( + content.includes("verdict: fail"), + `Worktree ASSESSMENT should keep verdict:fail when project root has no verdict, got: ${content.slice(0, 100)}`, + ); + }); +}); + +// ─── Bug 2: Orphaned worktree cleanup ───────────────────────────────────── + +describe("#2821 Bug 2 — removeWorktree cleans up despite untracked files", () => { + let base: string; + + beforeEach(() => { + base = makeBaseRepo(); + }); + + afterEach(() => { + rmSync(base, { recursive: true, force: true }); + }); + + test("removes worktree directory even when it contains untracked files", () => { + const info = createWorktree(base, "M011", { + branch: "milestone/M011", + }); + + // Simulate run-uat writing untracked files (S01-UAT-RESULT.md, ASSESSMENT) + mkdirSync( + join(info.path, ".gsd", "milestones", "M011", "slices", "S01"), + { recursive: true }, + 
); + writeFileSync( + join( + info.path, + ".gsd", + "milestones", + "M011", + "slices", + "S01", + "S01-UAT-RESULT.md", + ), + "# UAT Result\nverdict: fail\n", + ); + writeFileSync( + join( + info.path, + ".gsd", + "milestones", + "M011", + "slices", + "S01", + "S01-ASSESSMENT.md", + ), + "---\nverdict: fail\n---\n# Assessment\n", + ); + + removeWorktree(base, "M011", { + branch: "milestone/M011", + deleteBranch: true, + force: true, + }); + + const wtDir = worktreePath(base, "M011"); + assert.ok( + !existsSync(wtDir), + `Worktree directory should be removed after teardown, but still exists at ${wtDir}`, + ); + }); + + test("removes git internal worktree metadata after filesystem removal", () => { + createWorktree(base, "M011", { + branch: "milestone/M011", + }); + + removeWorktree(base, "M011", { + branch: "milestone/M011", + deleteBranch: true, + force: true, + }); + + // The git internal worktree directory should be cleaned up + const gitInternalWorktreeDir = join(base, ".git", "worktrees", "M011"); + assert.ok( + !existsSync(gitInternalWorktreeDir), + `Git internal worktree dir should be removed: ${gitInternalWorktreeDir}`, + ); + + // The branch should be deleted + const branches = git(["branch"], base); + assert.ok( + !branches.includes("milestone/M011"), + "milestone/M011 branch should be deleted after removeWorktree", + ); + }); +}); diff --git a/src/resources/extensions/gsd/tests/unit-ownership.test.ts b/src/resources/extensions/gsd/tests/unit-ownership.test.ts index fd062c9c8..39ea6202f 100644 --- a/src/resources/extensions/gsd/tests/unit-ownership.test.ts +++ b/src/resources/extensions/gsd/tests/unit-ownership.test.ts @@ -3,7 +3,7 @@ import test from 'node:test'; import assert from 'node:assert/strict'; -import { mkdtempSync, rmSync, existsSync, readFileSync } from 'node:fs'; +import { mkdtempSync, rmSync } from 'node:fs'; import { join } from 'node:path'; import { tmpdir } from 'node:os'; @@ -14,6 +14,8 @@ import { checkOwnership, taskUnitKey, 
sliceUnitKey, + initOwnershipTable, + closeOwnershipDb, } from '../unit-ownership.ts'; function makeTmpBase(): string { @@ -34,28 +36,51 @@ test('sliceUnitKey: builds correct key', () => { assert.equal(sliceUnitKey('M001', 'S01'), 'M001/S01'); }); -// ─── Claim / get / release ─────────────────────────────────────────────── +// ─── Claim / get / release (SQLite-backed) ────────────────────────────── -test('claimUnit: creates claim file and records agent', () => { +test('claimUnit: creates DB and records agent', () => { const base = makeTmpBase(); try { - claimUnit(base, 'M001/S01/T01', 'executor-01'); + initOwnershipTable(base); + const claimed = claimUnit(base, 'M001/S01/T01', 'executor-01'); - assert.ok(existsSync(join(base, '.gsd', 'unit-claims.json')), 'claim file should exist'); + assert.equal(claimed, true, 'first claim should succeed'); assert.equal(getOwner(base, 'M001/S01/T01'), 'executor-01'); } finally { + closeOwnershipDb(base); cleanup(base); } }); -test('claimUnit: overwrites existing claim (last writer wins)', () => { +test('claimUnit: rejects second claim on same unit (first-writer-wins)', () => { const base = makeTmpBase(); try { - claimUnit(base, 'M001/S01/T01', 'executor-01'); - claimUnit(base, 'M001/S01/T01', 'executor-02'); + initOwnershipTable(base); + const first = claimUnit(base, 'M001/S01/T01', 'executor-01'); + const second = claimUnit(base, 'M001/S01/T01', 'executor-02'); - assert.equal(getOwner(base, 'M001/S01/T01'), 'executor-02'); + assert.equal(first, true, 'first claim should succeed'); + assert.equal(second, false, 'second claim should fail (first-writer-wins)'); + assert.equal(getOwner(base, 'M001/S01/T01'), 'executor-01', + 'original owner must be preserved'); } finally { + closeOwnershipDb(base); + cleanup(base); + } +}); + +test('claimUnit: same agent re-claiming same unit succeeds', () => { + const base = makeTmpBase(); + try { + initOwnershipTable(base); + const first = claimUnit(base, 'M001/S01/T01', 'agent-a'); + const 
second = claimUnit(base, 'M001/S01/T01', 'agent-a'); + + assert.equal(first, true); + assert.equal(second, true, 're-claim by same agent should succeed'); + assert.equal(getOwner(base, 'M001/S01/T01'), 'agent-a'); + } finally { + closeOwnershipDb(base); cleanup(base); } }); @@ -63,21 +88,25 @@ test('claimUnit: overwrites existing claim (last writer wins)', () => { test('claimUnit: multiple units can be claimed independently', () => { const base = makeTmpBase(); try { + initOwnershipTable(base); claimUnit(base, 'M001/S01/T01', 'agent-a'); claimUnit(base, 'M001/S01/T02', 'agent-b'); assert.equal(getOwner(base, 'M001/S01/T01'), 'agent-a'); assert.equal(getOwner(base, 'M001/S01/T02'), 'agent-b'); } finally { + closeOwnershipDb(base); cleanup(base); } }); -test('getOwner: returns null when no claim file exists', () => { +test('getOwner: returns null when no DB initialized', () => { const base = makeTmpBase(); try { + initOwnershipTable(base); assert.equal(getOwner(base, 'M001/S01/T01'), null); } finally { + closeOwnershipDb(base); cleanup(base); } }); @@ -85,9 +114,11 @@ test('getOwner: returns null when no claim file exists', () => { test('getOwner: returns null for unclaimed unit', () => { const base = makeTmpBase(); try { + initOwnershipTable(base); claimUnit(base, 'M001/S01/T01', 'agent-a'); assert.equal(getOwner(base, 'M001/S01/T99'), null); } finally { + closeOwnershipDb(base); cleanup(base); } }); @@ -95,11 +126,13 @@ test('getOwner: returns null for unclaimed unit', () => { test('releaseUnit: removes claim', () => { const base = makeTmpBase(); try { + initOwnershipTable(base); claimUnit(base, 'M001/S01/T01', 'agent-a'); releaseUnit(base, 'M001/S01/T01'); assert.equal(getOwner(base, 'M001/S01/T01'), null); } finally { + closeOwnershipDb(base); cleanup(base); } }); @@ -107,9 +140,27 @@ test('releaseUnit: removes claim', () => { test('releaseUnit: no-op for non-existent claim', () => { const base = makeTmpBase(); try { + initOwnershipTable(base); // Should not 
throw releaseUnit(base, 'M001/S01/T01'); } finally { + closeOwnershipDb(base); + cleanup(base); + } +}); + +test('releaseUnit: allows reclaim after release', () => { + const base = makeTmpBase(); + try { + initOwnershipTable(base); + claimUnit(base, 'M001/S01/T01', 'agent-a'); + releaseUnit(base, 'M001/S01/T01'); + + const reclaimed = claimUnit(base, 'M001/S01/T01', 'agent-b'); + assert.equal(reclaimed, true, 'reclaim after release should succeed'); + assert.equal(getOwner(base, 'M001/S01/T01'), 'agent-b'); + } finally { + closeOwnershipDb(base); cleanup(base); } }); @@ -119,20 +170,13 @@ test('releaseUnit: no-op for non-existent claim', () => { test('checkOwnership: returns null when no actorName provided (opt-in)', () => { const base = makeTmpBase(); try { + initOwnershipTable(base); claimUnit(base, 'M001/S01/T01', 'agent-a'); // No actorName → ownership not enforced assert.equal(checkOwnership(base, 'M001/S01/T01', undefined), null); } finally { - cleanup(base); - } -}); - -test('checkOwnership: returns null when no claim file exists', () => { - const base = makeTmpBase(); - try { - assert.equal(checkOwnership(base, 'M001/S01/T01', 'agent-a'), null); - } finally { + closeOwnershipDb(base); cleanup(base); } }); @@ -140,11 +184,13 @@ test('checkOwnership: returns null when no claim file exists', () => { test('checkOwnership: returns null when unit is unclaimed', () => { const base = makeTmpBase(); try { + initOwnershipTable(base); claimUnit(base, 'M001/S01/T01', 'agent-a'); // Different unit, unclaimed assert.equal(checkOwnership(base, 'M001/S01/T99', 'agent-b'), null); } finally { + closeOwnershipDb(base); cleanup(base); } }); @@ -152,10 +198,12 @@ test('checkOwnership: returns null when unit is unclaimed', () => { test('checkOwnership: returns null when actor matches owner', () => { const base = makeTmpBase(); try { + initOwnershipTable(base); claimUnit(base, 'M001/S01/T01', 'agent-a'); assert.equal(checkOwnership(base, 'M001/S01/T01', 'agent-a'), null); } 
finally { + closeOwnershipDb(base); cleanup(base); } }); @@ -163,6 +211,7 @@ test('checkOwnership: returns null when actor matches owner', () => { test('checkOwnership: returns error string when actor does not match owner', () => { const base = makeTmpBase(); try { + initOwnershipTable(base); claimUnit(base, 'M001/S01/T01', 'agent-a'); const err = checkOwnership(base, 'M001/S01/T01', 'agent-b'); @@ -170,6 +219,40 @@ test('checkOwnership: returns error string when actor does not match owner', () assert.match(err!, /owned by agent-a/); assert.match(err!, /not agent-b/); } finally { + closeOwnershipDb(base); + cleanup(base); + } +}); + +// ─── Race condition: first-writer-wins atomicity ───────────────────────── + +test('claimUnit: concurrent claims — only first writer wins (no lost update)', () => { + const base = makeTmpBase(); + try { + initOwnershipTable(base); + + // Simulate the race described in #2728: + // Two agents both try to claim the same unit. + // With SQLite INSERT OR IGNORE, only the first succeeds. 
+ const results: boolean[] = []; + const agents = ['agent-alpha', 'agent-beta', 'agent-gamma']; + for (const agent of agents) { + results.push(claimUnit(base, 'M001/S01/T01', agent)); + } + + // Exactly one agent should have won + const wins = results.filter(r => r === true); + assert.equal(wins.length, 1, 'exactly one agent should win the claim'); + + // The winner is the first agent (deterministic in single-threaded) + assert.equal(results[0], true); + assert.equal(results[1], false); + assert.equal(results[2], false); + + // The owner must be the first agent + assert.equal(getOwner(base, 'M001/S01/T01'), 'agent-alpha'); + } finally { + closeOwnershipDb(base); cleanup(base); } }); diff --git a/src/resources/extensions/gsd/tests/vacuum-recovery.test.ts b/src/resources/extensions/gsd/tests/vacuum-recovery.test.ts new file mode 100644 index 000000000..5cd0bb230 --- /dev/null +++ b/src/resources/extensions/gsd/tests/vacuum-recovery.test.ts @@ -0,0 +1,154 @@ +import { describe, test } from 'node:test'; +import assert from 'node:assert/strict'; +import * as fs from 'node:fs'; +import * as path from 'node:path'; +import * as os from 'node:os'; +import { createRequire } from 'node:module'; +import { + openDatabase, + closeDatabase, + isDbAvailable, + _getAdapter, +} from '../gsd-db.ts'; + +const _require = createRequire(import.meta.url); + +// ═══════════════════════════════════════════════════════════════════════════ +// Helpers +// ═══════════════════════════════════════════════════════════════════════════ + +function tempDbPath(): string { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'gsd-vacuum-test-')); + return path.join(dir, 'test.db'); +} + +function cleanup(dbPath: string): void { + closeDatabase(); + try { + const dir = path.dirname(dbPath); + for (const f of fs.readdirSync(dir)) { + fs.unlinkSync(path.join(dir, f)); + } + fs.rmdirSync(dir); + } catch { /* best effort */ } +} + +/** + * Create a SQLite DB with a corrupt freelist that causes DDL to fail + 
* with "database disk image is malformed" but is recoverable via VACUUM. + * + * Strategy: + * 1. Create a DB with schema_version at v0 (so initSchema needs to run DDL) + * 2. Add padding rows to create many pages, then delete + drop to free them + * 3. Corrupt the freelist trunk pointer to point at a B-tree page + * + * This simulates the real-world scenario described in #2519: an interrupted + * WAL checkpoint leaves the freelist in an inconsistent state. + */ +function createCorruptFreelistDb(dbPath: string): void { + // Use node:sqlite directly to build the minimal corrupt DB + const sqlite = _require('node:sqlite'); + const db = new sqlite.DatabaseSync(dbPath); + db.exec('PRAGMA journal_mode=WAL'); + db.exec('CREATE TABLE schema_version (version INTEGER NOT NULL, applied_at TEXT NOT NULL)'); + db.exec("INSERT INTO schema_version VALUES (0, '2024-01-01')"); + // Pad with data to create many pages, then free them + db.exec('CREATE TABLE _padding (id INTEGER PRIMARY KEY, data TEXT)'); + for (let i = 0; i < 30; i++) { + db.exec(`INSERT INTO _padding (data) VALUES ('${'x'.repeat(4000)}')`); + } + db.exec('DELETE FROM _padding'); + db.exec('DROP TABLE _padding'); + db.exec('PRAGMA wal_checkpoint(TRUNCATE)'); + db.close(); + + // Remove WAL/SHM files to ensure clean file-only state + try { fs.unlinkSync(dbPath + '-wal'); } catch { /* may not exist */ } + try { fs.unlinkSync(dbPath + '-shm'); } catch { /* may not exist */ } + + // Corrupt: point freelist trunk (offset 32-35) to page 2 (a B-tree page), + // and claim 10 free pages (offset 36-39) + const fd = fs.openSync(dbPath, 'r+'); + try { + const buf = Buffer.alloc(8); + buf.writeUInt32BE(2, 0); // trunk page = page 2 (actually a B-tree page) + buf.writeUInt32BE(10, 4); // freelist count = 10 + fs.writeSync(fd, buf, 0, 8, 32); + } finally { + fs.closeSync(fd); + } +} + +// ═══════════════════════════════════════════════════════════════════════════ +// Tests +// 
═══════════════════════════════════════════════════════════════════════════ + +describe('openDatabase VACUUM recovery on corrupt freelist', () => { + + test('recovers a file-backed DB with corrupt freelist via VACUUM', () => { + const dbPath = tempDbPath(); + + // Create a DB with corrupt freelist (schema at v0 so initSchema runs DDL) + createCorruptFreelistDb(dbPath); + + // Without the fix, this throws "database disk image is malformed". + // With the fix, openDatabase detects "malformed", runs VACUUM, retries. + const ok = openDatabase(dbPath); + assert.ok(ok, 'openDatabase should succeed after VACUUM recovery'); + assert.ok(isDbAvailable(), 'DB should be available after recovery'); + + // Verify full schema was applied + const adapter = _getAdapter()!; + const row = adapter.prepare( + 'SELECT MAX(version) as version FROM schema_version', + ).get(); + assert.ok( + typeof row?.['version'] === 'number' && (row['version'] as number) > 0, + 'schema_version should have a positive version after recovery', + ); + + cleanup(dbPath); + }); + + test('does not attempt VACUUM for non-malformed errors', () => { + // openDatabase with :memory: never hits the fileBacked VACUUM path, + // so non-malformed errors propagate directly. We verify by checking + // that a non-file error from an in-memory DB propagates unchanged. + // (In-memory DBs always succeed for initSchema, so this is a design + // check — the VACUUM path is only for fileBacked = true.) 
+ const ok = openDatabase(':memory:'); + assert.ok(ok, 'in-memory DB should open fine'); + closeDatabase(); + }); + + test('throws if VACUUM itself fails on unrecoverable corruption', () => { + const dbPath = tempDbPath(); + + // Create a file with valid SQLite header but thoroughly corrupt content + const page = Buffer.alloc(4096); + // SQLite magic: "SQLite format 3\0" + page.write('SQLite format 3\0', 0, 'utf8'); + // Page size: 4096 (big-endian at offset 16) + page.writeUInt16BE(4096, 16); + page[18] = 1; // write version + page[19] = 1; // read version + page[20] = 0; // reserved space + page[21] = 64; // max embedded payload fraction + page[22] = 32; // min embedded payload fraction + page[23] = 32; // leaf payload fraction + page.writeUInt32BE(1, 28); // page_count = 1 + page.writeUInt32BE(999, 32); // corrupt freelist trunk + page.writeUInt32BE(5, 36); // freelist count = 5 + + fs.writeFileSync(dbPath, page); + + // Should throw — VACUUM cannot save a thoroughly corrupt file + assert.throws( + () => openDatabase(dbPath), + /./, + 'should throw for unrecoverable corruption', + ); + + cleanup(dbPath); + }); +}); diff --git a/src/resources/extensions/gsd/tests/validate-milestone-write-order.test.ts b/src/resources/extensions/gsd/tests/validate-milestone-write-order.test.ts index 120751b60..1f07791e0 100644 --- a/src/resources/extensions/gsd/tests/validate-milestone-write-order.test.ts +++ b/src/resources/extensions/gsd/tests/validate-milestone-write-order.test.ts @@ -6,7 +6,7 @@ import { tmpdir } from "node:os"; import { randomUUID } from "node:crypto"; import { handleValidateMilestone } from "../tools/validate-milestone.js"; -import { openDatabase, closeDatabase, _getAdapter, insertMilestone } from "../gsd-db.js"; +import { openDatabase, closeDatabase, _getAdapter, insertMilestone, insertSlice } from "../gsd-db.js"; import { clearPathCache } from "../paths.js"; import { clearParseCache } from "../files.js"; @@ -45,6 +45,7 @@ describe("handleValidateMilestone 
write ordering (#2725)", () => { const dbPath = join(base, ".gsd", "gsd.db"); openDatabase(dbPath); insertMilestone({ id: "M001" }); + insertSlice({ id: "S01", milestoneId: "M001" }); const result = await handleValidateMilestone(VALID_PARAMS, base); assert.ok(!("error" in result), `unexpected error: ${"error" in result ? result.error : ""}`); @@ -71,6 +72,7 @@ describe("handleValidateMilestone write ordering (#2725)", () => { const dbPath = join(base, ".gsd", "gsd.db"); openDatabase(dbPath); insertMilestone({ id: "M001" }); + insertSlice({ id: "S01", milestoneId: "M001" }); const result = await handleValidateMilestone( { ...VALID_PARAMS, verificationClasses: undefined }, @@ -88,6 +90,7 @@ describe("handleValidateMilestone write ordering (#2725)", () => { const dbPath = join(base, ".gsd", "gsd.db"); openDatabase(dbPath); insertMilestone({ id: "M001" }); + insertSlice({ id: "S01", milestoneId: "M001" }); // Force disk write failure by replacing the milestone directory with a // regular file. saveFile() will fail because it cannot write inside a diff --git a/src/resources/extensions/gsd/tests/verdict-parser.test.ts b/src/resources/extensions/gsd/tests/verdict-parser.test.ts new file mode 100644 index 000000000..c8aafea8c --- /dev/null +++ b/src/resources/extensions/gsd/tests/verdict-parser.test.ts @@ -0,0 +1,156 @@ +/** + * Tests for verdict-parser.ts — extraction, normalization, and schema validation. + * + * Regression tests for #2960: extractVerdict() must detect verdicts in both + * YAML frontmatter and common markdown body patterns (LLM manual writes). 
+ */ + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { + extractVerdict, + hasVerdict, + isAcceptableUatVerdict, + isValidMilestoneVerdict, +} from "../verdict-parser.ts"; + +// ── extractVerdict ────────────────────────────────────────────────────────── + +describe("extractVerdict", () => { + it("extracts verdict from YAML frontmatter", () => { + const content = "---\nverdict: pass\n---\n\n# Validation"; + assert.equal(extractVerdict(content), "pass"); + }); + + it("normalizes 'passed' to 'pass' in frontmatter", () => { + const content = "---\nverdict: passed\n---\n"; + assert.equal(extractVerdict(content), "pass"); + }); + + it("extracts case-insensitive verdict from frontmatter", () => { + const content = "---\nVerdict: PASS\n---\n"; + assert.equal(extractVerdict(content), "pass"); + }); + + it("extracts needs-remediation from frontmatter", () => { + const content = "---\nverdict: needs-remediation\n---\n"; + assert.equal(extractVerdict(content), "needs-remediation"); + }); + + it("returns undefined when content has no frontmatter and no markdown verdict", () => { + const content = "# Just a heading\n\nSome text without any verdict."; + assert.equal(extractVerdict(content), undefined); + }); + + // ── Regression: #2960 — markdown body verdicts ───────────────────────── + + it("detects **Verdict:** PASS in markdown body (#2960)", () => { + const content = [ + "# M013 — Milestone Validation", + "", + "**Verdict:** PASS", + "", + "All slices completed successfully.", + ].join("\n"); + assert.equal(extractVerdict(content), "pass"); + }); + + it("detects **Verdict:** with emoji prefix in markdown body (#2960)", () => { + const content = [ + "# Milestone Validation", + "", + "**Verdict:** ✅ PASS", + "", + "Everything looks good.", + ].join("\n"); + assert.equal(extractVerdict(content), "pass"); + }); + + it("detects **Verdict:** needs-remediation in markdown body (#2960)", () => { + const content = [ + "# Milestone 
Validation", + "", + "**Verdict:** needs-remediation", + "", + "Several issues found.", + ].join("\n"); + assert.equal(extractVerdict(content), "needs-remediation"); + }); + + it("normalizes 'passed' to 'pass' in markdown body (#2960)", () => { + const content = "# Validation\n\n**Verdict:** Passed\n"; + assert.equal(extractVerdict(content), "pass"); + }); + + it("detects verdict without colon in bold pattern (#2960)", () => { + const content = "# Validation\n\n**Verdict** PASS\n"; + assert.equal(extractVerdict(content), "pass"); + }); + + it("prefers frontmatter verdict over markdown body", () => { + const content = [ + "---", + "verdict: needs-remediation", + "---", + "", + "**Verdict:** PASS", + ].join("\n"); + assert.equal(extractVerdict(content), "needs-remediation"); + }); +}); + +// ── hasVerdict ──────────────────────────────────────────────────────────── + +describe("hasVerdict", () => { + it("returns true when verdict field exists", () => { + assert.equal(hasVerdict("verdict: pass"), true); + }); + + it("returns false when no verdict field exists", () => { + assert.equal(hasVerdict("# Just a heading"), false); + }); +}); + +// ── isAcceptableUatVerdict ─────────────────────────────────────────────── + +describe("isAcceptableUatVerdict", () => { + it("accepts pass verdict", () => { + assert.equal(isAcceptableUatVerdict("pass", undefined), true); + }); + + it("accepts passed verdict", () => { + assert.equal(isAcceptableUatVerdict("passed", undefined), true); + }); + + it("rejects fail verdict", () => { + assert.equal(isAcceptableUatVerdict("fail", undefined), false); + }); + + it("accepts partial for mixed UAT type", () => { + assert.equal(isAcceptableUatVerdict("partial", "mixed"), true); + }); + + it("rejects partial for artifact-driven UAT type", () => { + assert.equal(isAcceptableUatVerdict("partial", "artifact-driven"), false); + }); +}); + +// ── isValidMilestoneVerdict ────────────────────────────────────────────── + 
+describe("isValidMilestoneVerdict", () => { + it("accepts pass", () => { + assert.equal(isValidMilestoneVerdict("pass"), true); + }); + + it("accepts needs-attention", () => { + assert.equal(isValidMilestoneVerdict("needs-attention"), true); + }); + + it("accepts needs-remediation", () => { + assert.equal(isValidMilestoneVerdict("needs-remediation"), true); + }); + + it("rejects unknown verdict", () => { + assert.equal(isValidMilestoneVerdict("fail"), false); + }); +}); diff --git a/src/resources/extensions/gsd/tests/verification-operational-gate.test.ts b/src/resources/extensions/gsd/tests/verification-operational-gate.test.ts new file mode 100644 index 000000000..a9ae8d83a --- /dev/null +++ b/src/resources/extensions/gsd/tests/verification-operational-gate.test.ts @@ -0,0 +1,82 @@ +/** + * Regression test for #2931: completing-milestone gate should treat + * "None required", "N/A", "Not applicable", etc. as equivalent to "none" + * and skip the operational verification content check entirely. 
+ */ +import { test } from "node:test"; +import assert from "node:assert/strict"; + +import { isVerificationNotApplicable } from "../auto-dispatch.ts"; + +test("isVerificationNotApplicable: bare 'none' is not applicable", () => { + assert.equal(isVerificationNotApplicable("none"), true); +}); + +test("isVerificationNotApplicable: 'None' (capitalized) is not applicable", () => { + assert.equal(isVerificationNotApplicable("None"), true); +}); + +test("isVerificationNotApplicable: 'NONE' (uppercase) is not applicable", () => { + assert.equal(isVerificationNotApplicable("NONE"), true); +}); + +test("isVerificationNotApplicable: 'None required' is not applicable (#2931)", () => { + assert.equal(isVerificationNotApplicable("None required"), true); +}); + +test("isVerificationNotApplicable: 'None needed' is not applicable", () => { + assert.equal(isVerificationNotApplicable("None needed"), true); +}); + +test("isVerificationNotApplicable: 'None planned' is not applicable", () => { + assert.equal(isVerificationNotApplicable("None planned"), true); +}); + +test("isVerificationNotApplicable: 'N/A' is not applicable", () => { + assert.equal(isVerificationNotApplicable("N/A"), true); +}); + +test("isVerificationNotApplicable: 'n/a' is not applicable", () => { + assert.equal(isVerificationNotApplicable("n/a"), true); +}); + +test("isVerificationNotApplicable: 'Not applicable' is not applicable", () => { + assert.equal(isVerificationNotApplicable("Not applicable"), true); +}); + +test("isVerificationNotApplicable: 'Not required' is not applicable", () => { + assert.equal(isVerificationNotApplicable("Not required"), true); +}); + +test("isVerificationNotApplicable: 'Not needed' is not applicable", () => { + assert.equal(isVerificationNotApplicable("Not needed"), true); +}); + +test("isVerificationNotApplicable: 'No operational verification needed' is not applicable", () => { + assert.equal(isVerificationNotApplicable("No operational verification needed"), true); +}); + 
+test("isVerificationNotApplicable: 'No operational' is not applicable", () => { + assert.equal(isVerificationNotApplicable("No operational"), true); +}); + +test("isVerificationNotApplicable: empty string is not applicable", () => { + assert.equal(isVerificationNotApplicable(""), true); +}); + +test("isVerificationNotApplicable: whitespace-only is not applicable", () => { + assert.equal(isVerificationNotApplicable(" "), true); +}); + +// Positive cases: these SHOULD require verification +test("isVerificationNotApplicable: 'Run load tests' requires verification", () => { + assert.equal(isVerificationNotApplicable("Run load tests"), false); +}); + +test("isVerificationNotApplicable: 'Verify API response times under load' requires verification", () => { + assert.equal(isVerificationNotApplicable("Verify API response times under load"), false); +}); + +test("isVerificationNotApplicable: 'Monitor error rates for 24h' requires verification", () => { + assert.equal(isVerificationNotApplicable("Monitor error rates for 24h"), false); +}); diff --git a/src/resources/extensions/gsd/tests/workflow-logger.test.ts b/src/resources/extensions/gsd/tests/workflow-logger.test.ts index 015e4ff85..69fd2dcd4 100644 --- a/src/resources/extensions/gsd/tests/workflow-logger.test.ts +++ b/src/resources/extensions/gsd/tests/workflow-logger.test.ts @@ -317,6 +317,54 @@ describe("workflow-logger", () => { }); }); + describe("new log components (db, dispatch)", () => { + test("logError with 'db' component stores correct component", () => { + logError("db", "failed to copy DB to worktree", { error: "ENOENT" }); + const entries = peekLogs(); + assert.equal(entries.length, 1); + assert.equal(entries[0].severity, "error"); + assert.equal(entries[0].component, "db"); + assert.equal(entries[0].message, "failed to copy DB to worktree"); + assert.deepEqual(entries[0].context, { error: "ENOENT" }); + }); + + test("logError with 'dispatch' component stores correct component", () => { + 
logError("dispatch", "reactive graph derivation failed", { error: "timeout" }); + const entries = peekLogs(); + assert.equal(entries.length, 1); + assert.equal(entries[0].severity, "error"); + assert.equal(entries[0].component, "dispatch"); + assert.deepEqual(entries[0].context, { error: "timeout" }); + }); + + test("logWarning with 'reconcile' component for centralized logging path", () => { + logWarning("reconcile", "could not acquire sync lock — another reconciliation may be in progress"); + const entries = peekLogs(); + assert.equal(entries.length, 1); + assert.equal(entries[0].severity, "warn"); + assert.equal(entries[0].component, "reconcile"); + }); + + test("summarizeLogs includes db and dispatch entries", () => { + logError("db", "worktree DB reconciliation failed: path contains unsafe characters"); + logWarning("dispatch", "graph derivation timeout"); + const summary = summarizeLogs()!; + assert.ok(summary.includes("1 error(s)")); + assert.ok(summary.includes("1 warning(s)")); + assert.ok(summary.includes("unsafe characters")); + assert.ok(summary.includes("graph derivation timeout")); + }); + + test("formatForNotification renders db and dispatch components", () => { + logError("db", "copy failed"); + logWarning("dispatch", "slow derivation"); + const entries = drainLogs(); + const formatted = formatForNotification(entries); + assert.ok(formatted.includes("[db] copy failed")); + assert.ok(formatted.includes("[dispatch] slow derivation")); + }); + }); + describe("stderr output", () => { test("writes WARN prefix to stderr for warnings", (t) => { const written: string[] = []; diff --git a/src/resources/extensions/gsd/tests/workflow-manifest.test.ts b/src/resources/extensions/gsd/tests/workflow-manifest.test.ts index fa0618cbb..5e4591f9d 100644 --- a/src/resources/extensions/gsd/tests/workflow-manifest.test.ts +++ b/src/resources/extensions/gsd/tests/workflow-manifest.test.ts @@ -12,6 +12,7 @@ import { insertMilestone, insertSlice, insertTask, + _getAdapter, 
} from '../gsd-db.ts'; import { writeManifest, @@ -165,6 +166,97 @@ test('workflow-manifest: bootstrapFromManifest restores DB from manifest (round- } }); +// ─── snapshotState: numeric column coercion (#2962) ───────────────────── + +test('workflow-manifest: snapshotState coerces string placeholders in numeric columns to null (#2962)', () => { + const base = tempDir(); + openDatabase(tempDbPath(base)); + try { + // Set up prerequisite rows + insertMilestone({ id: 'M001' }); + insertSlice({ id: 'S01', milestoneId: 'M001' }); + insertTask({ id: 'T01', sliceId: 'S01', milestoneId: 'M001', title: 'Task', status: 'complete' }); + + // Insert verification_evidence with string placeholders in numeric columns + // This simulates what happens after schema migrations or manual inserts + const db = _getAdapter()!; + db.prepare( + `INSERT INTO verification_evidence (task_id, slice_id, milestone_id, command, exit_code, verdict, duration_ms, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, + ).run('T01', 'S01', 'M001', 'npm test', '-', 'pass', '-', new Date().toISOString()); + + // snapshotState should coerce "-" to null for numeric columns + const snap = snapshotState(); + const ev = snap.verification_evidence[0]; + assert.strictEqual(ev.exit_code, null, 'exit_code "-" should be coerced to null'); + assert.strictEqual(ev.duration_ms, null, 'duration_ms "-" should be coerced to null'); + + // Round-trip through JSON should not throw + const json = JSON.stringify(snap, null, 2); + const reparsed = JSON.parse(json); + assert.strictEqual(reparsed.verification_evidence[0].exit_code, null); + assert.strictEqual(reparsed.verification_evidence[0].duration_ms, null); + } finally { + closeDatabase(); + cleanupDir(base); + } +}); + +test('workflow-manifest: snapshotState coerces empty string and N/A in numeric columns (#2962)', () => { + const base = tempDir(); + openDatabase(tempDbPath(base)); + try { + insertMilestone({ id: 'M001' }); + insertSlice({ id: 'S01', milestoneId: 'M001' }); + 
insertTask({ id: 'T01', sliceId: 'S01', milestoneId: 'M001', title: 'Task', status: 'complete' }); + + const db = _getAdapter()!; + db.prepare( + `INSERT INTO verification_evidence (task_id, slice_id, milestone_id, command, exit_code, verdict, duration_ms, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, + ).run('T01', 'S01', 'M001', 'npm test', 'N/A', 'pass', '', new Date().toISOString()); + + const snap = snapshotState(); + const ev = snap.verification_evidence[0]; + assert.strictEqual(ev.exit_code, null, 'exit_code "N/A" should be coerced to null'); + assert.strictEqual(ev.duration_ms, null, 'duration_ms "" should be coerced to null'); + } finally { + closeDatabase(); + cleanupDir(base); + } +}); + +test('workflow-manifest: snapshotState coerces string placeholders in sequence columns (#2962)', () => { + const base = tempDir(); + openDatabase(tempDbPath(base)); + try { + insertMilestone({ id: 'M001' }); + + // Insert a slice with a string sequence via raw SQL + const db = _getAdapter()!; + db.prepare( + `INSERT INTO slices (milestone_id, id, title, status, risk, depends, demo, created_at, sequence) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, + ).run('M001', 'S01', 'Test Slice', 'planned', 'low', '[]', '', new Date().toISOString(), '-'); + + db.prepare( + `INSERT INTO tasks (milestone_id, slice_id, id, title, status, sequence) + VALUES (?, ?, ?, ?, ?, ?)`, + ).run('M001', 'S01', 'T01', 'Test Task', 'planned', 'N/A'); + + const snap = snapshotState(); + assert.strictEqual(snap.slices[0].sequence, 0, 'slice sequence "-" should be coerced to 0'); + assert.strictEqual(snap.tasks[0].sequence, 0, 'task sequence "N/A" should be coerced to 0'); + + // JSON round-trip must not throw + const json = JSON.stringify(snap, null, 2); + assert.doesNotThrow(() => JSON.parse(json)); + } finally { + closeDatabase(); + cleanupDir(base); + } +}); + // ─── readManifest: version check ───────────────────────────────────────── test('workflow-manifest: readManifest throws on unsupported 
version', () => { diff --git a/src/resources/extensions/gsd/tests/workflow-projections.test.ts b/src/resources/extensions/gsd/tests/workflow-projections.test.ts index cf21052e2..b9379ede8 100644 --- a/src/resources/extensions/gsd/tests/workflow-projections.test.ts +++ b/src/resources/extensions/gsd/tests/workflow-projections.test.ts @@ -86,10 +86,12 @@ test('workflow-projections: renderPlanContent falls back to TBD when goal and fu assert.ok(content.includes('**Goal:** TBD')); }); -test('workflow-projections: renderPlanContent falls back to full_summary_md when goal is empty', () => { +test('workflow-projections: renderPlanContent falls back to TBD when goal is empty (full_summary_md ignored #2945)', () => { const slice = makeSlice({ goal: '', full_summary_md: 'Fallback goal text' }); const content = renderPlanContent(slice, []); - assert.ok(content.includes('**Goal:** Fallback goal text')); + // #2945: full_summary_md is no longer used as a fallback — it contains + // multi-line rendered markdown that corrupts single-line fields. + assert.ok(content.includes('**Goal:** TBD'), `expected TBD fallback, got: ${content}`); }); test('workflow-projections: renderPlanContent includes ## Tasks section', () => { diff --git a/src/resources/extensions/gsd/tests/worktree-db-respawn-truncation.test.ts b/src/resources/extensions/gsd/tests/worktree-db-respawn-truncation.test.ts new file mode 100644 index 000000000..1870f2ad6 --- /dev/null +++ b/src/resources/extensions/gsd/tests/worktree-db-respawn-truncation.test.ts @@ -0,0 +1,140 @@ +/** + * worktree-db-respawn-truncation.test.ts — Regression test for #2815. + * + * Verifies that syncProjectRootToWorktree does NOT delete a non-empty + * worktree gsd.db. On worker respawn, gsd-migrate populates the DB + * (~1.7MB) before the auto-loop calls syncProjectRootToWorktree. The + * sync step must preserve the freshly-migrated DB to avoid truncating + * it to 0 bytes and causing "no such table: slices" failures. 
+ * + * Covers: + * - Non-empty worktree gsd.db preserved after sync (#2815) + * - Empty (0-byte) worktree gsd.db still deleted (#853 preserved) + * - WAL/SHM sidecar files cleaned up when empty DB is deleted + */ + +import { mkdtempSync, mkdirSync, writeFileSync, rmSync, existsSync, readFileSync, statSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; + +import { syncProjectRootToWorktree } from '../auto-worktree.ts'; +import { describe, test } from 'node:test'; +import assert from 'node:assert/strict'; + + +function createBase(name: string): string { + const base = mkdtempSync(join(tmpdir(), `gsd-wt-respawn-${name}-`)); + mkdirSync(join(base, '.gsd', 'milestones'), { recursive: true }); + return base; +} + +function cleanup(base: string): void { + rmSync(base, { recursive: true, force: true }); +} + +describe('worktree-db-respawn-truncation (#2815)', async () => { + + // ─── 1. Non-empty worktree gsd.db preserved after sync ─────────────── + console.log('\n=== 1. 
non-empty worktree gsd.db preserved after sync (#2815) ==='); + { + const mainBase = createBase('main'); + const wtBase = createBase('wt'); + + try { + // Set up milestone artifacts in main project root + const m001Dir = join(mainBase, '.gsd', 'milestones', 'M001'); + mkdirSync(m001Dir, { recursive: true }); + writeFileSync(join(m001Dir, 'M001-ROADMAP.md'), '# Roadmap'); + + // Simulate a freshly-migrated worktree DB (non-empty, like after gsd-migrate) + // Real DBs are ~1.7MB; we use a smaller payload to prove the size check works + const fakeDbContent = Buffer.alloc(4096, 0x42); // 4KB non-empty DB + writeFileSync(join(wtBase, '.gsd', 'gsd.db'), fakeDbContent); + + const sizeBefore = statSync(join(wtBase, '.gsd', 'gsd.db')).size; + assert.ok(sizeBefore > 0, 'gsd.db is non-empty before sync'); + + syncProjectRootToWorktree(mainBase, wtBase, 'M001'); + + // The non-empty DB must survive the sync + assert.ok( + existsSync(join(wtBase, '.gsd', 'gsd.db')), + '#2815: non-empty gsd.db must not be deleted by sync', + ); + const sizeAfter = statSync(join(wtBase, '.gsd', 'gsd.db')).size; + assert.equal( + sizeAfter, + sizeBefore, + '#2815: gsd.db size must be unchanged after sync', + ); + } finally { + cleanup(mainBase); + cleanup(wtBase); + } + } + + // ─── 2. Empty (0-byte) worktree gsd.db still deleted ───────────────── + console.log('\n=== 2. 
empty (0-byte) worktree gsd.db still deleted (#853) ==='); + { + const mainBase = createBase('main'); + const wtBase = createBase('wt'); + + try { + const m001Dir = join(mainBase, '.gsd', 'milestones', 'M001'); + mkdirSync(m001Dir, { recursive: true }); + writeFileSync(join(m001Dir, 'M001-ROADMAP.md'), '# Roadmap'); + + // Create an empty (0-byte) gsd.db — this is stale/corrupt and should be deleted + writeFileSync(join(wtBase, '.gsd', 'gsd.db'), ''); + assert.ok(existsSync(join(wtBase, '.gsd', 'gsd.db')), 'empty gsd.db exists before sync'); + + syncProjectRootToWorktree(mainBase, wtBase, 'M001'); + + assert.ok( + !existsSync(join(wtBase, '.gsd', 'gsd.db')), + '#853: empty gsd.db must still be deleted after sync', + ); + } finally { + cleanup(mainBase); + cleanup(wtBase); + } + } + + // ─── 3. Milestone artifacts still synced when DB is preserved ──────── + console.log('\n=== 3. milestone artifacts still synced even when DB preserved ==='); + { + const mainBase = createBase('main'); + const wtBase = createBase('wt'); + + try { + const m001Dir = join(mainBase, '.gsd', 'milestones', 'M001'); + mkdirSync(m001Dir, { recursive: true }); + writeFileSync(join(m001Dir, 'M001-ROADMAP.md'), '# Roadmap'); + mkdirSync(join(m001Dir, 'slices', 'S01'), { recursive: true }); + writeFileSync(join(m001Dir, 'slices', 'S01', 'S01-PLAN.md'), '# Plan'); + + // Non-empty DB in worktree + writeFileSync(join(wtBase, '.gsd', 'gsd.db'), 'populated-db-data'); + + syncProjectRootToWorktree(mainBase, wtBase, 'M001'); + + // Artifacts must still be synced + assert.ok( + existsSync(join(wtBase, '.gsd', 'milestones', 'M001', 'M001-ROADMAP.md')), + 'milestone artifacts synced even with preserved DB', + ); + assert.ok( + existsSync(join(wtBase, '.gsd', 'milestones', 'M001', 'slices', 'S01', 'S01-PLAN.md')), + 'slice artifacts synced even with preserved DB', + ); + // DB must still exist + assert.ok( + existsSync(join(wtBase, '.gsd', 'gsd.db')), + '#2815: DB preserved alongside artifact sync', + ); + 
} finally { + cleanup(mainBase); + cleanup(wtBase); + } + } +}); diff --git a/src/resources/extensions/gsd/tests/worktree-nested-git-safety.test.ts b/src/resources/extensions/gsd/tests/worktree-nested-git-safety.test.ts new file mode 100644 index 000000000..27ec1383a --- /dev/null +++ b/src/resources/extensions/gsd/tests/worktree-nested-git-safety.test.ts @@ -0,0 +1,101 @@ +/** + * worktree-nested-git-safety.test.ts — #2616 + * + * When scaffolding tools (create-next-app, cargo init, etc.) run inside a + * worktree, they create nested .git directories. Git treats these as gitlinks + * (mode 160000) without a .gitmodules entry, so the worktree cleanup destroys + * the only copy of those object databases — causing permanent data loss. + * + * This test verifies that removeWorktree detects nested .git directories + * (orphaned gitlinks) and absorbs or removes them before cleanup so files + * are tracked as regular content instead of unreachable gitlink pointers. + */ + +import { readFileSync } from "node:fs"; +import { join } from "node:path"; +import { createTestContext } from "./test-helpers.ts"; + +const { assertTrue, report } = createTestContext(); + +const srcPath = join(import.meta.dirname, "..", "worktree-manager.ts"); +const src = readFileSync(srcPath, "utf-8"); + +console.log("\n=== #2616: Worktree cleanup detects nested .git directories ==="); + +// ── Test 1: removeWorktree scans for nested .git directories ───────── + +const removeWorktreeIdx = src.indexOf("export function removeWorktree"); +assertTrue(removeWorktreeIdx > 0, "worktree-manager.ts exports removeWorktree"); + +const fnBody = src.slice(removeWorktreeIdx, removeWorktreeIdx + 5000); + +const detectsNestedGit = + fnBody.includes("nested") && fnBody.includes(".git") || + fnBody.includes("gitlink") || + fnBody.includes("160000") || + fnBody.includes("findNestedGitDirs") || + fnBody.includes("nestedGitDirs"); + +assertTrue( + detectsNestedGit, + "removeWorktree detects nested .git directories or 
gitlinks (#2616)", +); + +// ── Test 2: A helper function exists to find nested .git directories ── + +const hasNestedGitHelper = + src.includes("findNestedGitDirs") || + src.includes("detectNestedGitDirs") || + src.includes("scanNestedGit") || + src.includes("absorbNestedGit") || + src.includes("nestedGitDirs"); + +assertTrue( + hasNestedGitHelper, + "worktree-manager has a helper to find nested .git directories (#2616)", +); + +// ── Test 3: Nested .git dirs are absorbed or removed before cleanup ─── + +const absorbsOrRemoves = + fnBody.includes("absorb") || + fnBody.includes("rmSync") && fnBody.includes("nested") || + (fnBody.includes("nestedGitDirs") || fnBody.includes("findNestedGitDirs")) && + (fnBody.includes("rm") || fnBody.includes("absorb") || fnBody.includes("remove")); + +assertTrue( + absorbsOrRemoves, + "removeWorktree absorbs or removes nested .git dirs before cleanup (#2616)", +); + +// ── Test 4: A warning is logged when nested .git dirs are found ─────── + +const warnsAboutNestedGit = + fnBody.includes("nested") && fnBody.includes("logWarning") || + fnBody.includes("gitlink") && fnBody.includes("logWarning") || + fnBody.includes("scaffold") && fnBody.includes("logWarning"); + +assertTrue( + warnsAboutNestedGit, + "removeWorktree warns when nested .git directories are detected (#2616)", +); + +// ── Test 5: The findNestedGitDirs helper correctly identifies nested repos ── +// Verify the helper scans subdirectories but skips .gsd/, node_modules/, .git/ + +const helperBody = src.includes("findNestedGitDirs") + ? 
src.slice(src.indexOf("findNestedGitDirs")) + : ""; + +const skipsExcludedDirs = + helperBody.includes("node_modules") || + helperBody.includes(".gsd") || + helperBody.includes("skip") || + helperBody.includes("exclude"); + +assertTrue( + skipsExcludedDirs, + "findNestedGitDirs skips node_modules and other excluded directories (#2616)", +); + +report(); diff --git a/src/resources/extensions/gsd/tests/worktree-resolver.test.ts b/src/resources/extensions/gsd/tests/worktree-resolver.test.ts index c3a7f7aba..9cfda718d 100644 --- a/src/resources/extensions/gsd/tests/worktree-resolver.test.ts +++ b/src/resources/extensions/gsd/tests/worktree-resolver.test.ts @@ -481,7 +481,8 @@ test("mergeAndExit resolves roadmap from worktree when missing at project root ( // Should have called mergeMilestoneToMain, not bare teardown assert.equal(findCalls(deps.calls, "mergeMilestoneToMain").length, 1); - assert.equal(findCalls(deps.calls, "teardownAutoWorktree").length, 0); + // #2945 Bug 3: secondary teardown is now called after merge for cleanup + assert.equal(findCalls(deps.calls, "teardownAutoWorktree").length, 1); assert.equal(s.basePath, "/project"); // restored assert.ok(ctx.messages.some((m) => m.msg.includes("merged to main"))); }); @@ -913,3 +914,49 @@ test("isolationDegraded is reset by session.reset() (#2483)", () => { assert.equal(s.isolationDegraded, false); }); + +// ─── #2625 — Default isolation mode change must not orphan worktree commits ── + +test("mergeAndExit still merges when mode is 'none' but session is in a worktree (#2625)", () => { + // Scenario: user upgraded from a version where default was "worktree" to one + // where default is "none". They have an active worktree with committed work. + // mergeAndExit must detect the active worktree and merge regardless of config. 
+ const s = makeSession({ + basePath: "/project/.gsd/worktrees/M001", + originalBasePath: "/project", + }); + const deps = makeDeps({ + isInAutoWorktree: () => true, + getIsolationMode: () => "none", // config says "none" — but we ARE in a worktree + }); + const ctx = makeNotifyCtx(); + const resolver = new WorktreeResolver(s, deps); + + resolver.mergeAndExit("M001", ctx); + + // Must still merge — not skip silently + assert.equal(findCalls(deps.calls, "mergeMilestoneToMain").length, 1, + "must call mergeMilestoneToMain even when isolation mode is 'none' but we are in a worktree"); + assert.equal(s.basePath, "/project", "basePath must be restored to project root"); + assert.ok(ctx.messages.some((m) => m.msg.includes("merged to main")), + "must notify about the merge"); +}); + +test("mergeAndExit in none mode remains a no-op when NOT in a worktree (#2625)", () => { + // When mode is "none" and we are genuinely not in a worktree, it should still be a no-op. + const s = makeSession({ + basePath: "/project", + originalBasePath: "/project", + }); + const deps = makeDeps({ + isInAutoWorktree: () => false, + getIsolationMode: () => "none", + }); + const ctx = makeNotifyCtx(); + const resolver = new WorktreeResolver(s, deps); + + resolver.mergeAndExit("M001", ctx); + + assert.equal(findCalls(deps.calls, "mergeMilestoneToMain").length, 0, + "must NOT merge when not in a worktree and mode is none"); +}); diff --git a/src/resources/extensions/gsd/tests/worktree-sync-milestones.test.ts b/src/resources/extensions/gsd/tests/worktree-sync-milestones.test.ts index 94cebb383..f50d9df7b 100644 --- a/src/resources/extensions/gsd/tests/worktree-sync-milestones.test.ts +++ b/src/resources/extensions/gsd/tests/worktree-sync-milestones.test.ts @@ -100,8 +100,8 @@ describe('worktree-sync-milestones', async () => { } } - // ─── 3. gsd.db deleted in worktree after sync ───────────────────────── - console.log('\n=== 3. gsd.db deleted in worktree after sync ==='); + // ─── 3. 
empty gsd.db deleted in worktree after sync ──────────────────── + console.log('\n=== 3. empty gsd.db deleted in worktree after sync ==='); { const mainBase = createBase('main'); const wtBase = createBase('wt'); @@ -111,13 +111,37 @@ describe('worktree-sync-milestones', async () => { mkdirSync(m001Dir, { recursive: true }); writeFileSync(join(m001Dir, 'M001-ROADMAP.md'), '# Roadmap'); - // Worktree has a stale gsd.db - writeFileSync(join(wtBase, '.gsd', 'gsd.db'), 'stale data'); + // Worktree has an empty (0-byte) gsd.db — stale/corrupt + writeFileSync(join(wtBase, '.gsd', 'gsd.db'), ''); assert.ok(existsSync(join(wtBase, '.gsd', 'gsd.db')), 'gsd.db exists before sync'); syncProjectRootToWorktree(mainBase, wtBase, 'M001'); - assert.ok(!existsSync(join(wtBase, '.gsd', 'gsd.db')), '#853: gsd.db deleted after sync'); + assert.ok(!existsSync(join(wtBase, '.gsd', 'gsd.db')), '#853: empty gsd.db deleted after sync'); + } finally { + cleanup(mainBase); + cleanup(wtBase); + } + } + + // ─── 3b. non-empty gsd.db preserved in worktree after sync (#2815) ─── + console.log('\n=== 3b. non-empty gsd.db preserved in worktree after sync (#2815) ==='); + { + const mainBase = createBase('main'); + const wtBase = createBase('wt'); + + try { + const m001Dir = join(mainBase, '.gsd', 'milestones', 'M001'); + mkdirSync(m001Dir, { recursive: true }); + writeFileSync(join(m001Dir, 'M001-ROADMAP.md'), '# Roadmap'); + + // Worktree has a populated gsd.db (e.g. 
from gsd-migrate on respawn) + writeFileSync(join(wtBase, '.gsd', 'gsd.db'), 'migrated-db-content'); + assert.ok(existsSync(join(wtBase, '.gsd', 'gsd.db')), 'gsd.db exists before sync'); + + syncProjectRootToWorktree(mainBase, wtBase, 'M001'); + + assert.ok(existsSync(join(wtBase, '.gsd', 'gsd.db')), '#2815: non-empty gsd.db preserved after sync'); } finally { cleanup(mainBase); cleanup(wtBase); diff --git a/src/resources/extensions/gsd/tests/zombie-gsd-state.test.ts b/src/resources/extensions/gsd/tests/zombie-gsd-state.test.ts new file mode 100644 index 000000000..d18a7fcf8 --- /dev/null +++ b/src/resources/extensions/gsd/tests/zombie-gsd-state.test.ts @@ -0,0 +1,95 @@ +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +import { createTestContext } from "./test-helpers.ts"; + +const { assertTrue, assertMatch, assertNoMatch, report } = createTestContext(); + +// ─── #2942: Zombie .gsd state skips init wizard ───────────────────────────── +// +// A partially initialized .gsd/ (symlink exists but no PREFERENCES.md or +// milestones/) causes the init wizard gate in showSmartEntry to be skipped, +// resulting in an uninitialized project session. + +console.log("\n=== #2942: zombie .gsd state must not skip init wizard ==="); + +// ── guided-flow.ts — init wizard gate must check bootstrap completeness ── + +const guidedFlowSrc = readFileSync( + join(import.meta.dirname, "..", "guided-flow.ts"), + "utf-8", +); + +// Find the showSmartEntry function +const smartEntryIdx = guidedFlowSrc.indexOf("export async function showSmartEntry("); +assertTrue(smartEntryIdx >= 0, "guided-flow.ts defines showSmartEntry"); + +// Extract the region between showSmartEntry and the first showProjectInit call +// This is where the init wizard gate lives. +const afterSmartEntry = smartEntryIdx >= 0 ? guidedFlowSrc.slice(smartEntryIdx, smartEntryIdx + 3000) : ""; + +// The gate must NOT be a bare `!existsSync(gsdRoot(basePath))` check. 
+// It must also verify that bootstrap artifacts (PREFERENCES.md or milestones/) exist. +assertTrue( + afterSmartEntry.includes("PREFERENCES.md") || afterSmartEntry.includes("PREFERENCES"), + "init wizard gate checks for PREFERENCES.md, not just .gsd/ existence (#2942)", +); + +assertTrue( + afterSmartEntry.includes("milestones"), + "init wizard gate checks for milestones/ directory, not just .gsd/ existence (#2942)", +); + +// The init wizard should be shown when .gsd/ exists but has no bootstrap artifacts. +// The old code was: if (!existsSync(gsdRoot(basePath))) { ... showProjectInit ... } +// The fix should use a compound check so zombie states trigger the wizard. +// Verify we no longer have the bare existence check as the sole gate. + +// Find the specific init wizard gate pattern — the detection preamble block. +const detectionPreambleIdx = afterSmartEntry.indexOf("Detection preamble"); +const detectionRegion = detectionPreambleIdx >= 0 + ? afterSmartEntry.slice(detectionPreambleIdx, detectionPreambleIdx + 600) + : afterSmartEntry.slice(0, 1500); + +// The gate condition must reference PREFERENCES.md or milestones (bootstrap artifacts) +assertMatch( + detectionRegion, + /PREFERENCES\.md|milestones/, + "detection preamble gate references bootstrap artifacts, not just directory existence (#2942)", +); + +// ── auto-start.ts — milestones/ dir creation must not be dead code ────────── + +console.log("\n=== #2942: auto-start milestones/ bootstrap not dead code ==="); + +const autoStartSrc = readFileSync( + join(import.meta.dirname, "..", "auto-start.ts"), + "utf-8", +); + +// After ensureGsdSymlink, the code that creates milestones/ must check for +// the milestones directory specifically (not .gsd/ which ensureGsdSymlink already created). +const symlinkIdx = autoStartSrc.indexOf("ensureGsdSymlink(base)"); +assertTrue(symlinkIdx >= 0, "auto-start.ts calls ensureGsdSymlink(base)"); + +const afterSymlink = symlinkIdx >= 0 ? 
autoStartSrc.slice(symlinkIdx, symlinkIdx + 800) : ""; + +// The milestones bootstrap must check milestones path, not gsdDir +// Old (dead) code: if (!existsSync(gsdDir)) { mkdirSync(join(gsdDir, "milestones"), ...) } +// Fixed code should check: if (!existsSync(milestonesPath)) or similar +assertTrue( + afterSymlink.includes("milestones") && afterSymlink.includes("mkdirSync"), + "auto-start.ts creates milestones/ directory after ensureGsdSymlink (#2942)", +); + +// The guard for milestones/ creation should NOT be `!existsSync(gsdDir)` — +// that's dead code since ensureGsdSymlink already created gsdDir. +// It should check for the milestones/ dir directly. +const mkdirRegion = afterSymlink.slice(0, afterSymlink.indexOf("mkdirSync") + 200); +assertMatch( + mkdirRegion, + /existsSync\([^)]*milestones/, + "milestones bootstrap checks milestones path existence, not .gsd/ (#2942)", +); + +report(); diff --git a/src/resources/extensions/gsd/tools/complete-task.ts b/src/resources/extensions/gsd/tools/complete-task.ts index d7805b20d..8de2daa74 100644 --- a/src/resources/extensions/gsd/tools/complete-task.ts +++ b/src/resources/extensions/gsd/tools/complete-task.ts @@ -30,7 +30,7 @@ import { checkOwnership, taskUnitKey } from "../unit-ownership.js"; import { saveFile, clearParseCache } from "../files.js"; import { invalidateStateCache } from "../state.js"; import { renderPlanCheckboxes } from "../markdown-renderer.js"; -import { renderAllProjections } from "../workflow-projections.js"; +import { renderAllProjections, renderSummaryContent } from "../workflow-projections.js"; import { writeManifest } from "../workflow-manifest.js"; import { appendEvent } from "../workflow-events.js"; @@ -41,79 +41,40 @@ export interface CompleteTaskResult { summaryPath: string; } +import type { TaskRow } from "../gsd-db.js"; + /** - * Render task summary markdown matching the template format. - * YAML frontmatter uses snake_case keys for parseSummary() compatibility. 
+ * Build a TaskRow-shaped object from CompleteTaskParams so the unified + * renderSummaryContent() can be used at completion time (#2720). */ -function renderSummaryMarkdown(params: CompleteTaskParams): string { - const now = new Date().toISOString(); - const keyFilesYaml = params.keyFiles.length > 0 - ? params.keyFiles.map(f => ` - ${f}`).join("\n") - : " - (none)"; - const keyDecisionsYaml = params.keyDecisions.length > 0 - ? params.keyDecisions.map(d => ` - ${d}`).join("\n") - : " - (none)"; - - // Build verification evidence table rows - let evidenceTable = "| # | Command | Exit Code | Verdict | Duration |\n|---|---------|-----------|---------|----------|\n"; - if (params.verificationEvidence.length > 0) { - params.verificationEvidence.forEach((e, i) => { - evidenceTable += `| ${i + 1} | \`${e.command}\` | ${e.exitCode} | ${e.verdict} | ${e.durationMs}ms |\n`; - }); - } else { - evidenceTable += "| — | No verification commands discovered | — | — | — |\n"; - } - - // Determine verification_result from evidence - const allPassed = params.verificationEvidence.length > 0 && - params.verificationEvidence.every(e => e.exitCode === 0 || e.verdict.includes("✅") || e.verdict.toLowerCase().includes("pass")); - const verificationResult = allPassed ? "passed" : (params.verificationEvidence.length === 0 ? 
"untested" : "mixed"); - - // Extract a title from the oneLiner or taskId - const title = params.oneLiner || params.taskId; - - return `--- -id: ${params.taskId} -parent: ${params.sliceId} -milestone: ${params.milestoneId} -key_files: -${keyFilesYaml} -key_decisions: -${keyDecisionsYaml} -duration: "" -verification_result: ${verificationResult} -completed_at: ${now} -blocker_discovered: ${params.blockerDiscovered} ---- - -# ${params.taskId}: ${title} - -**${params.oneLiner}** - -## What Happened - -${params.narrative} - -## Verification - -${params.verification} - -## Verification Evidence - -${evidenceTable} - -## Deviations - -${params.deviations || "None."} - -## Known Issues - -${params.knownIssues || "None."} - -## Files Created/Modified - -${params.keyFiles.map(f => `- \`${f}\``).join("\n") || "None."} -`; +function paramsToTaskRow(params: CompleteTaskParams, completedAt: string): TaskRow { + return { + milestone_id: params.milestoneId, + slice_id: params.sliceId, + id: params.taskId, + title: params.oneLiner || params.taskId, + status: "complete", + one_liner: params.oneLiner, + narrative: params.narrative, + verification_result: params.verification, + duration: "", + completed_at: completedAt, + blocker_discovered: params.blockerDiscovered, + deviations: params.deviations, + known_issues: params.knownIssues, + key_files: params.keyFiles, + key_decisions: params.keyDecisions, + full_summary_md: "", + description: "", + estimate: "", + files: [], + verify: "", + inputs: [], + expected_output: [], + observability_impact: "", + full_plan_md: "", + sequence: 0, + }; } /** @@ -218,8 +179,9 @@ export async function handleCompleteTask( // If disk render fails, roll back the DB status so deriveState() and // verifyExpectedArtifact() stay consistent (both say "not done"). 
- // Render summary markdown - const summaryMd = renderSummaryMarkdown(params); + // Render summary markdown via the single source of truth (#2720) + const taskRow = paramsToTaskRow(params, completedAt); + const summaryMd = renderSummaryContent(taskRow, params.sliceId, params.milestoneId, params.verificationEvidence); // Resolve and write summary to disk let summaryPath: string; diff --git a/src/resources/extensions/gsd/tools/plan-milestone.ts b/src/resources/extensions/gsd/tools/plan-milestone.ts index 6a09d4163..17c47c632 100644 --- a/src/resources/extensions/gsd/tools/plan-milestone.ts +++ b/src/resources/extensions/gsd/tools/plan-milestone.ts @@ -4,6 +4,7 @@ import { isNonEmptyString, validateStringArray } from "../validation.js"; import { transaction, getMilestone, + getMilestoneSlices, insertMilestone, insertSlice, upsertMilestonePlanning, @@ -189,6 +190,17 @@ export async function handlePlanMilestone( return; } + // Guard: refuse to re-plan a milestone that has completed slices (#2960). + // INSERT OR IGNORE on slices won't overwrite existing rows, but a full + // re-plan after worktree recreation or DB resync can create new slice rows + // that shadow completed work. Block early when any slice is already done. + const existingSlices = getMilestoneSlices(params.milestoneId); + const completedSlices = existingSlices.filter(s => isClosedStatus(s.status)); + if (completedSlices.length > 0) { + guardError = `cannot re-plan milestone ${params.milestoneId}: ${completedSlices.length} slice(s) already completed (${completedSlices.map(s => s.id).join(", ")}). 
Use gsd_reassess_roadmap to modify the roadmap.`; + return; + } + // Validate depends_on: all dependencies must exist and be complete if (params.dependsOn && params.dependsOn.length > 0) { for (const depId of params.dependsOn) { @@ -223,7 +235,7 @@ export async function handlePlanMilestone( definitionOfDone: params.definitionOfDone, requirementCoverage: params.requirementCoverage, boundaryMapMarkdown: params.boundaryMapMarkdown, - }); + }, params.title); for (const slice of params.slices) { insertSlice({ diff --git a/src/resources/extensions/gsd/tools/reassess-roadmap.ts b/src/resources/extensions/gsd/tools/reassess-roadmap.ts index 040aacf56..933fabec5 100644 --- a/src/resources/extensions/gsd/tools/reassess-roadmap.ts +++ b/src/resources/extensions/gsd/tools/reassess-roadmap.ts @@ -1,4 +1,5 @@ import { join } from "node:path"; +import { existsSync, unlinkSync } from "node:fs"; import { clearParseCache } from "../files.js"; import { isClosedStatus } from "../status-guards.js"; import { isNonEmptyString } from "../validation.js"; @@ -10,6 +11,7 @@ import { insertSlice, updateSliceFields, insertAssessment, + deleteAssessmentByScope, deleteSlice, } from "../gsd-db.js"; import { invalidateStateCache } from "../state.js"; @@ -200,6 +202,21 @@ export async function handleReassessRoadmap( for (const removedId of params.sliceChanges.removed) { deleteSlice(params.milestoneId, removedId); } + + // ── Invalidate stale milestone validation (#2957) ────────────── + // When roadmap structure changes (slices added/modified/removed), + // any prior milestone-validation verdict is stale. Delete the DB + // row so deriveState() returns phase: 'validating-milestone' once + // the new slices complete, rather than advancing directly to + // 'completing-milestone' with a stale needs-remediation verdict. 
+ const hasStructuralChanges = + params.sliceChanges.added.length > 0 || + params.sliceChanges.modified.length > 0 || + params.sliceChanges.removed.length > 0; + + if (hasStructuralChanges) { + deleteAssessmentByScope(params.milestoneId, "milestone-validation"); + } }); } catch (err) { return { error: `db write failed: ${(err as Error).message}` }; @@ -218,6 +235,25 @@ export async function handleReassessRoadmap( completedSliceId: params.completedSliceId, }); + // ── Remove stale VALIDATION file from disk (#2957) ──────────── + const hasStructuralChanges = + params.sliceChanges.added.length > 0 || + params.sliceChanges.modified.length > 0 || + params.sliceChanges.removed.length > 0; + + if (hasStructuralChanges) { + const validationFile = join( + basePath, ".gsd", "milestones", params.milestoneId, + `${params.milestoneId}-VALIDATION.md`, + ); + try { + if (existsSync(validationFile)) unlinkSync(validationFile); + } catch { + // Best-effort: DB row is already deleted, so state derivation + // will not see the file-based verdict as authoritative. + } + } + // ── Invalidate caches ───────────────────────────────────────── invalidateStateCache(); clearParseCache(); diff --git a/src/resources/extensions/gsd/tools/validate-milestone.ts b/src/resources/extensions/gsd/tools/validate-milestone.ts index 305b75c06..5e3f57ee4 100644 --- a/src/resources/extensions/gsd/tools/validate-milestone.ts +++ b/src/resources/extensions/gsd/tools/validate-milestone.ts @@ -1,8 +1,12 @@ /** * validate-milestone handler — the core operation behind gsd_validate_milestone. * - * Persists milestone validation results to the assessments table, - * renders VALIDATION.md to disk, and invalidates caches. + * Persists milestone validation results to the assessments table and + * quality_gates table, renders VALIDATION.md to disk, and invalidates caches. 
+ * + * #2945 Bug 4: Previously only wrote to assessments — quality_gates records + * were never persisted, causing M002+ milestones to have zero gate records + * despite passing validation. */ import { join } from "node:path"; @@ -11,11 +15,13 @@ import { transaction, insertAssessment, deleteAssessmentByScope, + getMilestoneSlices, } from "../gsd-db.js"; import { resolveMilestonePath, clearPathCache } from "../paths.js"; import { saveFile, clearParseCache } from "../files.js"; import { invalidateStateCache } from "../state.js"; import { VALIDATION_VERDICTS, isValidMilestoneVerdict } from "../verdict-parser.js"; +import { insertMilestoneValidationGates } from "../milestone-validation-gates.js"; export interface ValidateMilestoneParams { milestoneId: string; @@ -112,6 +118,18 @@ export async function handleValidateMilestone( scope: 'milestone-validation', fullContent: validationMd, }); + + // #2945 Bug 4: persist quality_gates records alongside the assessment. + // Previously only the assessment was written, leaving M002+ milestones + // with zero quality_gate records despite passing validation. + const slices = getMilestoneSlices(params.milestoneId); + const sliceId = slices.length > 0 ? 
slices[0].id : "_milestone"; + insertMilestoneValidationGates( + params.milestoneId, + sliceId, + params.verdict, + validatedAt, + ); }); // ── Filesystem render (outside transaction) ──────────────────────────── diff --git a/src/resources/extensions/gsd/triage-resolution.ts b/src/resources/extensions/gsd/triage-resolution.ts index eefb2caa8..d6aaad962 100644 --- a/src/resources/extensions/gsd/triage-resolution.ts +++ b/src/resources/extensions/gsd/triage-resolution.ts @@ -22,6 +22,7 @@ import { loadActionableCaptures, markCaptureResolved, markCaptureExecuted, + stampCaptureMilestone, } from "./captures.js"; // ─── Resolution Executors ───────────────────────────────────────────────────── @@ -271,11 +272,15 @@ export function buildQuickTaskPrompt(capture: CaptureEntry): string { ``, `## Instructions`, ``, - `1. Execute this task as a small, self-contained change.`, - `2. Do NOT modify any \`.gsd/\` plan files — this is a one-off, not a planned task.`, - `3. Commit your changes with a descriptive message.`, - `4. Keep changes minimal and focused on the capture text.`, - `5. When done, say: "Quick task complete."`, + `1. **Verify the issue still exists.** Before making any changes, inspect the`, + ` relevant code to confirm the problem described above is actually present in`, + ` the current codebase. If the issue has already been fixed (e.g., by planned`, + ` milestone work), report "Already resolved — no changes needed." and stop.`, + `2. Execute this task as a small, self-contained change.`, + `3. Do NOT modify any \`.gsd/\` plan files — this is a one-off, not a planned task.`, + `4. Commit your changes with a descriptive message.`, + `5. Keep changes minimal and focused on the capture text.`, + `6. 
When done, say: "Quick task complete."`, ].join("\n"); } @@ -324,7 +329,19 @@ export function executeTriageResolutions( actions: [], }; - const actionable = loadActionableCaptures(basePath); + const actionable = loadActionableCaptures(basePath, mid || undefined); + + // Reconciliation: stamp actionable captures that are missing the Milestone field + // with the current milestone ID. This covers captures resolved by the triage LLM + // before the prompt included the Milestone instruction, and acts as a safety net + // when the LLM omits the field (#2872). + if (mid) { + for (const capture of actionable) { + if (!capture.resolvedInMilestone) { + stampCaptureMilestone(basePath, capture.id, mid); + } + } + } // Also process deferred captures that target milestone IDs — create // milestone directories so deriveState() discovers them. diff --git a/src/resources/extensions/gsd/types.ts b/src/resources/extensions/gsd/types.ts index ffecfc75e..25bee6774 100644 --- a/src/resources/extensions/gsd/types.ts +++ b/src/resources/extensions/gsd/types.ts @@ -249,6 +249,8 @@ export interface GSDState { slices?: { done: number; total: number }; tasks?: { done: number; total: number }; }; + /** When phase=complete, holds the last completed milestone (instead of activeMilestone). 
*/ + lastCompletedMilestone?: ActiveRef | null; } // ─── Post-Unit Hook Types ───────────────────────────────────────────────── @@ -563,8 +565,8 @@ export interface CompleteSliceParams { // ─── Quality Gates ─────────────────────────────────────────────────────── -export type GateId = "Q3" | "Q4" | "Q5" | "Q6" | "Q7" | "Q8"; -export type GateScope = "slice" | "task"; +export type GateId = "Q3" | "Q4" | "Q5" | "Q6" | "Q7" | "Q8" | "MV01" | "MV02" | "MV03" | "MV04"; +export type GateScope = "slice" | "task" | "milestone"; export type GateStatus = "pending" | "complete" | "omitted"; export type GateVerdict = "pass" | "flag" | "omitted" | ""; diff --git a/src/resources/extensions/gsd/undo.ts b/src/resources/extensions/gsd/undo.ts index 3d0c589b2..5d68c5e82 100644 --- a/src/resources/extensions/gsd/undo.ts +++ b/src/resources/extensions/gsd/undo.ts @@ -5,7 +5,7 @@ import type { ExtensionCommandContext, ExtensionAPI } from "@gsd/pi-coding-agent"; import { existsSync, readFileSync, writeFileSync, unlinkSync, readdirSync } from "node:fs"; -import { join } from "node:path"; +import { join, basename } from "node:path"; import { nativeRevertCommit, nativeRevertAbort } from "./native-git-bridge.js"; import { parseUnitId } from "./unit-id.js"; import { deriveState } from "./state.js"; @@ -133,7 +133,7 @@ export async function handleUndo(args: string, ctx: ExtensionCommandContext, _pi } ctx.ui.notify(results.join("\n"), "success"); - sendDesktopNotification("GSD", `Undone: ${unitType} (${unitId})`, "info", "complete"); + sendDesktopNotification("GSD", `Undone: ${unitType} (${unitId})`, "info", "complete", basename(basePath)); } // ─── Targeted State Reset ──────────────────────────────────────────────────── diff --git a/src/resources/extensions/gsd/unit-ownership.ts b/src/resources/extensions/gsd/unit-ownership.ts index 9bbeb4f22..acae94999 100644 --- a/src/resources/extensions/gsd/unit-ownership.ts +++ b/src/resources/extensions/gsd/unit-ownership.ts @@ -3,18 +3,20 @@ // // An 
agent can claim a unit (task, slice) before working on it. // complete-task and complete-slice enforce ownership when claims exist. -// If no claim file is present, ownership is not enforced (backward compatible). +// Claims are stored in SQLite (.gsd/unit-claims.db) for atomic +// first-writer-wins semantics via INSERT OR IGNORE. // -// Claim file location: .gsd/unit-claims.json // Unit key format: // task: "<milestoneId>/<sliceId>/<taskId>" // slice: "<milestoneId>/<sliceId>" // // Copyright (c) 2026 Jeremy McSpadden -import { existsSync, readFileSync, mkdirSync } from "node:fs"; +import { createRequire } from "node:module"; +import { mkdirSync } from "node:fs"; import { join } from "node:path"; -import { atomicWriteSync } from "./atomic-write.js"; + +const _require = createRequire(import.meta.url); // ─── Types ─────────────────────────────────────────────────────────────── @@ -23,7 +25,133 @@ export interface UnitClaim { claimed_at: string; } -type ClaimsMap = Record<string, UnitClaim>; +// ─── SQLite Provider (mirrors gsd-db.ts pattern) ───────────────────────── + +interface StmtLike { + run(...params: unknown[]): unknown; + get(...params: unknown[]): Record<string, unknown> | undefined; +} + +interface DbLike { + exec(sql: string): void; + prepare(sql: string): StmtLike; + close(): void; +} + +type ProviderName = "node:sqlite" | "better-sqlite3"; + +let providerName: ProviderName | null = null; +let providerModule: unknown = null; +let loadAttempted = false; + +function suppressSqliteWarning(): void { + const origEmit = process.emit; + // @ts-expect-error overriding process.emit for warning filter + process.emit = function (event: string, ...args: unknown[]): boolean { + if ( + event === "warning" && + args[0] && + typeof args[0] === "object" && + "name" in args[0] && + (args[0] as { name: string }).name === "ExperimentalWarning" && + "message" in args[0] && + typeof (args[0] as { message: string }).message === "string" && + (args[0] as { message: string }).message.includes("SQLite") + ) { + return false; + } + return origEmit.apply(process,
[event, ...args] as Parameters<typeof process.emit>) as unknown as boolean; + }; +} + +function loadProvider(): void { + if (loadAttempted) return; + loadAttempted = true; + + try { + suppressSqliteWarning(); + const mod = _require("node:sqlite"); + if (mod.DatabaseSync) { + providerModule = mod; + providerName = "node:sqlite"; + return; + } + } catch { + // unavailable + } + + try { + const mod = _require("better-sqlite3"); + if (typeof mod === "function" || (mod && mod.default)) { + providerModule = mod.default || mod; + providerName = "better-sqlite3"; + return; + } + } catch { + // unavailable + } +} + +function normalizeRow(row: unknown): Record<string, unknown> | undefined { + if (row == null) return undefined; + if (Object.getPrototypeOf(row) === null) { + return { ...(row as Record<string, unknown>) }; + } + return row as Record<string, unknown>; +} + +function openRawDb(path: string): unknown { + loadProvider(); + if (!providerModule || !providerName) return null; + + if (providerName === "node:sqlite") { + const { DatabaseSync } = providerModule as { + DatabaseSync: new (path: string) => unknown; + }; + return new DatabaseSync(path); + } + + const Database = providerModule as new (path: string) => unknown; + return new Database(path); +} + +function wrapDb(rawDb: unknown): DbLike { + const db = rawDb as { + exec(sql: string): void; + prepare(sql: string): { + run(...args: unknown[]): unknown; + get(...args: unknown[]): unknown; + }; + close(): void; + }; + return { + exec(sql: string): void { db.exec(sql); }, + prepare(sql: string): StmtLike { + const raw = db.prepare(sql); + return { + run(...params: unknown[]): unknown { return raw.run(...params); }, + get(...params: unknown[]): Record<string, unknown> | undefined { + return normalizeRow(raw.get(...params)); + }, + }; + }, + close(): void { db.close(); }, + }; +} + +// ─── Per-basePath DB pool ──────────────────────────────────────────────── + +const dbPool = new Map<string, DbLike>(); + +function claimsDbPath(basePath: string): string { + return join(basePath, ".gsd", "unit-claims.db"); +} + +function
getDb(basePath: string): DbLike | null { + const existing = dbPool.get(basePath); + if (existing) return existing; + return null; +} // ─── Key Builders ──────────────────────────────────────────────────────── @@ -35,60 +163,103 @@ export function sliceUnitKey(milestoneId: string, sliceId: string): string { return `${milestoneId}/${sliceId}`; } -// ─── File Path ─────────────────────────────────────────────────────────── +// ─── Lifecycle ─────────────────────────────────────────────────────────── -function claimsPath(basePath: string): string { - return join(basePath, ".gsd", "unit-claims.json"); +/** + * Initialize the ownership SQLite database for a given basePath. + * Creates .gsd/ directory and unit-claims.db with the unit_claims table. + * Safe to call multiple times (idempotent). + */ +export function initOwnershipTable(basePath: string): void { + if (dbPool.has(basePath)) return; + + const dir = join(basePath, ".gsd"); + mkdirSync(dir, { recursive: true }); + + const raw = openRawDb(claimsDbPath(basePath)); + if (!raw) { + throw new Error("No SQLite provider available for unit-ownership"); + } + + const db = wrapDb(raw); + + db.exec("PRAGMA journal_mode=WAL"); + db.exec("PRAGMA busy_timeout = 5000"); + db.exec("PRAGMA synchronous = NORMAL"); + + db.exec(` + CREATE TABLE IF NOT EXISTS unit_claims ( + unit_key TEXT PRIMARY KEY, + agent_name TEXT NOT NULL, + claimed_at TEXT NOT NULL + ) + `); + + dbPool.set(basePath, db); } -// ─── Read Claims ───────────────────────────────────────────────────────── - -function readClaims(basePath: string): ClaimsMap | null { - const path = claimsPath(basePath); - if (!existsSync(path)) return null; - try { - return JSON.parse(readFileSync(path, "utf-8")) as ClaimsMap; - } catch { - return null; - } +/** + * Close the ownership database for a given basePath. + * Safe to call even if not initialized. 
+ */ +export function closeOwnershipDb(basePath: string): void { + const db = dbPool.get(basePath); + if (!db) return; + try { db.close(); } catch { /* swallow */ } + dbPool.delete(basePath); } // ─── Public API ────────────────────────────────────────────────────────── /** * Claim a unit for an agent. - * Overwrites any existing claim for this unit (last writer wins). + * Uses INSERT OR IGNORE for atomic first-writer-wins semantics. + * Returns true if the claim was acquired (or the same agent already owns it). + * Returns false if a different agent already owns the unit. */ -export function claimUnit(basePath: string, unitKey: string, agentName: string): void { - const claims = readClaims(basePath) ?? {}; - claims[unitKey] = { agent: agentName, claimed_at: new Date().toISOString() }; - const dir = join(basePath, ".gsd"); - mkdirSync(dir, { recursive: true }); - atomicWriteSync(claimsPath(basePath), JSON.stringify(claims, null, 2) + "\n"); +export function claimUnit(basePath: string, unitKey: string, agentName: string): boolean { + const db = getDb(basePath); + if (!db) { + // Auto-init if not already initialized (backward compat) + initOwnershipTable(basePath); + return claimUnit(basePath, unitKey, agentName); + } + + // INSERT OR IGNORE: if the row already exists, this is a no-op. + // The PRIMARY KEY constraint on unit_key prevents duplicate claims. + db.prepare( + "INSERT OR IGNORE INTO unit_claims (unit_key, agent_name, claimed_at) VALUES (?, ?, ?)", + ).run(unitKey, agentName, new Date().toISOString()); + + // Check who owns it now + const row = db.prepare("SELECT agent_name FROM unit_claims WHERE unit_key = ?").get(unitKey); + const owner = row?.agent_name as string | undefined; + + return owner === agentName; } /** - * Release a unit claim (remove it from the claims map). + * Release a unit claim (remove it from the claims table). 
*/ export function releaseUnit(basePath: string, unitKey: string): void { - const claims = readClaims(basePath); - if (!claims || !(unitKey in claims)) return; - delete claims[unitKey]; - atomicWriteSync(claimsPath(basePath), JSON.stringify(claims, null, 2) + "\n"); + const db = getDb(basePath); + if (!db) return; + db.prepare("DELETE FROM unit_claims WHERE unit_key = ?").run(unitKey); } /** - * Get the current owner of a unit, or null if unclaimed / no claims file. + * Get the current owner of a unit, or null if unclaimed. */ export function getOwner(basePath: string, unitKey: string): string | null { - const claims = readClaims(basePath); - if (!claims) return null; - return claims[unitKey]?.agent ?? null; + const db = getDb(basePath); + if (!db) return null; + const row = db.prepare("SELECT agent_name FROM unit_claims WHERE unit_key = ?").get(unitKey); + return (row?.agent_name as string) ?? null; } /** * Check if an actor is authorized to operate on a unit. - * Returns null if ownership passes (or is unclaimed / no file). + * Returns null if ownership passes (or is unclaimed). * Returns an error string if a different agent owns the unit. 
*/ export function checkOwnership( @@ -98,7 +269,7 @@ export function checkOwnership( ): string | null { if (!actorName) return null; // no actor identity provided — opt-in, so allow const owner = getOwner(basePath, unitKey); - if (owner === null) return null; // unit unclaimed or no claims file + if (owner === null) return null; // unit unclaimed if (owner === actorName) return null; // actor is the owner return `Unit ${unitKey} is owned by ${owner}, not ${actorName}`; } diff --git a/src/resources/extensions/gsd/verdict-parser.ts b/src/resources/extensions/gsd/verdict-parser.ts index 18794436a..b0c0826b8 100644 --- a/src/resources/extensions/gsd/verdict-parser.ts +++ b/src/resources/extensions/gsd/verdict-parser.ts @@ -20,13 +20,28 @@ import type { UatType } from "./files.js"; * Returns `undefined` when frontmatter is absent or has no `verdict` field. */ export function extractVerdict(content: string): string | undefined { + // Primary: YAML frontmatter verdict (canonical format) const fmMatch = content.match(/^---\n([\s\S]*?)\n---/); - if (!fmMatch) return undefined; - const verdictMatch = fmMatch[1].match(/verdict:\s*([\w-]+)/i); - if (!verdictMatch) return undefined; - let v = verdictMatch[1].toLowerCase(); - if (v === "passed") v = "pass"; - return v; + if (fmMatch) { + const verdictMatch = fmMatch[1].match(/verdict:\s*([\w-]+)/i); + if (verdictMatch) { + let v = verdictMatch[1].toLowerCase(); + if (v === "passed") v = "pass"; + return v; + } + return undefined; + } + + // Fallback: detect verdict in markdown body (LLM manual writes, #2960). 
+ // Matches patterns like: **Verdict:** PASS, **Verdict:** ✅ PASS, **Verdict** needs-remediation + const bodyMatch = content.match(/\*\*Verdict:?\*\*\s*(?:✅\s*)?(\w[\w-]*)/i); + if (bodyMatch) { + let v = bodyMatch[1].toLowerCase(); + if (v === "passed") v = "pass"; + return v; + } + + return undefined; } /** diff --git a/src/resources/extensions/gsd/workflow-logger.ts b/src/resources/extensions/gsd/workflow-logger.ts index 0770408d0..882059302 100644 --- a/src/resources/extensions/gsd/workflow-logger.ts +++ b/src/resources/extensions/gsd/workflow-logger.ts @@ -31,7 +31,9 @@ export type LogComponent = | "state" // deriveState fallback/degradation | "tool" // Tool handler errors | "compaction" // Event compaction - | "reconcile"; // Worktree reconciliation + | "reconcile" // Worktree reconciliation + | "db" // Database operations (gsd-db) + | "dispatch"; // Auto-dispatch rule evaluation export interface LogEntry { ts: string; diff --git a/src/resources/extensions/gsd/workflow-manifest.ts b/src/resources/extensions/gsd/workflow-manifest.ts index d88dda8e9..3d6af0327 100644 --- a/src/resources/extensions/gsd/workflow-manifest.ts +++ b/src/resources/extensions/gsd/workflow-manifest.ts @@ -42,6 +42,23 @@ function requireDb() { return db; } +/** + * Coerce a raw DB value to a number, returning `fallback` for + * null/undefined/non-numeric strings (e.g. "-", "N/A", ""). + * SQLite can store TEXT in INTEGER columns after migrations or manual inserts. + */ +export function toNumeric(value: unknown, fallback: number | null = null): number | null { + if (value === null || value === undefined) return fallback; + if (typeof value === "number") return Number.isFinite(value) ? value : fallback; + if (typeof value === "string") { + const trimmed = value.trim(); + if (trimmed === "" || trimmed === "-" || trimmed === "N/A") return fallback; + const n = Number(trimmed); + return Number.isFinite(n) ? 
n : fallback; + } + return fallback; +} + // ─── snapshotState ─────────────────────────────────────────────────────── /** @@ -99,7 +116,7 @@ export function snapshotState(): StateManifest { proof_level: (r["proof_level"] as string) ?? "", integration_closure: (r["integration_closure"] as string) ?? "", observability_impact: (r["observability_impact"] as string) ?? "", - sequence: (r["sequence"] as number) ?? 0, + sequence: toNumeric(r["sequence"], 0) as number, replan_triggered_at: (r["replan_triggered_at"] as string) ?? null, })); @@ -129,12 +146,12 @@ expected_output: JSON.parse((r["expected_output"] as string) || "[]"), observability_impact: (r["observability_impact"] as string) ?? "", full_plan_md: (r["full_plan_md"] as string) ?? "", - sequence: (r["sequence"] as number) ?? 0, + sequence: toNumeric(r["sequence"], 0) as number, })); const rawDecisions = db.prepare("SELECT * FROM decisions ORDER BY seq").all() as Record<string, unknown>[]; const decisions: Decision[] = rawDecisions.map((r) => ({ - seq: r["seq"] as number, + seq: toNumeric(r["seq"], 0) as number, id: r["id"] as string, when_context: (r["when_context"] as string) ?? "", scope: (r["scope"] as string) ?? "", @@ -153,9 +170,9 @@ slice_id: r["slice_id"] as string, milestone_id: r["milestone_id"] as string, command: r["command"] as string, - exit_code: (r["exit_code"] as number) ?? null, + exit_code: toNumeric(r["exit_code"]), verdict: (r["verdict"] as string) ?? "", - duration_ms: (r["duration_ms"] as number) ??
null, + duration_ms: toNumeric(r["duration_ms"]), created_at: r["created_at"] as string, })); diff --git a/src/resources/extensions/gsd/workflow-projections.ts b/src/resources/extensions/gsd/workflow-projections.ts index 4affbec8a..7a16c0e56 100644 --- a/src/resources/extensions/gsd/workflow-projections.ts +++ b/src/resources/extensions/gsd/workflow-projections.ts @@ -9,8 +9,9 @@ import { getMilestone, getMilestoneSlices, getSliceTasks, + getVerificationEvidence, } from "./gsd-db.js"; -import type { MilestoneRow, SliceRow, TaskRow } from "./gsd-db.js"; +import type { MilestoneRow, SliceRow, TaskRow, VerificationEvidenceRow } from "./gsd-db.js"; import { atomicWriteSync } from "./atomic-write.js"; import { join } from "node:path"; import { mkdirSync, existsSync } from "node:fs"; @@ -29,8 +30,10 @@ export function renderPlanContent(sliceRow: SliceRow, taskRows: TaskRow[]): stri lines.push(`# ${sliceRow.id}: ${sliceRow.title}`); lines.push(""); - lines.push(`**Goal:** ${sliceRow.goal || sliceRow.full_summary_md || "TBD"}`); - lines.push(`**Demo:** After this: ${sliceRow.demo || sliceRow.full_uat_md || "TBD"}`); + // #2945: never use full_summary_md/full_uat_md as display fallbacks — + // they contain multi-line rendered markdown that corrupts single-line fields. + lines.push(`**Goal:** ${sliceRow.goal || "TBD"}`); + lines.push(`**Demo:** After this: ${sliceRow.demo || "TBD"}`); lines.push(""); lines.push("## Tasks"); @@ -113,7 +116,10 @@ export function renderRoadmapContent(milestoneRow: MilestoneRow, sliceRows: Slic } const risk = (slice.risk || "low").toLowerCase(); - const demo = slice.demo || slice.full_uat_md || "TBD"; + // #2945 Bug 1: never use full_uat_md as a table cell fallback — it contains + // multi-line UAT content (preconditions, steps, expected results) that + // corrupts the markdown table and makes subsequent slices invisible. 
+ const demo = slice.demo || "TBD"; lines.push(`| ${slice.id} | ${slice.title} | ${risk} | ${depends} | ${done} | ${demo} |`); } @@ -142,71 +148,93 @@ export function renderRoadmapProjection(basePath: string, milestoneId: string): /** * Render SUMMARY.md content from a task row. - * Pure function — no side effects. + * Single source of truth for summary rendering — used both at completion + * time and at projection regeneration time (#2720). + * + * @param evidence - Optional verification evidence rows. When called from + * complete-task, these are passed directly. When called from projection + * regeneration, they are queried from the DB by renderSummaryProjection. */ -export function renderSummaryContent(taskRow: TaskRow, sliceId: string, milestoneId: string): string { - const lines: string[] = []; +export function renderSummaryContent( + taskRow: TaskRow, + sliceId: string, + milestoneId: string, + evidence?: Array<{ command: string; exitCode?: number; exit_code?: number; verdict: string; durationMs?: number; duration_ms?: number }>, +): string { + // ── Frontmatter (YAML list format, matches parseSummary() expectations) ── + const keyFilesYaml = taskRow.key_files && taskRow.key_files.length > 0 + ? taskRow.key_files.map(f => ` - ${f}`).join("\n") + : " - (none)"; + const keyDecisionsYaml = taskRow.key_decisions && taskRow.key_decisions.length > 0 + ? taskRow.key_decisions.map(d => ` - ${d}`).join("\n") + : " - (none)"; - // Frontmatter - lines.push("---"); - lines.push(`id: ${taskRow.id}`); - lines.push(`parent: ${sliceId}`); - lines.push(`milestone: ${milestoneId}`); - lines.push("provides: []"); - lines.push("requires: []"); - lines.push("affects: []"); + // Derive verification_result from evidence if available + const evidenceList = evidence ?? []; + const allPassed = evidenceList.length > 0 && + evidenceList.every(e => { + const code = e.exitCode ?? e.exit_code ?? 
-1; + return code === 0 || e.verdict.includes("\u2705") || e.verdict.toLowerCase().includes("pass"); + }); + const verificationResult = allPassed + ? "passed" + : (evidenceList.length === 0 ? "untested" : "mixed"); - // key_files is already parsed to string[] - if (taskRow.key_files && taskRow.key_files.length > 0) { - lines.push(`key_files: [${taskRow.key_files.map(f => `"${f}"`).join(", ")}]`); + // Build verification evidence table + let evidenceTable = "| # | Command | Exit Code | Verdict | Duration |\n|---|---------|-----------|---------|----------|\n"; + if (evidenceList.length > 0) { + evidenceList.forEach((e, i) => { + const code = e.exitCode ?? e.exit_code ?? 0; + const dur = e.durationMs ?? e.duration_ms ?? 0; + evidenceTable += `| ${i + 1} | \`${e.command}\` | ${code} | ${e.verdict} | ${dur}ms |\n`; + }); } else { - lines.push("key_files: []"); + evidenceTable += "| \u2014 | No verification commands discovered | \u2014 | \u2014 | \u2014 |\n"; } - // key_decisions is already parsed to string[] - if (taskRow.key_decisions && taskRow.key_decisions.length > 0) { - lines.push(`key_decisions: [${taskRow.key_decisions.map(d => `"${d}"`).join(", ")}]`); - } else { - lines.push("key_decisions: []"); - } + const title = taskRow.one_liner || taskRow.title || taskRow.id; - lines.push("patterns_established: []"); - lines.push("drill_down_paths: []"); - lines.push("observability_surfaces: []"); - lines.push(`duration: "${taskRow.duration || ""}"`); - lines.push(`verification_result: "${taskRow.verification_result || ""}"`); - lines.push(`completed_at: ${taskRow.completed_at || ""}`); - lines.push(`blocker_discovered: ${taskRow.blocker_discovered ?
"true" : "false"}`); - lines.push("---"); - lines.push(""); - lines.push(`# ${taskRow.id}: ${taskRow.title}`); - lines.push(""); + return `--- +id: ${taskRow.id} +parent: ${sliceId} +milestone: ${milestoneId} +key_files: +${keyFilesYaml} +key_decisions: +${keyDecisionsYaml} +duration: ${taskRow.duration || ""} +verification_result: ${verificationResult} +completed_at: ${taskRow.completed_at || ""} +blocker_discovered: ${taskRow.blocker_discovered ? "true" : "false"} +--- - // One-liner (if present) - if (taskRow.one_liner) { - lines.push(`> ${taskRow.one_liner}`); - lines.push(""); - } +# ${taskRow.id}: ${title} - lines.push("## What Happened"); - lines.push(taskRow.full_summary_md || taskRow.narrative || "No summary recorded."); - lines.push(""); +**${taskRow.one_liner || ""}** - // Deviations (if present) - if (taskRow.deviations) { - lines.push("## Deviations"); - lines.push(taskRow.deviations); - lines.push(""); - } +## What Happened - // Known issues (if present) - if (taskRow.known_issues) { - lines.push("## Known Issues"); - lines.push(taskRow.known_issues); - lines.push(""); - } +${taskRow.narrative || "No summary recorded."} - return lines.join("\n"); +## Verification + +${taskRow.verification_result || "No verification recorded."} + +## Verification Evidence + +${evidenceTable} +## Deviations + +${taskRow.deviations || "None."} + +## Known Issues + +${taskRow.known_issues || "None."} + +## Files Created/Modified + +${taskRow.key_files && taskRow.key_files.length > 0 ? 
taskRow.key_files.map(f => `- \`${f}\``).join("\n") : "None."} +`; } /** @@ -218,7 +246,8 @@ export function renderSummaryProjection(basePath: string, milestoneId: string, s const taskRow = taskRows.find(t => t.id === taskId); if (!taskRow) return; - const content = renderSummaryContent(taskRow, sliceId, milestoneId); + const evidenceRows = getVerificationEvidence(milestoneId, sliceId, taskId); + const content = renderSummaryContent(taskRow, sliceId, milestoneId, evidenceRows); const dir = join(basePath, ".gsd", "milestones", milestoneId, "slices", sliceId, "tasks"); mkdirSync(dir, { recursive: true }); atomicWriteSync(join(dir, `${taskId}-SUMMARY.md`), content); @@ -235,14 +264,18 @@ export function renderStateContent(state: GSDState): string { const lines: string[] = []; lines.push("# GSD State", ""); - const activeMilestone = state.activeMilestone - ? `${state.activeMilestone.id}: ${state.activeMilestone.title}` - : "None"; const activeSlice = state.activeSlice ? `${state.activeSlice.id}: ${state.activeSlice.title}` : "None"; - lines.push(`**Active Milestone:** ${activeMilestone}`); + if (state.phase === 'complete' && state.lastCompletedMilestone) { + lines.push(`**Last Completed Milestone:** ${state.lastCompletedMilestone.id}: ${state.lastCompletedMilestone.title}`); + } else { + const activeMilestone = state.activeMilestone + ? 
`${state.activeMilestone.id}: ${state.activeMilestone.title}` + : "None"; + lines.push(`**Active Milestone:** ${activeMilestone}`); + } lines.push(`**Active Slice:** ${activeSlice}`); lines.push(`**Phase:** ${state.phase}`); if (state.requirements) { diff --git a/src/resources/extensions/gsd/workflow-reconcile.ts b/src/resources/extensions/gsd/workflow-reconcile.ts index 4704501b0..216f1019a 100644 --- a/src/resources/extensions/gsd/workflow-reconcile.ts +++ b/src/resources/extensions/gsd/workflow-reconcile.ts @@ -1,19 +1,53 @@ import { join } from "node:path"; import { mkdirSync, existsSync, readFileSync, unlinkSync } from "node:fs"; +import { logWarning, logError } from "./workflow-logger.js"; import { readEvents, findForkPoint, appendEvent, getSessionId } from "./workflow-events.js"; import type { WorkflowEvent } from "./workflow-events.js"; import { transaction, updateTaskStatus, updateSliceStatus, + getSliceTasks, insertVerificationEvidence, upsertDecision, openDatabase, } from "./gsd-db.js"; +import { isClosedStatus } from "./status-guards.js"; import { writeManifest } from "./workflow-manifest.js"; import { atomicWriteSync } from "./atomic-write.js"; import { acquireSyncLock, releaseSyncLock } from "./sync-lock.js"; +// ─── Replay Helpers ────────────────────────────────────────────────────────── + +/** + * Replay a complete_slice event with task validation. + * + * #2945 Bug 2: The original replay blindly called updateSliceStatus("done") + * without checking whether all tasks in the slice are actually complete. + * During API overload or partial execution, a complete_slice event could + * be logged even when tasks were skipped, causing the milestone completion + * guard to see the slice as "done" and allow premature milestone completion. + * + * This function validates that every task in the slice has a closed status + * before marking the slice as done. If any task is still pending, the slice + * status is left unchanged. 
+ */ +export function replaySliceComplete(milestoneId: string, sliceId: string, ts: string): void { + const tasks = getSliceTasks(milestoneId, sliceId); + // If there are tasks and any are not closed, skip the status update + if (tasks.length > 0) { + const incompleteTasks = tasks.filter(t => !isClosedStatus(t.status)); + if (incompleteTasks.length > 0) { + process.stderr.write( + `[gsd] reconcile: skipping complete_slice replay for ${sliceId} — ` + + `${incompleteTasks.length} task(s) still pending\n`, + ); + return; + } + } + updateSliceStatus(milestoneId, sliceId, "done", ts); +} + // ─── Public Types ───────────────────────────────────────────────────────────── export interface ConflictEntry { @@ -82,7 +116,8 @@ function replayEvents(events: WorkflowEvent[]): void { case "complete_slice": { const milestoneId = p["milestoneId"] as string; const sliceId = p["sliceId"] as string; - updateSliceStatus(milestoneId, sliceId, "done", event.ts); + // #2945 Bug 2: validate tasks before marking slice done + replaySliceComplete(milestoneId, sliceId, event.ts); break; } case "plan_slice": { @@ -274,9 +309,7 @@ export function reconcileWorktreeLogs( // Acquire advisory lock to prevent concurrent reconcile + append races const lock = acquireSyncLock(mainBasePath); if (!lock.acquired) { - process.stderr.write( - `[gsd] reconcile: could not acquire sync lock — another reconciliation may be in progress\n`, - ); + logWarning("reconcile", "could not acquire sync lock — another reconciliation may be in progress"); return { autoMerged: 0, conflicts: [] }; } @@ -315,9 +348,7 @@ function _reconcileWorktreeLogsInner( if (conflicts.length > 0) { // D-04: atomic all-or-nothing — block entire merge writeConflictsFile(mainBasePath, conflicts, worktreeBasePath); - process.stderr.write( - `[gsd] reconcile: ${conflicts.length} conflict(s) detected — see ${join(mainBasePath, ".gsd", "CONFLICTS.md")}\n`, - ); + logError("reconcile", `${conflicts.length} conflict(s) detected`, { count: 
String(conflicts.length), path: join(mainBasePath, ".gsd", "CONFLICTS.md") }); return { autoMerged: 0, conflicts }; } @@ -341,9 +372,7 @@ function _reconcileWorktreeLogsInner( try { writeManifest(mainBasePath); } catch (err) { - process.stderr.write( - `[gsd] reconcile: manifest write failed (non-fatal): ${(err as Error).message}\n`, - ); + logWarning("reconcile", "manifest write failed (non-fatal)", { error: (err as Error).message }); } return { autoMerged: merged.length, conflicts: [] }; diff --git a/src/resources/extensions/gsd/workspace-index.ts b/src/resources/extensions/gsd/workspace-index.ts index 8b270662b..28fa95df1 100644 --- a/src/resources/extensions/gsd/workspace-index.ts +++ b/src/resources/extensions/gsd/workspace-index.ts @@ -11,6 +11,7 @@ import { resolveTasksDir, } from "./paths.js"; import { deriveState } from "./state.js"; +import { extractVerdict } from "./verdict-parser.js"; import { milestoneIdSort, findMilestoneIds } from "./guided-flow.js"; import type { RiskLevel } from "./types.js"; import { getSliceBranchName, detectWorktreeName } from "./worktree.js"; @@ -42,6 +43,10 @@ export interface WorkspaceMilestoneTarget { id: string; title: string; roadmapPath?: string; + /** Authoritative milestone lifecycle status from the GSD state registry. */ + status?: "complete" | "active" | "pending" | "parked"; + /** Milestone validation verdict, when validation has been performed. 
*/ + validationVerdict?: "pass" | "needs-attention" | "needs-remediation"; slices: WorkspaceSliceTarget[]; } @@ -192,6 +197,31 @@ export async function indexWorkspace(basePath: string, opts: IndexWorkspaceOptio phase: state.phase, }; + // Enrich milestones with authoritative status from state registry (#2807) + if (state.registry) { + const registryMap = new Map(state.registry.map(e => [e.id, e])); + for (const milestone of milestones) { + const entry = registryMap.get(milestone.id); + if (entry) { + milestone.status = entry.status; + } + } + } + + // Populate validationVerdict from VALIDATION files (#2807) + for (const milestone of milestones) { + const validationPath = resolveMilestoneFile(basePath, milestone.id, "VALIDATION"); + if (validationPath) { + const validationContent = await loadFile(validationPath); + if (validationContent) { + const verdict = extractVerdict(validationContent); + if (verdict === "pass" || verdict === "needs-attention" || verdict === "needs-remediation") { + milestone.validationVerdict = verdict; + } + } + } + } + const scopes: WorkspaceScopeTarget[] = [{ scope: "project", label: "project", kind: "project" }]; for (const milestone of milestones) { scopes.push({ scope: milestone.id, label: `${milestone.id}: ${milestone.title}`, kind: "milestone" }); diff --git a/src/resources/extensions/gsd/worktree-manager.ts b/src/resources/extensions/gsd/worktree-manager.ts index 5cf93e387..b929e02e7 100644 --- a/src/resources/extensions/gsd/worktree-manager.ts +++ b/src/resources/extensions/gsd/worktree-manager.ts @@ -15,7 +15,7 @@ * 4. 
remove() — git worktree remove + branch cleanup */ -import { existsSync, mkdirSync, readFileSync, realpathSync, rmSync } from "node:fs"; +import { existsSync, lstatSync, mkdirSync, readdirSync, readFileSync, realpathSync, rmSync } from "node:fs"; import { execFileSync } from "node:child_process"; import { join, resolve, sep } from "node:path"; import { GSDError, GSD_PARSE_ERROR, GSD_STALE_STATE, GSD_LOCK_HELD, GSD_GIT_ERROR, GSD_MERGE_CONFLICT } from "./errors.js"; @@ -277,6 +277,78 @@ export function listWorktrees(basePath: string): WorktreeInfo[] { return worktrees; } +// ─── Nested .git Detection (#2616) ────────────────────────────────────── +// +// Scaffolding tools (create-next-app, cargo init, etc.) create nested .git +// directories inside worktrees. Git records these as gitlinks (mode 160000) +// without a .gitmodules entry — so worktree cleanup destroys the only copy +// of their object database, causing permanent silent data loss. + +/** Directories to skip when scanning for nested .git dirs. */ +const NESTED_GIT_SKIP_DIRS = new Set([ + ".git", ".gsd", "node_modules", ".next", ".nuxt", "dist", "build", + "__pycache__", ".tox", ".venv", "venv", "target", "vendor", +]); + +/** + * Recursively find nested .git directories inside a worktree root. + * Returns paths to directories that contain their own .git (directory, not file). + * Skips node_modules, .gsd, and other non-project directories for performance. + * + * A nested .git *directory* (not a .git file — which is a legitimate worktree + * pointer) indicates a scaffolded repo that will become an orphaned gitlink. + */ +export function findNestedGitDirs(rootPath: string): string[] { + const results: string[] = []; + + function walk(dir: string, depth: number): void { + // Cap recursion depth to avoid runaway scanning + if (depth > 10) return; + + let entries: string[]; + try { + entries = readdirSync(dir); + } catch { + return; // Permission denied, broken symlink, etc. 
+ } + + for (const entry of entries) { + if (NESTED_GIT_SKIP_DIRS.has(entry)) continue; + + const fullPath = join(dir, entry); + + // Only follow real directories, not symlinks + let stat; + try { + stat = lstatSync(fullPath); + } catch { + continue; + } + if (!stat.isDirectory()) continue; + + // Check if this directory contains a .git *directory* (not a .git file). + // A .git file is a worktree pointer and is legitimate. + // A .git directory is a standalone repo created by scaffolding. + const innerGit = join(fullPath, ".git"); + try { + const innerStat = lstatSync(innerGit); + if (innerStat.isDirectory()) { + results.push(fullPath); + // Don't recurse into the nested repo — we found what we need + continue; + } + } catch { + // No .git here — continue scanning + } + + walk(fullPath, depth + 1); + } + } + + walk(rootPath, 0); + return results; +} + /** * Remove a worktree and optionally delete its branch. * If the process is currently inside the worktree, chdir out first. @@ -355,6 +427,30 @@ export function removeWorktree( } } + // Nested .git safety (#2616): detect nested .git directories created by + // scaffolding tools (create-next-app, cargo init, etc.). These produce + // gitlink entries (mode 160000) without .gitmodules — cleanup would destroy + // the only copy of the nested object database, causing permanent data loss. + // Fix: remove the nested .git dirs so git tracks the files as regular content. 
+ const nestedGitDirs = findNestedGitDirs(resolvedWtPath); + if (nestedGitDirs.length > 0) { + for (const nestedDir of nestedGitDirs) { + const nestedGitPath = join(nestedDir, ".git"); + try { + rmSync(nestedGitPath, { recursive: true, force: true }); + logWarning("reconcile", + `Removed nested .git directory from scaffolded project to prevent data loss (#2616)`, + { worktree: name, nestedRepo: nestedDir }, + ); + } catch { + logWarning("reconcile", + `Failed to remove nested .git directory — files may be lost as orphaned gitlink`, + { worktree: name, nestedRepo: nestedDir }, + ); + } + } + } + // Remove worktree: try non-force first when submodules have changes, // falling back to force only after submodule state has been preserved. const useForce = hasSubmoduleChanges ? false : force; @@ -365,6 +461,29 @@ export function removeWorktree( try { nativeWorktreeRemove(basePath, resolvedWtPath, true); } catch { /* may fail */ } } + // (#2821) If the worktree directory STILL exists after both native removal + // attempts (e.g. untracked files like ASSESSMENT/UAT-RESULT prevent git + // worktree remove), force-remove the git internal worktree metadata first, + // then remove the filesystem directory. Without this, the .git/worktrees/ + // lock prevents rmSync from cleaning up, and the orphaned worktree directory + // causes every subsequent `/gsd auto` to re-enter the stale worktree. + if (existsSync(resolvedWtPath)) { + try { + const wtInternalDir = join(basePath, ".git", "worktrees", name); + if (existsSync(wtInternalDir)) { + rmSync(wtInternalDir, { recursive: true, force: true }); + } + rmSync(resolvedWtPath, { recursive: true, force: true }); + } catch { + logWarning( + "reconcile", + `Worktree directory could not be removed after git internal cleanup: ${resolvedWtPath}. 
` + + `Manual cleanup: rm -rf "${resolvedWtPath.replaceAll("\\", "/")}"`, + { worktree: name }, + ); + } + } + // Prune stale entries so git knows the worktree is gone nativeWorktreePrune(basePath); diff --git a/src/resources/extensions/gsd/worktree-resolver.ts b/src/resources/extensions/gsd/worktree-resolver.ts index c84d44656..413096fe0 100644 --- a/src/resources/extensions/gsd/worktree-resolver.ts +++ b/src/resources/extensions/gsd/worktree-resolver.ts @@ -350,7 +350,13 @@ export class WorktreeResolver { data: { milestoneId, mode }, }); - if (mode === "none") { + // #2625: If we are physically inside an auto-worktree, we MUST merge + // regardless of the current isolation config. This prevents data loss when + // the default isolation mode changes between versions (e.g., "worktree" -> + // "none"): the worktree branch still holds real commits that need merging. + const inWorktree = this.deps.isInAutoWorktree(this.s.basePath) && this.s.originalBasePath; + + if (mode === "none" && !inWorktree) { debugLog("WorktreeResolver", { action: "mergeAndExit", milestoneId, @@ -361,8 +367,7 @@ export class WorktreeResolver { } if ( - mode === "worktree" || - (this.deps.isInAutoWorktree(this.s.basePath) && this.s.originalBasePath) + mode === "worktree" || inWorktree ) { this._mergeWorktreeMode(milestoneId, ctx); } else if (mode === "branch") { @@ -432,6 +437,20 @@ export class WorktreeResolver { milestoneId, roadmapContent, ); + + // #2945 Bug 3: mergeMilestoneToMain performs best-effort worktree + // cleanup internally (step 12), but it can silently fail on Windows + // or when the worktree directory is locked. Perform a secondary + // teardown here to ensure the worktree is properly cleaned up. + // This is idempotent — if the worktree was already removed, + // teardownAutoWorktree handles the no-op case gracefully. 
+ try { + this.deps.teardownAutoWorktree(originalBase, milestoneId); + } catch { + // Best-effort — the primary cleanup in mergeMilestoneToMain may + // have already removed the worktree. + } + if (mergeResult.codeFilesChanged) { ctx.notify( `Milestone ${milestoneId} merged to main.${mergeResult.pushed ? " Pushed to remote." : ""}`, diff --git a/src/resources/extensions/mcp-client/index.ts b/src/resources/extensions/mcp-client/index.ts index 38d001aa1..f62173455 100644 --- a/src/resources/extensions/mcp-client/index.ts +++ b/src/resources/extensions/mcp-client/index.ts @@ -111,7 +111,11 @@ function readConfigs(): McpServerConfig[] { } function getServerConfig(name: string): McpServerConfig | undefined { - return readConfigs().find((s) => s.name === name); + const trimmed = name.trim(); + return readConfigs().find((s) => + s.name === trimmed || + s.name.toLowerCase() === trimmed.toLowerCase(), + ); } /** Resolve ${VAR} references in env values against process.env. */ @@ -131,12 +135,14 @@ function resolveEnv(env: Record<string, string>): Record<string, string> { } async function getOrConnect(name: string, signal?: AbortSignal): Promise<Client> { - const existing = connections.get(name); - if (existing) return existing.client; - const config = getServerConfig(name); if (!config) throw new Error(`Unknown MCP server: "${name}". Use mcp_servers to list available servers.`); + // Always use config.name as the canonical cache key so that variant + // casing / whitespace still hits the same connection. + const existing = connections.get(config.name); + if (existing) return existing.client; + const client = new Client({ name: "gsd", version: "1.0.0" }); let transport: StdioClientTransport | StreamableHTTPClientTransport; @@ -151,15 +157,15 @@ async function getOrConnect(name: string, signal?: AbortSignal): Promise<Client> } else if (config.transport === "http" && config.url) { const resolvedUrl = config.url.replace( /\$\{([^}]+)\}/g, - (_, name) => process.env[name] ?? "", + (_, varName) => process.env[varName] ??
"", ); transport = new StreamableHTTPClientTransport(new URL(resolvedUrl)); } else { - throw new Error(`Server "${name}" has unsupported transport: ${config.transport}`); + throw new Error(`Server "${config.name}" has unsupported transport: ${config.transport}`); } await client.connect(transport, { signal, timeout: 30000 }); - connections.set(name, { client, transport }); + connections.set(config.name, { client, transport }); return client; } diff --git a/src/resources/extensions/mcp-client/tests/server-name-spaces.test.ts b/src/resources/extensions/mcp-client/tests/server-name-spaces.test.ts new file mode 100644 index 000000000..1cdb30f6e --- /dev/null +++ b/src/resources/extensions/mcp-client/tests/server-name-spaces.test.ts @@ -0,0 +1,55 @@ +/** + * Regression test for #3029 — mcp_discover fails for server names with spaces. + * + * The getServerConfig lookup must handle: + * 1. Exact match (already works) + * 2. Names with leading/trailing whitespace (trimming) + * 3. Case-insensitive matching (e.g. "Langgraph code" vs "langgraph Code") + * + * We test at the source level since getServerConfig is not exported. 
+ */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +const source = readFileSync(join(__dirname, "..", "index.ts"), "utf-8"); + +test("#3029: getServerConfig trims whitespace from input name", () => { + assert.ok( + source.includes(".trim()"), + "getServerConfig should trim the input name before comparison", + ); +}); + +test("#3029: getServerConfig performs case-insensitive matching", () => { + assert.ok( + source.includes(".toLowerCase()"), + "getServerConfig should compare names case-insensitively", + ); +}); + +test("#3029: getOrConnect normalizes name for connection cache lookup", () => { + // The connections Map key must use the canonical (config) name, not the + // raw user input, so that subsequent lookups hit the cache even when the + // user's casing differs. + const getOrConnectMatch = source.match( + /async function getOrConnect\(name: string[\s\S]*?const existing = connections\.get\(/, + ); + assert.ok( + getOrConnectMatch, + "getOrConnect function should exist", + ); + // After the fix, getOrConnect should normalize the name via getServerConfig + // or use config.name as the canonical cache key. 
+ assert.ok( + source.includes("connections.get(config.name") || + source.includes("connections.set(config.name"), + "getOrConnect should use config.name (canonical) as the connections cache key", + ); +}); diff --git a/src/resources/extensions/ollama/index.ts b/src/resources/extensions/ollama/index.ts new file mode 100644 index 000000000..3117fdd54 --- /dev/null +++ b/src/resources/extensions/ollama/index.ts @@ -0,0 +1,130 @@ +// GSD2 — Ollama Extension: First-class local LLM support +/** + * Ollama Extension + * + * Auto-detects a running Ollama instance, discovers locally pulled models, + * and registers them as a first-class provider. No configuration required — + * if Ollama is running, models appear automatically. + * + * Features: + * - Auto-discovery of local models via /api/tags + * - Capability detection (vision, reasoning, context window) + * - /ollama slash commands for model management + * - ollama_manage tool for LLM-driven model operations + * - Zero-cost model registration (local inference) + * + * Respects OLLAMA_HOST env var for non-default endpoints. 
+ */ + +import { importExtensionModule, type ExtensionAPI } from "@gsd/pi-coding-agent"; +import type { OpenAICompletionsCompat } from "@gsd/pi-ai"; +import * as client from "./ollama-client.js"; +import { discoverModels, getOllamaOpenAIBaseUrl } from "./ollama-discovery.js"; +import { registerOllamaCommands } from "./ollama-commands.js"; + +/** Default compat settings for Ollama models via OpenAI-compat endpoint */ +const OLLAMA_COMPAT: OpenAICompletionsCompat = { + supportsDeveloperRole: false, + supportsReasoningEffort: false, + supportsUsageInStreaming: false, + maxTokensField: "max_tokens", + supportsStore: false, +}; + +let toolsPromise: Promise<void> | null = null; + +async function registerOllamaTools(pi: ExtensionAPI): Promise<void> { + if (!toolsPromise) { + toolsPromise = (async () => { + const { registerOllamaTool } = await importExtensionModule< + typeof import("./ollama-tool.js") + >(import.meta.url, "./ollama-tool.js"); + registerOllamaTool(pi); + })().catch((error) => { + toolsPromise = null; + throw error; + }); + } + return toolsPromise; +} + +/** Track whether we've registered models so we can clean up on shutdown */ +let providerRegistered = false; + +/** + * Probe Ollama and register discovered models. + * Safe to call multiple times — re-discovers and re-registers.
+ */ +async function probeAndRegister(pi: ExtensionAPI): Promise<boolean> { + const running = await client.isRunning(); + if (!running) { + if (providerRegistered) { + pi.unregisterProvider("ollama"); + providerRegistered = false; + } + return false; + } + + const models = await discoverModels(); + if (models.length === 0) return true; // Running but no models pulled + + const baseUrl = getOllamaOpenAIBaseUrl(); + + pi.registerProvider("ollama", { + authMode: "none", + baseUrl, + api: "openai-completions", + isReady: () => true, + models: models.map((m) => ({ + id: m.id, + name: m.name, + reasoning: m.reasoning, + input: m.input, + cost: m.cost, + contextWindow: m.contextWindow, + maxTokens: m.maxTokens, + compat: OLLAMA_COMPAT, + })), + }); + + providerRegistered = true; + return true; +} + +export default function ollama(pi: ExtensionAPI) { + // Register slash commands immediately (they check Ollama availability themselves) + registerOllamaCommands(pi); + + pi.on("session_start", async (_event, ctx) => { + // Register tool (deferred to avoid blocking startup) + if (ctx.hasUI) { + void registerOllamaTools(pi).catch((error) => { + ctx.ui.notify( + `Ollama tool failed to load: ${error instanceof Error ?
error.message : String(error)}`, + "warning", + ); + }); + } else { + await registerOllamaTools(pi); + } + + // Async probe — don't block startup + probeAndRegister(pi) + .then((found) => { + if (found && ctx.hasUI) { + ctx.ui.setStatus("ollama", "Ollama"); + } + }) + .catch(() => { + // Silently ignore probe failures + }); + }); + + pi.on("session_shutdown", async () => { + if (providerRegistered) { + pi.unregisterProvider("ollama"); + providerRegistered = false; + } + toolsPromise = null; + }); +} diff --git a/src/resources/extensions/ollama/model-capabilities.ts b/src/resources/extensions/ollama/model-capabilities.ts new file mode 100644 index 000000000..8209946c3 --- /dev/null +++ b/src/resources/extensions/ollama/model-capabilities.ts @@ -0,0 +1,145 @@ +// GSD2 — Known model capability table for Ollama models + +/** + * Maps well-known Ollama model families to their capabilities. + * Used to enrich auto-discovered models with accurate context windows, + * vision support, and reasoning detection. + * + * Fallback: estimate from parameter count if model isn't in the table. + */ + +export interface ModelCapability { + contextWindow?: number; + maxTokens?: number; + input?: ("text" | "image")[]; + reasoning?: boolean; +} + +/** + * Known model family capabilities. + * Keys are matched as prefixes against the model name (before the colon/tag). + * More specific entries should appear first. 
+ */ +const KNOWN_MODELS: Array<[pattern: string, caps: ModelCapability]> = [ + // ─── Reasoning models ─────────────────────────────────────────────── + ["deepseek-r1", { contextWindow: 131072, reasoning: true }], + ["qwq", { contextWindow: 131072, reasoning: true }], + + // ─── Vision models ────────────────────────────────────────────────── + ["llava", { contextWindow: 4096, input: ["text", "image"] }], + ["bakllava", { contextWindow: 4096, input: ["text", "image"] }], + ["moondream", { contextWindow: 8192, input: ["text", "image"] }], + ["llama3.2-vision", { contextWindow: 131072, input: ["text", "image"] }], + ["minicpm-v", { contextWindow: 4096, input: ["text", "image"] }], + + // ─── Code models ──────────────────────────────────────────────────── + ["codestral", { contextWindow: 262144, maxTokens: 32768 }], + ["qwen2.5-coder", { contextWindow: 131072, maxTokens: 32768 }], + ["deepseek-coder-v2", { contextWindow: 131072, maxTokens: 16384 }], + ["starcoder2", { contextWindow: 16384, maxTokens: 8192 }], + ["codegemma", { contextWindow: 8192, maxTokens: 8192 }], + ["codellama", { contextWindow: 16384, maxTokens: 8192 }], + ["devstral", { contextWindow: 131072, maxTokens: 32768 }], + + // ─── Llama family ─────────────────────────────────────────────────── + ["llama3.3", { contextWindow: 131072, maxTokens: 16384 }], + ["llama3.2", { contextWindow: 131072, maxTokens: 16384 }], + ["llama3.1", { contextWindow: 131072, maxTokens: 16384 }], + ["llama3", { contextWindow: 8192, maxTokens: 8192 }], + ["llama2", { contextWindow: 4096, maxTokens: 4096 }], + + // ─── Qwen family ──────────────────────────────────────────────────── + ["qwen3", { contextWindow: 131072, maxTokens: 32768 }], + ["qwen2.5", { contextWindow: 131072, maxTokens: 32768 }], + ["qwen2", { contextWindow: 131072, maxTokens: 32768 }], + + // ─── Gemma family ─────────────────────────────────────────────────── + ["gemma3", { contextWindow: 131072, maxTokens: 16384 }], + ["gemma2", { contextWindow: 8192, 
maxTokens: 8192 }], + + // ─── Mistral family ───────────────────────────────────────────────── + ["mistral-large", { contextWindow: 131072, maxTokens: 16384 }], + ["mistral-small", { contextWindow: 131072, maxTokens: 16384 }], + ["mistral-nemo", { contextWindow: 131072, maxTokens: 16384 }], + ["mistral", { contextWindow: 32768, maxTokens: 8192 }], + ["mixtral", { contextWindow: 32768, maxTokens: 8192 }], + + // ─── Phi family ───────────────────────────────────────────────────── + ["phi4", { contextWindow: 16384, maxTokens: 16384 }], + ["phi3.5", { contextWindow: 131072, maxTokens: 16384 }], + ["phi3", { contextWindow: 131072, maxTokens: 4096 }], + + // ─── Command R ────────────────────────────────────────────────────── + ["command-r-plus", { contextWindow: 131072, maxTokens: 16384 }], + ["command-r", { contextWindow: 131072, maxTokens: 16384 }], +]; + +/** + * Look up capabilities for a model by name. + * Matches the longest prefix from the known models table. + */ +export function getModelCapabilities(modelName: string): ModelCapability { + // Strip tag (everything after the colon) for matching + const baseName = modelName.split(":")[0].toLowerCase(); + + for (const [pattern, caps] of KNOWN_MODELS) { + if (baseName === pattern || baseName.startsWith(pattern)) { + return caps; + } + } + + return {}; +} + +/** + * Estimate context window from parameter size string (e.g. "7B", "70B", "1.5B"). + * Used as fallback when model isn't in the known table. + */ +export function estimateContextFromParams(parameterSize: string): number { + const match = parameterSize.match(/([\d.]+)\s*([BbMm])/); + if (!match) return 8192; + + const size = parseFloat(match[1]); + const unit = match[2].toUpperCase(); + + // Convert to billions + const billions = unit === "M" ? 
size / 1000 : size; + + // Rough heuristics: larger models tend to support larger contexts + if (billions >= 70) return 131072; + if (billions >= 30) return 65536; + if (billions >= 13) return 32768; + if (billions >= 7) return 16384; + return 8192; +} + +/** + * Humanize a model name for display (e.g. "llama3.1:8b" → "Llama 3.1 8B"). + */ +export function humanizeModelName(modelName: string): string { + const [base, tag] = modelName.split(":"); + + // Capitalize first letter, add spaces around version numbers + let name = base + .replace(/([a-z])(\d)/g, "$1 $2") + .replace(/(\d)([a-z])/g, "$1 $2") + .replace(/^./, (c) => c.toUpperCase()); + + // Clean up common patterns + name = name.replace(/\s*-\s*/g, " "); + + if (tag && tag !== "latest") { + name += ` ${tag.toUpperCase()}`; + } + + return name; +} + +/** + * Format byte size for display (e.g. 4700000000 → "4.7 GB"). + */ +export function formatModelSize(bytes: number): string { + if (bytes >= 1e9) return `${(bytes / 1e9).toFixed(1)} GB`; + if (bytes >= 1e6) return `${(bytes / 1e6).toFixed(1)} MB`; + return `${(bytes / 1e3).toFixed(0)} KB`; +} diff --git a/src/resources/extensions/ollama/ollama-client.ts b/src/resources/extensions/ollama/ollama-client.ts new file mode 100644 index 000000000..d881fd013 --- /dev/null +++ b/src/resources/extensions/ollama/ollama-client.ts @@ -0,0 +1,196 @@ +// GSD2 — HTTP client for Ollama REST API + +/** + * Low-level HTTP client for the Ollama REST API. + * Respects the OLLAMA_HOST environment variable for non-default endpoints. + * + * Reference: https://github.com/ollama/ollama/blob/main/docs/api.md + */ + +import type { + OllamaPsResponse, + OllamaPullProgress, + OllamaShowResponse, + OllamaTagsResponse, + OllamaVersionResponse, +} from "./types.js"; + +const DEFAULT_HOST = "http://localhost:11434"; +const PROBE_TIMEOUT_MS = 1500; +const REQUEST_TIMEOUT_MS = 10000; + +/** + * Get the Ollama host URL from OLLAMA_HOST or default. 
+ */ +export function getOllamaHost(): string { + const host = process.env.OLLAMA_HOST; + if (!host) return DEFAULT_HOST; + + // OLLAMA_HOST can be just a host:port without scheme + if (host.startsWith("http://") || host.startsWith("https://")) return host; + return `http://${host}`; +} + +async function fetchWithTimeout(url: string, options: RequestInit = {}, timeoutMs = REQUEST_TIMEOUT_MS): Promise<Response> { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), timeoutMs); + try { + return await fetch(url, { ...options, signal: controller.signal }); + } finally { + clearTimeout(timeout); + } +} + +/** + * Check if Ollama is running and reachable. + */ +export async function isRunning(): Promise<boolean> { + try { + const response = await fetchWithTimeout(`${getOllamaHost()}/`, {}, PROBE_TIMEOUT_MS); + return response.ok; + } catch { + return false; + } +} + +/** + * Get Ollama version. + */ +export async function getVersion(): Promise<string | null> { + try { + const response = await fetchWithTimeout(`${getOllamaHost()}/api/version`); + if (!response.ok) return null; + const data = (await response.json()) as OllamaVersionResponse; + return data.version; + } catch { + return null; + } +} + +/** + * List all locally available models. + */ +export async function listModels(): Promise<OllamaTagsResponse> { + const response = await fetchWithTimeout(`${getOllamaHost()}/api/tags`); + if (!response.ok) { + throw new Error(`Ollama /api/tags returned ${response.status}: ${response.statusText}`); + } + return (await response.json()) as OllamaTagsResponse; +} + +/** + * Get detailed information about a specific model.
+ */ +export async function showModel(name: string): Promise<OllamaShowResponse> { + const response = await fetchWithTimeout(`${getOllamaHost()}/api/show`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ name }), + }); + if (!response.ok) { + throw new Error(`Ollama /api/show returned ${response.status}: ${response.statusText}`); + } + return (await response.json()) as OllamaShowResponse; +} + +/** + * List currently loaded/running models. + */ +export async function getRunningModels(): Promise<OllamaPsResponse> { + const response = await fetchWithTimeout(`${getOllamaHost()}/api/ps`); + if (!response.ok) { + throw new Error(`Ollama /api/ps returned ${response.status}: ${response.statusText}`); + } + return (await response.json()) as OllamaPsResponse; +} + +/** + * Pull a model with streaming progress. + * Calls onProgress for each progress update. + * Returns when the pull is complete. + */ +export async function pullModel( + name: string, + onProgress?: (progress: OllamaPullProgress) => void, + signal?: AbortSignal, +): Promise<void> { + const response = await fetch(`${getOllamaHost()}/api/pull`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ name, stream: true }), + signal, + }); + + if (!response.ok) { + const text = await response.text(); + throw new Error(`Ollama /api/pull returned ${response.status}: ${text}`); + } + + if (!response.body) { + throw new Error("Ollama /api/pull returned no body"); + } + + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ""; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split("\n"); + buffer = lines.pop() ??
""; + + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed) continue; + try { + const progress = JSON.parse(trimmed) as OllamaPullProgress; + onProgress?.(progress); + } catch { + // Skip malformed lines + } + } + } + + // Process remaining buffer + if (buffer.trim()) { + try { + const progress = JSON.parse(buffer.trim()) as OllamaPullProgress; + onProgress?.(progress); + } catch { + // Ignore + } + } +} + +/** + * Delete a local model. + */ +export async function deleteModel(name: string): Promise<void> { + const response = await fetchWithTimeout(`${getOllamaHost()}/api/delete`, { + method: "DELETE", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ name }), + }); + if (!response.ok) { + const text = await response.text(); + throw new Error(`Ollama /api/delete returned ${response.status}: ${text}`); + } +} + +/** + * Copy a model to a new name. + */ +export async function copyModel(source: string, destination: string): Promise<void> { + const response = await fetchWithTimeout(`${getOllamaHost()}/api/copy`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ source, destination }), + }); + if (!response.ok) { + const text = await response.text(); + throw new Error(`Ollama /api/copy returned ${response.status}: ${text}`); + } +} diff --git a/src/resources/extensions/ollama/ollama-commands.ts b/src/resources/extensions/ollama/ollama-commands.ts new file mode 100644 index 000000000..81322c784 --- /dev/null +++ b/src/resources/extensions/ollama/ollama-commands.ts @@ -0,0 +1,248 @@ +// GSD2 — Ollama slash commands + +/** + * Registers /ollama slash commands for managing local Ollama models.
+ * + * Commands: + * /ollama — Show status (running?, version, loaded models) + * /ollama list — List all available local models with sizes + * /ollama pull <model> — Pull a model with progress + * /ollama remove <model> — Delete a local model + * /ollama ps — Show running models and resource usage + */ + +import type { ExtensionAPI } from "@gsd/pi-coding-agent"; +import { Text } from "@gsd/pi-tui"; +import * as client from "./ollama-client.js"; +import { discoverModels, formatModelForDisplay } from "./ollama-discovery.js"; +import { formatModelSize } from "./model-capabilities.js"; + +export function registerOllamaCommands(pi: ExtensionAPI): void { + pi.registerCommand("ollama", { + description: "Manage local Ollama models — list | pull | remove | ps", + async handler(args, ctx) { + const parts = (args ?? "").trim().split(/\s+/); + const subcommand = parts[0] || "status"; + const modelArg = parts.slice(1).join(" "); + + switch (subcommand) { + case "status": + return await handleStatus(ctx); + case "list": + case "ls": + return await handleList(ctx); + case "pull": + return await handlePull(modelArg, ctx); + case "remove": + case "rm": + case "delete": + return await handleRemove(modelArg, ctx); + case "ps": + return await handlePs(ctx); + default: + ctx.ui.notify( + `Unknown subcommand: ${subcommand}. Use: status, list, pull, remove, ps`, + "warning", + ); + } + }, + }); +} + +async function handleStatus(ctx: any): Promise<void> { + const running = await client.isRunning(); + if (!running) { + ctx.ui.notify( + "Ollama is not running. Install from https://ollama.com and run 'ollama serve'", + "warning", + ); + return; + } + + const version = await client.getVersion(); + const lines: string[] = []; + lines.push(`Ollama${version ?
` v${version}` : ""} — running (${client.getOllamaHost()})`); + + // Show loaded models + try { + const ps = await client.getRunningModels(); + if (ps.models && ps.models.length > 0) { + lines.push(""); + lines.push("Loaded:"); + for (const m of ps.models) { + const vram = m.size_vram > 0 ? formatModelSize(m.size_vram) + " VRAM" : "CPU"; + const expiresAt = new Date(m.expires_at); + const idleMs = expiresAt.getTime() - Date.now(); + const idleMin = Math.max(0, Math.floor(idleMs / 60000)); + lines.push(` ${m.name} ${vram} expires in ${idleMin}m`); + } + } + } catch { + // ps endpoint may not be available on older versions + } + + // Show available models + try { + const models = await discoverModels(); + if (models.length > 0) { + lines.push(""); + lines.push("Available:"); + for (const m of models) { + lines.push(` ${formatModelForDisplay(m)}`); + } + } else { + lines.push(""); + lines.push("No models pulled. Use /ollama pull <model> to get started."); + } + } catch (err) { + lines.push(""); + lines.push(`Error listing models: ${err instanceof Error ? err.message : String(err)}`); + } + + await ctx.ui.custom( + (tui: any, theme: any, _kb: any, done: (r: undefined) => void) => { + const text = new Text(lines.map((l) => theme.fg("fg", l)).join("\n"), 0, 0); + setTimeout(() => done(undefined), 0); + return text; + }, + ); +} + +async function handleList(ctx: any): Promise<void> { + const running = await client.isRunning(); + if (!running) { + ctx.ui.notify("Ollama is not running", "warning"); + return; + } + + const models = await discoverModels(); + if (models.length === 0) { + ctx.ui.notify("No models available.
Use /ollama pull <model> to download one.", "info"); + return; + } + + const lines = ["Local Ollama models:", ""]; + for (const m of models) { + lines.push(` ${formatModelForDisplay(m)}`); + } + + await ctx.ui.custom( + (tui: any, theme: any, _kb: any, done: (r: undefined) => void) => { + const text = new Text(lines.map((l) => theme.fg("fg", l)).join("\n"), 0, 0); + setTimeout(() => done(undefined), 0); + return text; + }, + ); +} + +async function handlePull(modelName: string, ctx: any): Promise<void> { + if (!modelName) { + ctx.ui.notify("Usage: /ollama pull <model> (e.g. /ollama pull llama3.1:8b)", "warning"); + return; + } + + const running = await client.isRunning(); + if (!running) { + ctx.ui.notify("Ollama is not running", "warning"); + return; + } + + ctx.ui.setWidget("ollama-pull", [`Pulling ${modelName}...`]); + + try { + let lastPercent = -1; + await client.pullModel(modelName, (progress) => { + if (progress.total && progress.completed) { + const percent = Math.floor((progress.completed / progress.total) * 100); + if (percent !== lastPercent) { + lastPercent = percent; + const completed = formatModelSize(progress.completed); + const total = formatModelSize(progress.total); + ctx.ui.setWidget("ollama-pull", [ + `Pulling ${modelName}... ${percent}% (${completed} / ${total})`, + ]); + } + } else if (progress.status) { + ctx.ui.setWidget("ollama-pull", [`${modelName}: ${progress.status}`]); + } + }); + + ctx.ui.setWidget("ollama-pull", undefined); + ctx.ui.notify(`${modelName} pulled successfully`, "success"); + } catch (err) { + ctx.ui.setWidget("ollama-pull", undefined); + ctx.ui.notify( + `Failed to pull ${modelName}: ${err instanceof Error ?
err.message : String(err)}`, + "error", + ); + } +} + +async function handleRemove(modelName: string, ctx: any): Promise<void> { + if (!modelName) { + ctx.ui.notify("Usage: /ollama remove <model>", "warning"); + return; + } + + const running = await client.isRunning(); + if (!running) { + ctx.ui.notify("Ollama is not running", "warning"); + return; + } + + const confirmed = await ctx.ui.confirm( + "Delete model", + `Are you sure you want to delete ${modelName}?`, + ); + + if (!confirmed) return; + + try { + await client.deleteModel(modelName); + ctx.ui.notify(`${modelName} deleted`, "success"); + } catch (err) { + ctx.ui.notify( + `Failed to delete ${modelName}: ${err instanceof Error ? err.message : String(err)}`, + "error", + ); + } +} + +async function handlePs(ctx: any): Promise<void> { + const running = await client.isRunning(); + if (!running) { + ctx.ui.notify("Ollama is not running", "warning"); + return; + } + + try { + const ps = await client.getRunningModels(); + if (!ps.models || ps.models.length === 0) { + ctx.ui.notify("No models currently loaded in memory", "info"); + return; + } + + const lines = ["Running models:", ""]; + for (const m of ps.models) { + const vram = m.size_vram > 0 ? formatModelSize(m.size_vram) + " VRAM" : "CPU only"; + const totalSize = formatModelSize(m.size); + const expiresAt = new Date(m.expires_at); + const idleMs = expiresAt.getTime() - Date.now(); + const idleMin = Math.max(0, Math.floor(idleMs / 60000)); + lines.push(` ${m.name} ${totalSize} ${vram} expires in ${idleMin}m`); + } + + await ctx.ui.custom( + (tui: any, theme: any, _kb: any, done: (r: undefined) => void) => { + const text = new Text(lines.map((l) => theme.fg("fg", l)).join("\n"), 0, 0); + setTimeout(() => done(undefined), 0); + return text; + }, + ); + } catch (err) { + ctx.ui.notify( + `Failed to get running models: ${err instanceof Error ?
err.message : String(err)}`, + "error", + ); + } +} diff --git a/src/resources/extensions/ollama/ollama-discovery.ts b/src/resources/extensions/ollama/ollama-discovery.ts new file mode 100644 index 000000000..eb6916b9e --- /dev/null +++ b/src/resources/extensions/ollama/ollama-discovery.ts @@ -0,0 +1,106 @@ +// GSD2 — Ollama model discovery and capability detection + +/** + * Discovers locally available Ollama models and enriches them with + * capability metadata (context window, vision, reasoning) from the + * known model table and /api/show responses. + * + * Returns models in the format expected by pi.registerProvider(). + */ + +import { listModels, getOllamaHost } from "./ollama-client.js"; +import { + estimateContextFromParams, + formatModelSize, + getModelCapabilities, + humanizeModelName, +} from "./model-capabilities.js"; +import type { OllamaModelInfo } from "./types.js"; + +export interface DiscoveredOllamaModel { + id: string; + name: string; + reasoning: boolean; + input: ("text" | "image")[]; + cost: { input: number; output: number; cacheRead: number; cacheWrite: number }; + contextWindow: number; + maxTokens: number; + /** Raw size in bytes for display purposes */ + sizeBytes: number; + /** Parameter size string from Ollama (e.g. "7B") */ + parameterSize: string; +} + +const ZERO_COST = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }; + +function enrichModel(info: OllamaModelInfo): DiscoveredOllamaModel { + const caps = getModelCapabilities(info.name); + const parameterSize = info.details?.parameter_size ?? ""; + + // Determine context window: known table > estimate from param size > default + const contextWindow = + caps.contextWindow ?? + (parameterSize ? estimateContextFromParams(parameterSize) : 8192); + + // Determine max tokens: known table > fraction of context > default + const maxTokens = + caps.maxTokens ?? 
Math.min(Math.floor(contextWindow / 4), 16384); + + // Detect vision from families or known table + const hasVision = + caps.input?.includes("image") ?? + (info.details?.families?.some((f) => f === "clip" || f === "mllama") ?? false); + + // Detect reasoning from known table + const reasoning = caps.reasoning ?? false; + + return { + id: info.name, + name: humanizeModelName(info.name), + reasoning, + input: hasVision ? ["text", "image"] : ["text"], + cost: ZERO_COST, + contextWindow, + maxTokens, + sizeBytes: info.size, + parameterSize, + }; +} + +/** + * Discover all locally available Ollama models with enriched capabilities. + */ +export async function discoverModels(): Promise<DiscoveredOllamaModel[]> { + const tags = await listModels(); + if (!tags.models || tags.models.length === 0) return []; + + return tags.models.map(enrichModel); +} + +/** + * Format a discovered model for display in model list. + */ +export function formatModelForDisplay(model: DiscoveredOllamaModel): string { + const parts = [model.id]; + + if (model.sizeBytes > 0) { + parts.push(`(${formatModelSize(model.sizeBytes)})`); + } + + const flags: string[] = []; + if (model.reasoning) flags.push("reasoning"); + if (model.input.includes("image")) flags.push("vision"); + + if (flags.length > 0) { + parts.push(`[${flags.join(", ")}]`); + } + + return parts.join(" "); +} + +/** + * Build the OpenAI-compat base URL for Ollama. + */ +export function getOllamaOpenAIBaseUrl(): string { + return `${getOllamaHost()}/v1`; +} diff --git a/src/resources/extensions/ollama/ollama-tool.ts b/src/resources/extensions/ollama/ollama-tool.ts new file mode 100644 index 000000000..ad5af5885 --- /dev/null +++ b/src/resources/extensions/ollama/ollama-tool.ts @@ -0,0 +1,218 @@ +// GSD2 — LLM-callable Ollama management tool +/** + * Registers an ollama_manage tool that the LLM can call to interact + * with the local Ollama instance — list models, pull new ones, check status.
+ */ + +import type { ExtensionAPI } from "@gsd/pi-coding-agent"; +import { Text } from "@gsd/pi-tui"; +import { Type } from "@sinclair/typebox"; +import * as client from "./ollama-client.js"; +import { discoverModels, formatModelForDisplay } from "./ollama-discovery.js"; +import { formatModelSize } from "./model-capabilities.js"; + +interface OllamaToolDetails { + action: string; + model?: string; + modelCount?: number; + durationMs: number; + error?: string; +} + +export function registerOllamaTool(pi: ExtensionAPI): void { + pi.registerTool({ + name: "ollama_manage", + label: "Ollama", + description: + "Manage local Ollama models. List available models, pull new ones, " + + "check Ollama status, or see running models and resource usage. " + + "Use this when you need a specific local model that isn't available yet.", + promptSnippet: "Manage local Ollama models (list, pull, status, ps)", + promptGuidelines: [ + "Use 'list' to see what models are available locally before trying to use one.", + "Use 'pull' to download a model that isn't available yet.", + "Use 'status' to check if Ollama is running.", + "Use 'ps' to see which models are loaded in memory and VRAM usage.", + "Common models: llama3.1:8b, qwen2.5-coder:7b, deepseek-r1:8b, codestral:22b", + ], + parameters: Type.Object({ + action: Type.Union( + [ + Type.Literal("list"), + Type.Literal("pull"), + Type.Literal("status"), + Type.Literal("ps"), + ], + { description: "Action to perform" }, + ), + model: Type.Optional( + Type.String({ description: "Model name (required for pull)" }), + ), + }), + + async execute(_toolCallId, params, signal, onUpdate, _ctx) { + const startTime = Date.now(); + const { action, model } = params; + + try { + switch (action) { + case "status": { + const running = await client.isRunning(); + if (!running) { + return { + content: [{ type: "text", text: "Ollama is not running. It needs to be started with 'ollama serve'." 
}], + details: { action, durationMs: Date.now() - startTime } as OllamaToolDetails, + }; + } + const version = await client.getVersion(); + return { + content: [{ type: "text", text: `Ollama${version ? ` v${version}` : ""} is running at ${client.getOllamaHost()}` }], + details: { action, durationMs: Date.now() - startTime } as OllamaToolDetails, + }; + } + + case "list": { + const running = await client.isRunning(); + if (!running) { + return { + content: [{ type: "text", text: "Ollama is not running." }], + isError: true, + details: { action, durationMs: Date.now() - startTime, error: "not_running" } as OllamaToolDetails, + }; + } + + const models = await discoverModels(); + if (models.length === 0) { + return { + content: [{ type: "text", text: "No models available. Pull one with action='pull'." }], + details: { action, modelCount: 0, durationMs: Date.now() - startTime } as OllamaToolDetails, + }; + } + + const lines = models.map((m) => formatModelForDisplay(m)); + return { + content: [{ type: "text", text: `Available models:\n${lines.join("\n")}` }], + details: { action, modelCount: models.length, durationMs: Date.now() - startTime } as OllamaToolDetails, + }; + } + + case "pull": { + if (!model) { + return { + content: [{ type: "text", text: "Error: 'model' parameter is required for pull action." }], + isError: true, + details: { action, durationMs: Date.now() - startTime, error: "missing_model" } as OllamaToolDetails, + }; + } + + const running = await client.isRunning(); + if (!running) { + return { + content: [{ type: "text", text: "Ollama is not running." }], + isError: true, + details: { action, model, durationMs: Date.now() - startTime, error: "not_running" } as OllamaToolDetails, + }; + } + + let lastStatus = ""; + await client.pullModel(model, (progress) => { + if (progress.total && progress.completed) { + const pct = Math.floor((progress.completed / progress.total) * 100); + const status = `Pulling ${model}... 
${pct}%`; + if (status !== lastStatus) { + lastStatus = status; + onUpdate?.({ content: [{ type: "text", text: status }], details: { action, model, durationMs: Date.now() - startTime } as OllamaToolDetails }); + } + } else if (progress.status && progress.status !== lastStatus) { + lastStatus = progress.status; + onUpdate?.({ content: [{ type: "text", text: `${model}: ${progress.status}` }], details: { action, model, durationMs: Date.now() - startTime } as OllamaToolDetails }); + } + }, signal); + + return { + content: [{ type: "text", text: `Successfully pulled ${model}` }], + details: { action, model, durationMs: Date.now() - startTime } as OllamaToolDetails, + }; + } + + case "ps": { + const running = await client.isRunning(); + if (!running) { + return { + content: [{ type: "text", text: "Ollama is not running." }], + isError: true, + details: { action, durationMs: Date.now() - startTime, error: "not_running" } as OllamaToolDetails, + }; + } + + const ps = await client.getRunningModels(); + if (!ps.models || ps.models.length === 0) { + return { + content: [{ type: "text", text: "No models currently loaded in memory." }], + details: { action, modelCount: 0, durationMs: Date.now() - startTime } as OllamaToolDetails, + }; + } + + const lines = ps.models.map((m) => { + const vram = m.size_vram > 0 ? `${formatModelSize(m.size_vram)} VRAM` : "CPU"; + return `${m.name} — ${formatModelSize(m.size)} total, ${vram}`; + }); + + return { + content: [{ type: "text", text: `Loaded models:\n${lines.join("\n")}` }], + details: { action, modelCount: ps.models.length, durationMs: Date.now() - startTime } as OllamaToolDetails, + }; + } + + default: + return { + content: [{ type: "text", text: `Unknown action: ${action}` }], + isError: true, + details: { action, durationMs: Date.now() - startTime, error: "unknown_action" } as OllamaToolDetails, + }; + } + } catch (err) { + const msg = err instanceof Error ? 
err.message : String(err); + return { + content: [{ type: "text", text: `Ollama error: ${msg}` }], + isError: true, + details: { action, model, durationMs: Date.now() - startTime, error: msg } as OllamaToolDetails, + }; + } + }, + + renderCall(args, theme) { + let text = theme.fg("toolTitle", theme.bold("ollama ")); + text += theme.fg("accent", args.action); + if (args.model) { + text += theme.fg("dim", ` ${args.model}`); + } + return new Text(text, 0, 0); + }, + + renderResult(result, { isPartial, expanded }, theme) { + const d = result.details as OllamaToolDetails | undefined; + + if (isPartial) return new Text(theme.fg("warning", "Working..."), 0, 0); + if ((result as any).isError || d?.error) { + return new Text(theme.fg("error", `Error: ${d?.error ?? "unknown"}`), 0, 0); + } + + let text = theme.fg("success", d?.action ?? "done"); + if (d?.modelCount !== undefined) { + text += theme.fg("dim", ` (${d.modelCount} models)`); + } + text += theme.fg("dim", ` ${d?.durationMs ?? 0}ms`); + + if (expanded) { + const content = result.content[0]; + if (content?.type === "text") { + const preview = content.text.split("\n").slice(0, 10).join("\n"); + text += "\n\n" + theme.fg("dim", preview); + } + } + + return new Text(text, 0, 0); + }, + }); +} diff --git a/src/resources/extensions/ollama/tests/model-capabilities.test.ts b/src/resources/extensions/ollama/tests/model-capabilities.test.ts new file mode 100644 index 000000000..61af68e9b --- /dev/null +++ b/src/resources/extensions/ollama/tests/model-capabilities.test.ts @@ -0,0 +1,162 @@ +// GSD2 — Tests for Ollama model capability detection +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { + getModelCapabilities, + estimateContextFromParams, + humanizeModelName, + formatModelSize, +} from "../model-capabilities.js"; + +// ─── getModelCapabilities ──────────────────────────────────────────────────── + +describe("getModelCapabilities", () => { + it("returns reasoning for 
deepseek-r1 models", () => { + const caps = getModelCapabilities("deepseek-r1:8b"); + assert.equal(caps.reasoning, true); + assert.equal(caps.contextWindow, 131072); + }); + + it("returns reasoning for qwq models", () => { + const caps = getModelCapabilities("qwq:32b"); + assert.equal(caps.reasoning, true); + }); + + it("returns vision for llava models", () => { + const caps = getModelCapabilities("llava:7b"); + assert.deepEqual(caps.input, ["text", "image"]); + }); + + it("returns vision for llama3.2-vision models", () => { + const caps = getModelCapabilities("llama3.2-vision:11b"); + assert.deepEqual(caps.input, ["text", "image"]); + }); + + it("returns correct context for llama3.1", () => { + const caps = getModelCapabilities("llama3.1:8b"); + assert.equal(caps.contextWindow, 131072); + }); + + it("returns correct context for llama3 (no .1)", () => { + const caps = getModelCapabilities("llama3:8b"); + assert.equal(caps.contextWindow, 8192); + }); + + it("returns correct context for llama2", () => { + const caps = getModelCapabilities("llama2:7b"); + assert.equal(caps.contextWindow, 4096); + }); + + it("returns correct context for qwen2.5-coder", () => { + const caps = getModelCapabilities("qwen2.5-coder:7b"); + assert.equal(caps.contextWindow, 131072); + assert.equal(caps.maxTokens, 32768); + }); + + it("returns correct context for codestral", () => { + const caps = getModelCapabilities("codestral:22b"); + assert.equal(caps.contextWindow, 262144); + }); + + it("returns correct context for mistral-nemo", () => { + const caps = getModelCapabilities("mistral-nemo:12b"); + assert.equal(caps.contextWindow, 131072); + }); + + it("returns correct context for gemma3", () => { + const caps = getModelCapabilities("gemma3:9b"); + assert.equal(caps.contextWindow, 131072); + }); + + it("returns empty object for unknown models", () => { + const caps = getModelCapabilities("totally-unknown-model:3b"); + assert.deepEqual(caps, {}); + }); + + it("strips tag before matching", () 
=> { + const caps = getModelCapabilities("llama3.1:70b-instruct-q4_0"); + assert.equal(caps.contextWindow, 131072); + }); + + it("matches case-insensitively", () => { + const caps = getModelCapabilities("Llama3.1:8B"); + assert.equal(caps.contextWindow, 131072); + }); +}); + +// ─── estimateContextFromParams ─────────────────────────────────────────────── + +describe("estimateContextFromParams", () => { + it("estimates 8192 for small models", () => { + assert.equal(estimateContextFromParams("1.5B"), 8192); + }); + + it("estimates 16384 for 7B models", () => { + assert.equal(estimateContextFromParams("7B"), 16384); + }); + + it("estimates 32768 for 13B models", () => { + assert.equal(estimateContextFromParams("13B"), 32768); + }); + + it("estimates 65536 for 34B models", () => { + assert.equal(estimateContextFromParams("34B"), 65536); + }); + + it("estimates 131072 for 70B+ models", () => { + assert.equal(estimateContextFromParams("70B"), 131072); + }); + + it("handles decimal sizes", () => { + assert.equal(estimateContextFromParams("7.5B"), 16384); + }); + + it("handles M (millions)", () => { + assert.equal(estimateContextFromParams("500M"), 8192); + }); + + it("returns 8192 for unparseable input", () => { + assert.equal(estimateContextFromParams("unknown"), 8192); + }); + + it("returns 8192 for empty string", () => { + assert.equal(estimateContextFromParams(""), 8192); + }); +}); + +// ─── humanizeModelName ─────────────────────────────────────────────────────── + +describe("humanizeModelName", () => { + it("capitalizes and adds tag", () => { + assert.equal(humanizeModelName("llama3.1:8b"), "Llama 3.1 8B"); + }); + + it("handles latest tag", () => { + assert.equal(humanizeModelName("llama3.1:latest"), "Llama 3.1"); + }); + + it("handles no tag", () => { + assert.equal(humanizeModelName("llama3.1"), "Llama 3.1"); + }); + + it("handles hyphenated names", () => { + const result = humanizeModelName("deepseek-r1:8b"); + assert.ok(result.includes("8B")); + }); +}); + 
+// ─── formatModelSize ───────────────────────────────────────────────────────── + +describe("formatModelSize", () => { + it("formats GB", () => { + assert.equal(formatModelSize(4_700_000_000), "4.7 GB"); + }); + + it("formats MB", () => { + assert.equal(formatModelSize(500_000_000), "500.0 MB"); + }); + + it("formats KB", () => { + assert.equal(formatModelSize(500_000), "500 KB"); + }); +}); diff --git a/src/resources/extensions/ollama/tests/ollama-client.test.ts b/src/resources/extensions/ollama/tests/ollama-client.test.ts new file mode 100644 index 000000000..0deae397a --- /dev/null +++ b/src/resources/extensions/ollama/tests/ollama-client.test.ts @@ -0,0 +1,38 @@ +// GSD2 — Tests for Ollama HTTP client +import { describe, it, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; +import { getOllamaHost } from "../ollama-client.js"; + +// ─── getOllamaHost ────────────────────────────────────────────────────────── + +describe("getOllamaHost", () => { + const originalHost = process.env.OLLAMA_HOST; + + afterEach(() => { + if (originalHost === undefined) { + delete process.env.OLLAMA_HOST; + } else { + process.env.OLLAMA_HOST = originalHost; + } + }); + + it("returns default when OLLAMA_HOST is not set", () => { + delete process.env.OLLAMA_HOST; + assert.equal(getOllamaHost(), "http://localhost:11434"); + }); + + it("returns OLLAMA_HOST when set with scheme", () => { + process.env.OLLAMA_HOST = "http://myhost:12345"; + assert.equal(getOllamaHost(), "http://myhost:12345"); + }); + + it("adds http:// when OLLAMA_HOST has no scheme", () => { + process.env.OLLAMA_HOST = "myhost:12345"; + assert.equal(getOllamaHost(), "http://myhost:12345"); + }); + + it("preserves https:// scheme", () => { + process.env.OLLAMA_HOST = "https://secure-ollama.example.com"; + assert.equal(getOllamaHost(), "https://secure-ollama.example.com"); + }); +}); diff --git a/src/resources/extensions/ollama/tests/ollama-discovery.test.ts 
b/src/resources/extensions/ollama/tests/ollama-discovery.test.ts new file mode 100644 index 000000000..b69cf84e1 --- /dev/null +++ b/src/resources/extensions/ollama/tests/ollama-discovery.test.ts @@ -0,0 +1,28 @@ +// GSD2 — Tests for Ollama model discovery and enrichment +import { describe, it, afterEach } from "node:test"; +import assert from "node:assert/strict"; +import { getOllamaOpenAIBaseUrl } from "../ollama-discovery.js"; + +// ─── getOllamaOpenAIBaseUrl ───────────────────────────────────────────────── + +describe("getOllamaOpenAIBaseUrl", () => { + const originalHost = process.env.OLLAMA_HOST; + + afterEach(() => { + if (originalHost === undefined) { + delete process.env.OLLAMA_HOST; + } else { + process.env.OLLAMA_HOST = originalHost; + } + }); + + it("returns default OpenAI-compat URL", () => { + delete process.env.OLLAMA_HOST; + assert.equal(getOllamaOpenAIBaseUrl(), "http://localhost:11434/v1"); + }); + + it("appends /v1 to custom OLLAMA_HOST", () => { + process.env.OLLAMA_HOST = "http://remote:9999"; + assert.equal(getOllamaOpenAIBaseUrl(), "http://remote:9999/v1"); + }); +}); diff --git a/src/resources/extensions/ollama/types.ts b/src/resources/extensions/ollama/types.ts new file mode 100644 index 000000000..5f2c88705 --- /dev/null +++ b/src/resources/extensions/ollama/types.ts @@ -0,0 +1,130 @@ +// GSD2 — Ollama API response types + +/** + * Type definitions for the Ollama REST API. 
+ * + * Reference: https://github.com/ollama/ollama/blob/main/docs/api.md + */ + +// ─── /api/tags ────────────────────────────────────────────────────────────── + +export interface OllamaModelDetails { + parent_model: string; + format: string; + family: string; + families: string[] | null; + parameter_size: string; + quantization_level: string; +} + +export interface OllamaModelInfo { + name: string; + model: string; + modified_at: string; + size: number; + digest: string; + details: OllamaModelDetails; +} + +export interface OllamaTagsResponse { + models: OllamaModelInfo[]; +} + +// ─── /api/show ────────────────────────────────────────────────────────────── + +export interface OllamaShowResponse { + modelfile: string; + parameters: string; + template: string; + details: OllamaModelDetails; + model_info: Record<string, unknown>; +} + +// ─── /api/ps ──────────────────────────────────────────────────────────────── + +export interface OllamaRunningModel { + name: string; + model: string; + size: number; + digest: string; + details: OllamaModelDetails; + expires_at: string; + size_vram: number; +} + +export interface OllamaPsResponse { + models: OllamaRunningModel[]; +} + +// ─── /api/pull ────────────────────────────────────────────────────────────── + +export interface OllamaPullProgress { + status: string; + digest?: string; + total?: number; + completed?: number; +} + +// ─── /api/version ─────────────────────────────────────────────────────────── + +export interface OllamaVersionResponse { + version: string; +} + +// ─── /api/chat ────────────────────────────────────────────────────────────── + +export interface OllamaChatMessage { + role: "system" | "user" | "assistant" | "tool"; + content: string; + images?: string[]; + tool_calls?: OllamaToolCall[]; +} + +export interface OllamaToolCall { + function: { + name: string; + arguments: Record<string, unknown>; + }; +} + +export interface OllamaTool { + type: "function"; + function: { + name: string; + description: string; + parameters: { + type:
"object"; + required?: string[]; + properties: Record<string, unknown>; + }; + }; +} + +export interface OllamaChatRequest { + model: string; + messages: OllamaChatMessage[]; + stream?: boolean; + tools?: OllamaTool[]; + options?: { + num_ctx?: number; + num_predict?: number; + temperature?: number; + top_p?: number; + top_k?: number; + stop?: string[]; + }; + keep_alive?: string; +} + +export interface OllamaChatResponse { + model: string; + created_at: string; + message: OllamaChatMessage; + done: boolean; + done_reason?: string; + total_duration?: number; + load_duration?: number; + prompt_eval_count?: number; + prompt_eval_duration?: number; + eval_count?: number; + eval_duration?: number; +} diff --git a/src/resources/extensions/search-the-web/extension-manifest.json b/src/resources/extensions/search-the-web/extension-manifest.json index 582c341d8..b17107d76 100644 --- a/src/resources/extensions/search-the-web/extension-manifest.json +++ b/src/resources/extensions/search-the-web/extension-manifest.json @@ -8,6 +8,6 @@ "provides": { "tools": ["search-the-web", "fetch_page", "search_and_read", "web_search"], "commands": ["search-provider"], - "hooks": ["model_select", "before_provider_request"] + "hooks": ["session_start", "model_select", "before_provider_request"] } } diff --git a/src/resources/extensions/shared/interview-ui.ts b/src/resources/extensions/shared/interview-ui.ts index 823568330..99a3501b8 100644 --- a/src/resources/extensions/shared/interview-ui.ts +++ b/src/resources/extensions/shared/interview-ui.ts @@ -105,7 +105,7 @@ export interface WrapUpOptions { // ─── Constants ──────────────────────────────────────────────────────────────── const OTHER_OPTION_LABEL = "None of the above"; -const OTHER_OPTION_DESCRIPTION = "Press TAB to add optional notes."; +const OTHER_OPTION_DESCRIPTION = "Select to type your own answer."; // ─── Wrap-up screen ─────────────────────────────────────────────────────────── @@ -295,6 +295,17 @@ export async function showInterviewRound(
states[currentIdx].committedIndex = states[currentIdx].cursorIndex; } + // Auto-open the notes field when "None of the above" is selected + // so the user can immediately provide a free-text explanation + // instead of being trapped in a re-asking loop (bug #2715). + if (!isMultiSelect(currentIdx) && states[currentIdx].cursorIndex === noneOrDoneIdx(currentIdx)) { + states[currentIdx].notesVisible = true; + focusNotes = true; + loadStateToEditor(); + refresh(); + return; + } + if (isMultiQuestion && currentIdx < questions.length - 1) { let next = currentIdx + 1; for (let i = 0; i < questions.length; i++) { diff --git a/src/resources/extensions/shared/tests/ask-user-freetext.test.ts b/src/resources/extensions/shared/tests/ask-user-freetext.test.ts new file mode 100644 index 000000000..065150899 --- /dev/null +++ b/src/resources/extensions/shared/tests/ask-user-freetext.test.ts @@ -0,0 +1,156 @@ +/** + * Tests for ask-user-questions free-text input behavior. + * + * Bug #2715: The ask-user-questions UI lacks free-text input and can trap + * users in a loop when the agent needs an explanation rather than a fixed + * choice. + * + * These tests exercise the RPC fallback path (ctx.ui.select) in + * ask-user-questions.ts to ensure that selecting "None of the above" + * triggers a follow-up free-text input prompt via ctx.ui.input(). + */ + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; + +// The ask-user-questions extension registers a tool via pi.registerTool(). +// We capture that registration and call execute() directly with a mock context. 
+import AskUserQuestions from "../../ask-user-questions.js"; + +interface CapturedTool { + name: string; + execute: (...args: any[]) => Promise<any>; +} + +function captureTool(): CapturedTool { + let captured: CapturedTool | null = null; + const fakePi = { + registerTool(tool: any) { + captured = { name: tool.name, execute: tool.execute }; + }, + }; + AskUserQuestions(fakePi as any); + if (!captured) throw new Error("No tool registered"); + return captured; +} + +function makeQuestion(id: string, options: string[]) { + return { + id, + header: id, + question: `Pick for ${id}`, + options: options.map((label) => ({ label, description: `Desc for ${label}` })), + }; +} + +function makeMockCtx(opts: { + selectReturns: (string | string[] | undefined)[]; + inputReturns?: (string | undefined)[]; +}) { + let selectCallIdx = 0; + let inputCallIdx = 0; + const selectCalls: { title: string; options: string[] }[] = []; + const inputCalls: { title: string; placeholder?: string }[] = []; + + return { + ctx: { + hasUI: true, + ui: { + custom: () => undefined, // force RPC fallback + select: async (title: string, options: string[], selectOpts?: any) => { + selectCalls.push({ title, options }); + return opts.selectReturns[selectCallIdx++]; + }, + input: async (title: string, placeholder?: string) => { + inputCalls.push({ title, placeholder }); + return (opts.inputReturns ??
[])[inputCallIdx++]; + }, + }, + }, + selectCalls, + inputCalls, + }; +} + +describe("ask-user-questions RPC fallback free-text", () => { + it("prompts for free-text input when user selects 'None of the above'", async () => { + const tool = captureTool(); + const { ctx, selectCalls, inputCalls } = makeMockCtx({ + selectReturns: ["None of the above"], + inputReturns: ["I need to explain my reasoning"], + }); + + const params = { + questions: [makeQuestion("q1", ["Option A", "Option B"])], + }; + + const result = await tool.execute("call-1", params, undefined, undefined, ctx); + + // The select should have been called with "None of the above" appended + assert.equal(selectCalls.length, 1); + assert.ok( + selectCalls[0].options.includes("None of the above"), + "select options should include 'None of the above'", + ); + + // A follow-up input() call should have been made to collect free text + assert.equal(inputCalls.length, 1, "should call ctx.ui.input() for free-text after 'None of the above'"); + + // The result should include the user's free-text note + const text = result.content[0]?.text; + assert.ok(text, "result should have text content"); + const parsed = JSON.parse(text); + assert.ok( + parsed.answers.q1, + "answer for q1 should exist", + ); + const q1Answers = parsed.answers.q1.answers; + assert.ok( + q1Answers.some((a: string) => a.includes("I need to explain my reasoning")), + "answer should include the free-text explanation", + ); + }); + + it("does NOT prompt for free-text when user selects a normal option", async () => { + const tool = captureTool(); + const { ctx, inputCalls } = makeMockCtx({ + selectReturns: ["Option A"], + }); + + const params = { + questions: [makeQuestion("q1", ["Option A", "Option B"])], + }; + + const result = await tool.execute("call-2", params, undefined, undefined, ctx); + + // No input() call should have been made + assert.equal(inputCalls.length, 0, "should NOT call ctx.ui.input() for a normal option"); + + const text = 
result.content[0]?.text; + const parsed = JSON.parse(text); + assert.deepStrictEqual(parsed.answers.q1.answers, ["Option A"]); + }); + + it("handles cancelled free-text input gracefully", async () => { + const tool = captureTool(); + const { ctx, inputCalls } = makeMockCtx({ + selectReturns: ["None of the above"], + inputReturns: [undefined], // user cancelled the input + }); + + const params = { + questions: [makeQuestion("q1", ["Option A", "Option B"])], + }; + + const result = await tool.execute("call-3", params, undefined, undefined, ctx); + + // Input should still have been called + assert.equal(inputCalls.length, 1, "should call ctx.ui.input() even if user cancels"); + + // Result should still contain "None of the above" without a note + const text = result.content[0]?.text; + assert.ok(text, "result should have text content"); + const parsed = JSON.parse(text); + assert.deepStrictEqual(parsed.answers.q1.answers, ["None of the above"]); + }); +}); diff --git a/src/resources/skills/create-gsd-extension/SKILL.md b/src/resources/skills/create-gsd-extension/SKILL.md index e233c0229..28c51efca 100644 --- a/src/resources/skills/create-gsd-extension/SKILL.md +++ b/src/resources/skills/create-gsd-extension/SKILL.md @@ -7,9 +7,11 @@ description: Create, debug, and iterate on GSD extensions (TypeScript modules th **Extensions are TypeScript modules** that hook into GSD's runtime (built on pi). They export a default function receiving `ExtensionAPI` and use it to subscribe to events, register tools/commands/shortcuts, and interact with the session. 
-**GSD extension paths:** -- Global extensions: `~/.gsd/agent/extensions/*.ts` or `~/.gsd/agent/extensions/*/index.ts` -- Project-local extensions: `.gsd/extensions/*.ts` or `.gsd/extensions/*/index.ts` +**GSD extension paths (community/user-installed extensions):** +- Global: `~/.pi/agent/extensions/*.ts` or `~/.pi/agent/extensions/*/index.ts` +- Project-local: `.gsd/extensions/*.ts` or `.gsd/extensions/*/index.ts` + +Note: `~/.gsd/agent/extensions/` is reserved for bundled extensions synced from the gsd-pi package. Community extensions placed there are silently ignored by the loader. **The three primitives:** 1. **Events** — Listen and react (`pi.on("event", handler)`). Can block tool calls, modify messages, inject context. diff --git a/src/resources/skills/create-gsd-extension/references/key-rules-gotchas.md b/src/resources/skills/create-gsd-extension/references/key-rules-gotchas.md index 75f73f2c8..11b300677 100644 --- a/src/resources/skills/create-gsd-extension/references/key-rules-gotchas.md +++ b/src/resources/skills/create-gsd-extension/references/key-rules-gotchas.md @@ -26,11 +26,12 @@ Non-negotiable rules and common gotchas when building GSD extensions. -**GSD extension paths:** -- Global: `~/.gsd/agent/extensions/*.ts` -- Global (subdir): `~/.gsd/agent/extensions/*/index.ts` +**GSD extension paths (community/user-installed extensions):** +- Global: `~/.pi/agent/extensions/*.ts` +- Global (subdir): `~/.pi/agent/extensions/*/index.ts` - Project-local: `.gsd/extensions/*.ts` - Project-local (subdir): `.gsd/extensions/*/index.ts` -The upstream pi docs reference `~/.pi` paths — GSD uses `~/.gsd` everywhere instead. +Note: `~/.gsd/agent/extensions/` is reserved for bundled extensions synced from the gsd-pi package. +Community extensions placed there are silently ignored by the loader. 
diff --git a/src/resources/skills/create-gsd-extension/workflows/add-capability.md b/src/resources/skills/create-gsd-extension/workflows/add-capability.md index a069e4570..eac2e4ea1 100644 --- a/src/resources/skills/create-gsd-extension/workflows/add-capability.md +++ b/src/resources/skills/create-gsd-extension/workflows/add-capability.md @@ -14,7 +14,7 @@ Read the reference file for the specific capability being added: ## Step 1: Identify the Extension Locate the existing extension file. Check: -- `~/.gsd/agent/extensions/` (global) +- `~/.pi/agent/extensions/` (global community extensions) - `.gsd/extensions/` (project-local) Read the current extension code to understand its structure. @@ -28,7 +28,7 @@ If the extension needs new imports, add them at the top of the file. ## Step 3: Handle Structural Changes **Single file → Directory**: If the extension is outgrowing a single file: -1. Create `~/.gsd/agent/extensions/my-extension/` +1. Create `~/.pi/agent/extensions/my-extension/` 2. Move the file to `index.ts` 3. 
Extract helpers to separate files diff --git a/src/resources/skills/create-gsd-extension/workflows/create-extension.md b/src/resources/skills/create-gsd-extension/workflows/create-extension.md index 817efa13b..a91a39ae6 100644 --- a/src/resources/skills/create-gsd-extension/workflows/create-extension.md +++ b/src/resources/skills/create-gsd-extension/workflows/create-extension.md @@ -12,7 +12,7 @@ ## Step 1: Determine Scope and Placement Ask the user: -- **Global** (`~/.gsd/agent/extensions/`) — Available in all GSD sessions +- **Global** (`~/.pi/agent/extensions/`) — Available in all GSD sessions - **Project-local** (`.gsd/extensions/`) — Available only in this project ## Step 2: Determine Extension Capabilities @@ -36,12 +36,12 @@ Identify what the extension needs from the user's description: **Single file** — for small extensions (1-2 tools/commands, simple hooks): ``` -~/.gsd/agent/extensions/my-extension.ts +~/.pi/agent/extensions/my-extension.ts ``` **Directory with index.ts** — for multi-file extensions: ``` -~/.gsd/agent/extensions/my-extension/ +~/.pi/agent/extensions/my-extension/ ├── index.ts ├── tools.ts └── utils.ts @@ -49,7 +49,7 @@ Identify what the extension needs from the user's description: **Package with dependencies** — when npm packages are needed: ``` -~/.gsd/agent/extensions/my-extension/ +~/.pi/agent/extensions/my-extension/ ├── package.json ├── src/index.ts └── node_modules/ diff --git a/src/resources/skills/create-gsd-extension/workflows/debug-extension.md b/src/resources/skills/create-gsd-extension/workflows/debug-extension.md index 58b1e982e..5a8ac2295 100644 --- a/src/resources/skills/create-gsd-extension/workflows/debug-extension.md +++ b/src/resources/skills/create-gsd-extension/workflows/debug-extension.md @@ -32,12 +32,14 @@ gsd -e ./path/to/extension.ts ## Step 3: Verify File Location -Extensions must be in auto-discovery paths: -- `~/.gsd/agent/extensions/*.ts` -- `~/.gsd/agent/extensions/*/index.ts` +Community extensions must be 
in auto-discovery paths: +- `~/.pi/agent/extensions/*.ts` +- `~/.pi/agent/extensions/*/index.ts` - `.gsd/extensions/*.ts` - `.gsd/extensions/*/index.ts` +Note: `~/.gsd/agent/extensions/` is reserved for bundled extensions synced from the gsd-pi package. + The file must `export default function(pi: ExtensionAPI) { ... }`. ## Step 4: Check for Common Mistakes diff --git a/src/startup-model-validation.ts b/src/startup-model-validation.ts new file mode 100644 index 000000000..1a4141f00 --- /dev/null +++ b/src/startup-model-validation.ts @@ -0,0 +1,78 @@ +/** + * Startup model validation — extracted from cli.ts so it can be called + * AFTER extensions register their models in the ModelRegistry. + * + * Before this extraction (bug #2626), the validation ran before + * createAgentSession(), meaning extension-provided models (e.g. + * claude-code/claude-sonnet-4-6) were not yet in the registry. + * configuredExists was always false for extension models, causing the + * user's valid choice to be silently overwritten with a built-in fallback. + */ + +import { getPiDefaultModelAndProvider } from './pi-migration.js' + +interface MinimalModel { + provider: string + id: string +} + +interface MinimalModelRegistry { + getAll(): MinimalModel[] + getAvailable(): MinimalModel[] +} + +type ThinkingLevel = 'off' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' + +interface MinimalSettingsManager { + getDefaultProvider(): string | undefined + getDefaultModel(): string | undefined + getDefaultThinkingLevel(): ThinkingLevel | undefined + setDefaultModelAndProvider(provider: string, modelId: string): void + setDefaultThinkingLevel(level: ThinkingLevel): void +} + +/** + * Validate the configured default model against the registry. + * + * If the configured model exists in the registry, this is a no-op — the + * user's choice is preserved. If it does not exist (stale settings from a + * prior install, or genuinely removed model), a fallback is selected and + * written to settings. 
+ * + * IMPORTANT: Call this AFTER createAgentSession() so that extension- + * provided models have been registered in the ModelRegistry. + */ +export function validateConfiguredModel( + modelRegistry: MinimalModelRegistry, + settingsManager: MinimalSettingsManager, +): void { + const configuredProvider = settingsManager.getDefaultProvider() + const configuredModel = settingsManager.getDefaultModel() + const allModels = modelRegistry.getAll() + const availableModels = modelRegistry.getAvailable() + const configuredExists = configuredProvider && configuredModel && + allModels.some((m) => m.provider === configuredProvider && m.id === configuredModel) + + if (!configuredModel || !configuredExists) { + // Model not configured at all, or removed from registry — pick a fallback. + // Only fires when the model is genuinely unknown (not just temporarily unavailable). + const piDefault = getPiDefaultModelAndProvider() + const preferred = + (piDefault + ? availableModels.find((m) => m.provider === piDefault.provider && m.id === piDefault.model) + : undefined) || + availableModels.find((m) => m.provider === 'openai' && m.id === 'gpt-5.4') || + availableModels.find((m) => m.provider === 'openai') || + availableModels.find((m) => m.provider === 'anthropic' && m.id === 'claude-opus-4-6') || + availableModels.find((m) => m.provider === 'anthropic' && m.id.includes('opus')) || + availableModels.find((m) => m.provider === 'anthropic') || + availableModels[0] + if (preferred) { + settingsManager.setDefaultModelAndProvider(preferred.provider, preferred.id) + } + } + + if (settingsManager.getDefaultThinkingLevel() !== 'off' && !configuredExists) { + settingsManager.setDefaultThinkingLevel('off') + } +} diff --git a/src/tests/auto-mode-piped.test.ts b/src/tests/auto-mode-piped.test.ts new file mode 100644 index 000000000..005dddadd --- /dev/null +++ b/src/tests/auto-mode-piped.test.ts @@ -0,0 +1,106 @@ +/** + * Tests for `gsd auto` routing — verifies that `auto` is recognized as a + * 
subcommand alias for `headless auto` so it doesn't fall through to the + * interactive TUI, which hangs when stdin/stdout are piped. + * + * Regression test for #2732. + */ + +import test from 'node:test' +import assert from 'node:assert/strict' +import { readFileSync } from 'node:fs' +import { join } from 'node:path' +import { fileURLToPath } from 'node:url' + +const projectRoot = join(fileURLToPath(import.meta.url), '..', '..', '..') + +// --------------------------------------------------------------------------- +// Source-level verification: cli.ts must handle 'auto' before TUI +// --------------------------------------------------------------------------- + +/** + * Read cli.ts and verify the 'auto' subcommand is routed before the + * interactive TUI code path. This is the definitive test — if cli.ts doesn't + * handle 'auto', piped invocations will hang (#2732). + */ +function cliSourceHandlesAutoBeforeTUI(): boolean { + const cliSource = readFileSync(join(projectRoot, 'src', 'cli.ts'), 'utf-8') + + // Find the position of the 'auto' subcommand handler + // It should appear as: messages[0] === 'auto' + const autoHandlerMatch = cliSource.match( + /messages\[0\]\s*===\s*['"]auto['"]/, + ) + if (!autoHandlerMatch) return false + + // Find the position of the InteractiveMode TUI entry + const tuiMatch = cliSource.match(/new\s+InteractiveMode\s*\(/) + if (!tuiMatch) return false + + // The auto handler must appear BEFORE the TUI in the source + const autoPos = cliSource.indexOf(autoHandlerMatch[0]) + const tuiPos = cliSource.indexOf(tuiMatch[0]) + + return autoPos < tuiPos +} + +// ═══════════════════════════════════════════════════════════════════════════ +// Core regression test: `gsd auto` must be handled before TUI (#2732) +// ═══════════════════════════════════════════════════════════════════════════ + +test('cli.ts handles `auto` subcommand before interactive TUI (#2732)', () => { + assert.ok( + cliSourceHandlesAutoBeforeTUI(), + 'cli.ts must route 
messages[0] === "auto" to a handler BEFORE ' + + 'reaching `new InteractiveMode()`. Without this, `gsd auto` with ' + + 'piped stdin/stdout falls through to the TUI and hangs.', + ) +}) + +// ═══════════════════════════════════════════════════════════════════════════ +// Verify the auto handler routes to headless (not a stub/no-op) +// ═══════════════════════════════════════════════════════════════════════════ + +test('cli.ts routes `auto` to headless runner', () => { + const cliSource = readFileSync(join(projectRoot, 'src', 'cli.ts'), 'utf-8') + + // The auto handler block should import or reference headless + // Look for the auto block and check it contains runHeadless or headless + const autoBlockRegex = /messages\[0\]\s*===\s*['"]auto['"][\s\S]*?runHeadless/ + assert.ok( + autoBlockRegex.test(cliSource), + '`auto` subcommand handler must invoke runHeadless to delegate to headless mode', + ) +}) + +// ═══════════════════════════════════════════════════════════════════════════ +// Verify piped-mode hint in error message when auto mode is not available +// ═══════════════════════════════════════════════════════════════════════════ + +test('TTY error message mentions `gsd auto` as a non-interactive alternative', () => { + const cliSource = readFileSync(join(projectRoot, 'src', 'cli.ts'), 'utf-8') + + // The TTY error message should mention auto as an alternative + assert.ok( + cliSource.includes('gsd auto') || cliSource.includes('gsd headless'), + 'TTY error hints should mention headless/auto mode as alternatives', + ) +}) + +// ═══════════════════════════════════════════════════════════════════════════ +// `gsd headless` still works (no regression) +// ═══════════════════════════════════════════════════════════════════════════ + +test('cli.ts handles `headless` subcommand before interactive TUI', () => { + const cliSource = readFileSync(join(projectRoot, 'src', 'cli.ts'), 'utf-8') + + const headlessMatch = cliSource.match(/messages\[0\]\s*===\s*['"]headless['"]/) 
+ const tuiMatch = cliSource.match(/new\s+InteractiveMode\s*\(/) + + assert.ok(headlessMatch, 'headless subcommand handler exists') + assert.ok(tuiMatch, 'InteractiveMode TUI exists') + + const headlessPos = cliSource.indexOf(headlessMatch![0]) + const tuiPos = cliSource.indexOf(tuiMatch![0]) + assert.ok(headlessPos < tuiPos, 'headless handler is before TUI') +}) diff --git a/src/tests/auto-piped-io.test.ts b/src/tests/auto-piped-io.test.ts new file mode 100644 index 000000000..84bb5fbc1 --- /dev/null +++ b/src/tests/auto-piped-io.test.ts @@ -0,0 +1,172 @@ +/** + * Tests for auto-mode piped I/O detection (#2732). + * + * When `gsd auto` is run with piped stdout (e.g. `gsd auto | cat`), + * the CLI should detect the non-TTY stdout and redirect to headless + * mode instead of hanging in interactive mode trying to set up a TUI + * on a non-terminal output stream. + * + * Also verifies the stdout TTY gate at the interactive mode entry point: + * when stdout is piped, interactive mode must not be entered regardless + * of the subcommand. + */ + +import test from "node:test"; +import assert from "node:assert/strict"; + +// ─── Extracted detection logic (mirrors cli.ts) ─────────────────────────── + +/** + * Subcommands that are explicitly handled before the interactive mode + * section in cli.ts and therefore never fall through to the TUI. + */ +const EXPLICIT_SUBCOMMANDS = new Set([ + "headless", + "update", + "config", + "worktree", + "wt", + "sessions", + "web", +]); + +/** + * Detect whether the current subcommand should be auto-redirected + * to headless mode when stdout is not a TTY. + * + * Returns true when: the subcommand is "auto" AND stdout is piped. + */ +function shouldRedirectAutoToHeadless( + subcommand: string | undefined, + stdoutIsTTY: boolean, +): boolean { + if (stdoutIsTTY) return false; + return subcommand === "auto"; +} + +/** + * Check whether interactive mode can be entered. + * Both stdin AND stdout must be TTY for the TUI to work. 
+ */ +function canEnterInteractiveMode( + stdinIsTTY: boolean, + stdoutIsTTY: boolean, +): boolean { + return stdinIsTTY && stdoutIsTTY; +} + +/** + * Returns true if the subcommand is handled by an explicit branch + * in cli.ts and will never reach the interactive mode section. + */ +function isExplicitSubcommand(subcommand: string | undefined): boolean { + return subcommand !== undefined && EXPLICIT_SUBCOMMANDS.has(subcommand); +} + +// ─── shouldRedirectAutoToHeadless ───────────────────────────────────────── + +test("redirects 'auto' to headless when stdout is piped", () => { + assert.ok(shouldRedirectAutoToHeadless("auto", false)); +}); + +test("does NOT redirect 'auto' when stdout is a TTY", () => { + assert.ok(!shouldRedirectAutoToHeadless("auto", true)); +}); + +test("does NOT redirect non-auto subcommands when stdout is piped", () => { + assert.ok(!shouldRedirectAutoToHeadless("headless", false)); + assert.ok(!shouldRedirectAutoToHeadless("config", false)); + assert.ok(!shouldRedirectAutoToHeadless("update", false)); + assert.ok(!shouldRedirectAutoToHeadless(undefined, false)); +}); + +// ─── canEnterInteractiveMode ────────────────────────────────────────────── + +test("allows interactive mode when both stdin and stdout are TTY", () => { + assert.ok(canEnterInteractiveMode(true, true)); +}); + +test("blocks interactive mode when stdin is piped", () => { + assert.ok(!canEnterInteractiveMode(false, true)); +}); + +test("blocks interactive mode when stdout is piped", () => { + assert.ok(!canEnterInteractiveMode(true, false)); +}); + +test("blocks interactive mode when both stdin and stdout are piped", () => { + assert.ok(!canEnterInteractiveMode(false, false)); +}); + +// ─── isExplicitSubcommand ───────────────────────────────────────────────── + +test("identifies explicitly handled subcommands", () => { + assert.ok(isExplicitSubcommand("headless")); + assert.ok(isExplicitSubcommand("update")); + assert.ok(isExplicitSubcommand("config")); + 
assert.ok(isExplicitSubcommand("worktree")); + assert.ok(isExplicitSubcommand("wt")); + assert.ok(isExplicitSubcommand("sessions")); + assert.ok(isExplicitSubcommand("web")); +}); + +test("does NOT identify 'auto' as explicit subcommand", () => { + assert.ok(!isExplicitSubcommand("auto")); +}); + +test("does NOT identify undefined as explicit subcommand", () => { + assert.ok(!isExplicitSubcommand(undefined)); +}); + +// ─── End-to-end scenario: gsd auto | cat ────────────────────────────────── + +test("scenario: 'gsd auto 2>&1 | cat' — should redirect to headless", () => { + // Simulates: subcommand = "auto", stdin is TTY, stdout is piped + const subcommand = "auto"; + const stdinIsTTY = true; + const stdoutIsTTY = false; + + // Interactive mode should be blocked + assert.ok(!canEnterInteractiveMode(stdinIsTTY, stdoutIsTTY)); + + // Auto should be redirected to headless + assert.ok(shouldRedirectAutoToHeadless(subcommand, stdoutIsTTY)); +}); + +test("scenario: 'gsd auto > /tmp/output.txt' — should redirect to headless", () => { + const subcommand = "auto"; + const stdinIsTTY = true; + const stdoutIsTTY = false; + + assert.ok(!canEnterInteractiveMode(stdinIsTTY, stdoutIsTTY)); + assert.ok(shouldRedirectAutoToHeadless(subcommand, stdoutIsTTY)); +}); + +test("scenario: 'gsd auto' in terminal — normal interactive mode", () => { + const subcommand = "auto"; + const stdinIsTTY = true; + const stdoutIsTTY = true; + + assert.ok(canEnterInteractiveMode(stdinIsTTY, stdoutIsTTY)); + assert.ok(!shouldRedirectAutoToHeadless(subcommand, stdoutIsTTY)); +}); + +test("scenario: 'echo msg | gsd auto' — stdin piped, should redirect", () => { + const subcommand = "auto"; + const stdinIsTTY = false; + const stdoutIsTTY = true; // stdout is TTY even though stdin is piped + + // stdout is TTY, so auto redirect doesn't trigger... 
+ assert.ok(!shouldRedirectAutoToHeadless(subcommand, stdoutIsTTY)); + // ...but interactive mode is blocked because stdin is piped + assert.ok(!canEnterInteractiveMode(stdinIsTTY, stdoutIsTTY)); +}); + +test("scenario: 'echo msg | gsd auto | cat' — both piped", () => { + const subcommand = "auto"; + const stdinIsTTY = false; + const stdoutIsTTY = false; + + assert.ok(!canEnterInteractiveMode(stdinIsTTY, stdoutIsTTY)); + assert.ok(shouldRedirectAutoToHeadless(subcommand, stdoutIsTTY)); +}); diff --git a/src/tests/create-gsd-extension-paths.test.ts b/src/tests/create-gsd-extension-paths.test.ts new file mode 100644 index 000000000..7aff613b3 --- /dev/null +++ b/src/tests/create-gsd-extension-paths.test.ts @@ -0,0 +1,76 @@ +/** + * Validates that the create-gsd-extension skill documentation uses the correct + * community extension install path (~/.pi/agent/extensions/) instead of the + * bundled-only path (~/.gsd/agent/extensions/). + * + * Bug: https://github.com/gsd-build/gsd-2/issues/3131 + * + * ~/.gsd/agent/extensions/ is reserved for bundled extensions synced from + * the gsd-pi package. Community/user extensions must use ~/.pi/agent/extensions/. 
+ */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const skillDir = join(__dirname, "..", "resources", "skills", "create-gsd-extension"); + +function readSkillFile(relativePath: string): string { + return readFileSync(join(skillDir, relativePath), "utf-8"); +} + +// All documentation files that reference community extension paths +const docsToCheck: { file: string; label: string }[] = [ + { file: "SKILL.md", label: "SKILL.md" }, + { file: "references/key-rules-gotchas.md", label: "key-rules-gotchas.md" }, + { file: "workflows/add-capability.md", label: "add-capability.md" }, + { file: "workflows/create-extension.md", label: "create-extension.md" }, + { file: "workflows/debug-extension.md", label: "debug-extension.md" }, +]; + +test("create-gsd-extension docs use ~/.pi/agent/extensions/ for community extensions", async (t) => { + for (const { file, label } of docsToCheck) { + await t.test(`${label} references ~/.pi/agent/extensions/ for global extensions`, () => { + const content = readSkillFile(file); + + // The doc should reference ~/.pi/agent/extensions/ (community path) + assert.ok( + content.includes("~/.pi/agent/extensions/"), + `${label} should reference ~/.pi/agent/extensions/ for community extensions`, + ); + }); + } +}); + +test("create-gsd-extension docs do NOT direct users to install in ~/.gsd/agent/extensions/", async (t) => { + for (const { file, label } of docsToCheck) { + await t.test(`${label} does not tell users to place extensions in ~/.gsd/agent/extensions/`, () => { + const content = readSkillFile(file); + + // ~/.gsd/agent/extensions/ should only appear in context that clearly marks + // it as reserved/bundled, never as an install target for community extensions. 
+ // We check that it does NOT appear as a "Global extensions:" or "Global:" path directive. + const lines = content.split("\n"); + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + if (line.includes("~/.gsd/agent/extensions/")) { + // If the line references ~/.gsd/agent/extensions/, it must be in a + // context explaining it is reserved/bundled — not as an install instruction. + const context = lines.slice(Math.max(0, i - 2), i + 3).join("\n"); + const isBundledContext = + context.toLowerCase().includes("bundled") || + context.toLowerCase().includes("reserved") || + context.toLowerCase().includes("synced"); + assert.ok( + isBundledContext, + `${label} line ${i + 1} references ~/.gsd/agent/extensions/ without ` + + `marking it as bundled/reserved. Context:\n${context}`, + ); + } + } + }); + } +}); diff --git a/src/tests/ensure-workspace-builds.test.ts b/src/tests/ensure-workspace-builds.test.ts index f256c7afe..965d2348e 100644 --- a/src/tests/ensure-workspace-builds.test.ts +++ b/src/tests/ensure-workspace-builds.test.ts @@ -1,12 +1,12 @@ import { describe, it, beforeEach, afterEach } from "node:test"; import assert from "node:assert/strict"; -import { mkdtempSync, writeFileSync, mkdirSync, rmSync, utimesSync } from "node:fs"; +import { mkdtempSync, writeFileSync, mkdirSync, rmSync, utimesSync, statSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; import { createRequire } from "node:module"; const require = createRequire(import.meta.url); -const { newestSrcMtime } = require("../../scripts/ensure-workspace-builds.cjs"); +const { newestSrcMtime, detectStalePackages } = require("../../scripts/ensure-workspace-builds.cjs"); describe("newestSrcMtime", () => { let tmp: string; @@ -62,3 +62,95 @@ describe("newestSrcMtime", () => { assert.equal(newestSrcMtime(tmp), 0); }); }); + +describe("detectStalePackages", () => { + let tmp: string; + + beforeEach(() => { tmp = mkdtempSync(join(tmpdir(), "gsd-stale-test-")); 
}); + afterEach(() => { rmSync(tmp, { recursive: true, force: true }); }); + + /** + * Helper to create a fake workspace package with src/ and dist/ directories. + * Sets timestamps to simulate npm tarball extraction where src/ files can be + * 1 second newer than dist/ files. + */ + function createFakePackage( + packagesDir: string, + pkgName: string, + opts: { srcNewerThanDist?: boolean; missingDist?: boolean } = {}, + ): void { + const pkgDir = join(packagesDir, pkgName); + const srcDir = join(pkgDir, "src"); + const distDir = join(pkgDir, "dist"); + mkdirSync(srcDir, { recursive: true }); + writeFileSync(join(srcDir, "index.ts"), "export const x = 1;"); + + if (!opts.missingDist) { + mkdirSync(distDir, { recursive: true }); + writeFileSync(join(distDir, "index.js"), "export const x = 1;"); + } + + if (opts.srcNewerThanDist && !opts.missingDist) { + // Simulate npm tarball extraction: src/ is 1 second newer than dist/ + const distTime = new Date("2024-06-01T00:00:00Z"); + const srcTime = new Date("2024-06-01T00:00:01Z"); + utimesSync(join(distDir, "index.js"), distTime, distTime); + utimesSync(join(srcDir, "index.ts"), srcTime, srcTime); + } + } + + it("detects missing dist/ as stale regardless of .git presence", () => { + const packagesDir = join(tmp, "packages"); + mkdirSync(packagesDir, { recursive: true }); + createFakePackage(packagesDir, "test-pkg", { missingDist: true }); + + const result = detectStalePackages(tmp, ["test-pkg"]); + assert.deepEqual(result, ["test-pkg"]); + }); + + it("detects stale src > dist timestamps in a git repo (dev clone)", () => { + // Simulate a git repo by creating .git directory + mkdirSync(join(tmp, ".git"), { recursive: true }); + const packagesDir = join(tmp, "packages"); + mkdirSync(packagesDir, { recursive: true }); + createFakePackage(packagesDir, "test-pkg", { srcNewerThanDist: true }); + + const result = detectStalePackages(tmp, ["test-pkg"]); + assert.deepEqual(result, ["test-pkg"]); + }); + + it("skips staleness check 
when not in a git repo (npm tarball install)", () => { + // No .git directory — simulates npm install from tarball + const packagesDir = join(tmp, "packages"); + mkdirSync(packagesDir, { recursive: true }); + createFakePackage(packagesDir, "test-pkg", { srcNewerThanDist: true }); + + // Even though src/ is newer than dist/, the script should NOT detect it + // as stale because we're in an npm tarball (no .git directory). + // The timestamp difference is an artifact of npm tarball extraction. + const result = detectStalePackages(tmp, ["test-pkg"]); + assert.deepEqual(result, [], "should not detect staleness in npm tarball installs (no .git)"); + }); + + it("still detects missing dist/ in npm tarball installs", () => { + // No .git directory — simulates npm install from tarball + const packagesDir = join(tmp, "packages"); + mkdirSync(packagesDir, { recursive: true }); + createFakePackage(packagesDir, "test-pkg", { missingDist: true }); + + // Missing dist/ should always be detected, even in npm installs + const result = detectStalePackages(tmp, ["test-pkg"]); + assert.deepEqual(result, ["test-pkg"]); + }); + + it("returns empty array when dist/ is up to date", () => { + mkdirSync(join(tmp, ".git"), { recursive: true }); + const packagesDir = join(tmp, "packages"); + mkdirSync(packagesDir, { recursive: true }); + createFakePackage(packagesDir, "test-pkg"); + // Default: timestamps are equal (both set by writeFileSync at ~same time) + + const result = detectStalePackages(tmp, ["test-pkg"]); + assert.deepEqual(result, []); + }); +}); diff --git a/src/tests/extension-model-validation.test.ts b/src/tests/extension-model-validation.test.ts new file mode 100644 index 000000000..22ae05c1a --- /dev/null +++ b/src/tests/extension-model-validation.test.ts @@ -0,0 +1,169 @@ +/** + * Regression test for #2626: Extension-provided models silently overwritten on startup. 
+ * + * The startup model-validation logic must run AFTER extensions register their + * models in the ModelRegistry. When validation runs before extensions load, + * extension-provided models (e.g. claude-code/claude-sonnet-4-6) are not yet + * in the registry, so configuredExists is always false and the user's choice + * is silently replaced with a built-in fallback. + * + * This test exercises `validateConfiguredModel()` directly (once extracted) to + * verify that: + * (a) extension models present in the registry are preserved, + * (b) genuinely missing models still trigger fallback selection. + */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, writeFileSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +const { validateConfiguredModel } = await import("../startup-model-validation.js"); + +/** + * Minimal stub of ModelRegistry with just getAll() / getAvailable(). + */ +function fakeModelRegistry(models: Array<{ provider: string; id: string }>) { + const available = models.map((m) => ({ + ...m, + name: m.id, + contextWindow: 128_000, + maxTokens: 4096, + reasoning: false, + })); + return { + getAll: () => available, + getAvailable: () => available, + }; +} + +/** + * Minimal stub of SettingsManager backed by plain objects. 
+ */ +function fakeSettingsManager(initial: { provider?: string; model?: string }) { + let provider = initial.provider; + let model = initial.model; + let thinkingLevel = "off" as string; + return { + getDefaultProvider: () => provider, + getDefaultModel: () => model, + getDefaultThinkingLevel: () => thinkingLevel, + setDefaultModelAndProvider(p: string, m: string) { + provider = p; + model = m; + }, + setDefaultThinkingLevel(level: string) { + thinkingLevel = level; + }, + // Expose for assertions + get currentProvider() { return provider; }, + get currentModel() { return model; }, + }; +} + +// ────────────────────────────────────────────────────────────────────── +// Test: extension-provided model in registry must NOT be overwritten +// ────────────────────────────────────────────────────────────────────── +test("validateConfiguredModel preserves extension-provided model when present in registry", () => { + const settings = fakeSettingsManager({ + provider: "claude-code", + model: "claude-sonnet-4-6", + }); + + // Registry includes the extension model (simulating post-extension-load state) + const registry = fakeModelRegistry([ + { provider: "openai", id: "gpt-5.4" }, + { provider: "claude-code", id: "claude-sonnet-4-6" }, + ]); + + validateConfiguredModel(registry as any, settings as any); + + assert.equal(settings.currentProvider, "claude-code", + "provider must remain the user-configured extension provider"); + assert.equal(settings.currentModel, "claude-sonnet-4-6", + "model must remain the user-configured extension model"); +}); + +// ────────────────────────────────────────────────────────────────────── +// Test: genuinely removed model still triggers fallback +// ────────────────────────────────────────────────────────────────────── +test("validateConfiguredModel falls back when model is not in registry", () => { + const settings = fakeSettingsManager({ + provider: "openai", + model: "grok-2", // hypothetical removed model + }); + + const registry = 
fakeModelRegistry([ + { provider: "openai", id: "gpt-5.4" }, + { provider: "anthropic", id: "claude-opus-4-6" }, + ]); + + validateConfiguredModel(registry as any, settings as any); + + // Should have been overwritten to one of the available models + assert.notEqual(settings.currentModel, "grok-2", + "stale model must be replaced by a fallback"); + assert.ok(settings.currentProvider, "a fallback provider must be set"); + assert.ok(settings.currentModel, "a fallback model must be set"); +}); + +// ────────────────────────────────────────────────────────────────────── +// Test: no configured model at all triggers fallback +// ────────────────────────────────────────────────────────────────────── +test("validateConfiguredModel picks a fallback when nothing is configured", () => { + const settings = fakeSettingsManager({ + provider: undefined, + model: undefined, + }); + + const registry = fakeModelRegistry([ + { provider: "openai", id: "gpt-5.4" }, + ]); + + validateConfiguredModel(registry as any, settings as any); + + assert.equal(settings.currentProvider, "openai"); + assert.equal(settings.currentModel, "gpt-5.4"); +}); + +// ────────────────────────────────────────────────────────────────────── +// Test: thinking level reset when model doesn't exist +// ────────────────────────────────────────────────────────────────────── +test("validateConfiguredModel resets thinking level when model was replaced", () => { + const settings = fakeSettingsManager({ + provider: "openai", + model: "grok-2", + }); + // Simulate non-off thinking level + settings.setDefaultThinkingLevel("high"); + + const registry = fakeModelRegistry([ + { provider: "openai", id: "gpt-5.4" }, + ]); + + validateConfiguredModel(registry as any, settings as any); + + assert.equal(settings.getDefaultThinkingLevel(), "off", + "thinking level must be reset to off when model was not found"); +}); + +// ────────────────────────────────────────────────────────────────────── +// Test: thinking level NOT reset 
when model exists +// ────────────────────────────────────────────────────────────────────── +test("validateConfiguredModel preserves thinking level when model exists", () => { + const settings = fakeSettingsManager({ + provider: "openai", + model: "gpt-5.4", + }); + settings.setDefaultThinkingLevel("high"); + + const registry = fakeModelRegistry([ + { provider: "openai", id: "gpt-5.4" }, + ]); + + validateConfiguredModel(registry as any, settings as any); + + assert.equal(settings.getDefaultThinkingLevel(), "high", + "thinking level must be preserved when configured model exists"); +}); diff --git a/src/tests/google-search-auth.repro.test.ts b/src/tests/google-search-auth.repro.test.ts index 309bbb72b..5dac025fb 100644 --- a/src/tests/google-search-auth.repro.test.ts +++ b/src/tests/google-search-auth.repro.test.ts @@ -1,6 +1,6 @@ import test from "node:test"; import assert from "node:assert/strict"; -import googleSearchExtension from "../resources/extensions/google-search/index.ts"; +import googleSearchExtension from "../resources/extensions/google-search/index.js"; function createMockPI() { const handlers: any[] = []; diff --git a/src/tests/google-search-oauth-shape.test.ts b/src/tests/google-search-oauth-shape.test.ts new file mode 100644 index 000000000..66aa072e5 --- /dev/null +++ b/src/tests/google-search-oauth-shape.test.ts @@ -0,0 +1,215 @@ +/** + * google-search-oauth-shape.test.ts — Regression test for #2963. + * + * The OAuth fallback in google_search manually POSTs to the Cloud Code Assist + * endpoint. The original implementation sent a request body that did not match + * the endpoint's expected contract, causing a 400 INVALID_ARGUMENT response. + * + * This test captures the fetch call and asserts that the URL and body conform + * to the Cloud Code Assist wire format used by the working provider in + * packages/pi-ai/src/providers/google-gemini-cli.ts. 
+ */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import googleSearchExtension from "../resources/extensions/google-search/index.js"; + +// ── Helpers ───────────────────────────────────────────────────────────────── + +function createMockPI() { + const handlers: Array<{ event: string; handler: any }> = []; + let registeredTool: any = null; + + return { + handlers, + get registeredTool() { return registeredTool; }, + on(event: string, handler: any) { + handlers.push({ event, handler }); + }, + registerTool(tool: any) { + registeredTool = tool; + }, + async fire(event: string, eventData: any, ctx: any) { + for (const h of handlers) { + if (h.event === event) { + await h.handler(eventData, ctx); + } + } + }, + }; +} + +function mockModelRegistry(oauthJson?: string) { + return { + authStorage: { + hasAuth: async (_id: string) => !!oauthJson, + }, + getApiKeyForProvider: async (_provider: string) => oauthJson, + }; +} + +/** A valid SSE response body matching the Cloud Code Assist wire format. */ +function makeOkSSEBody() { + const payload = { + response: { + candidates: [{ + content: { + parts: [{ text: "Sunny, 85 °F in Austin today." 
}], + }, + groundingMetadata: { + groundingChunks: [ + { web: { title: "weather.com", uri: "https://weather.com/austin", domain: "weather.com" } }, + ], + webSearchQueries: ["weather today in Austin Texas"], + }, + }], + }, + }; + return `data: ${JSON.stringify(payload)}\n\n`; +} + +// ── Tests ──────────────────────────────────────────────────────────────────── + +test("#2963: OAuth fallback URL must include ?alt=sse query parameter", async (t) => { + const originalKey = process.env.GEMINI_API_KEY; + delete process.env.GEMINI_API_KEY; + const originalFetch = global.fetch; + + let capturedUrl = ""; + + (global as any).fetch = async (url: string, _options: any) => { + capturedUrl = url; + return { ok: true, text: async () => makeOkSSEBody() }; + }; + + t.after(() => { + global.fetch = originalFetch; + if (originalKey !== undefined) process.env.GEMINI_API_KEY = originalKey; + else delete process.env.GEMINI_API_KEY; + }); + + const pi = createMockPI(); + googleSearchExtension(pi as any); + + const oauthJson = JSON.stringify({ token: "tok", projectId: "proj" }); + const ctx = { ui: { notify() {} }, modelRegistry: mockModelRegistry(oauthJson) }; + + await pi.fire("session_start", {}, ctx); + await pi.registeredTool.execute("c1", { query: "weather" }, new AbortController().signal, () => {}, ctx); + + assert.ok( + capturedUrl.includes("?alt=sse"), + `URL must contain ?alt=sse for SSE parsing to work. 
Got: ${capturedUrl}`, + ); +}); + +test("#2963: OAuth fallback body must include userAgent field", async (t) => { + const originalKey = process.env.GEMINI_API_KEY; + delete process.env.GEMINI_API_KEY; + const originalFetch = global.fetch; + + let capturedBody: any = null; + + (global as any).fetch = async (_url: string, options: any) => { + capturedBody = JSON.parse(options.body); + return { ok: true, text: async () => makeOkSSEBody() }; + }; + + t.after(() => { + global.fetch = originalFetch; + if (originalKey !== undefined) process.env.GEMINI_API_KEY = originalKey; + else delete process.env.GEMINI_API_KEY; + }); + + const pi = createMockPI(); + googleSearchExtension(pi as any); + + const oauthJson = JSON.stringify({ token: "tok", projectId: "proj" }); + const ctx = { ui: { notify() {} }, modelRegistry: mockModelRegistry(oauthJson) }; + + await pi.fire("session_start", {}, ctx); + await pi.registeredTool.execute("c2", { query: "weather userAgent test" }, new AbortController().signal, () => {}, ctx); + + assert.ok(capturedBody, "fetch must have been called"); + assert.equal( + typeof capturedBody.userAgent, + "string", + "Body must include a userAgent field (Cloud Code Assist contract)", + ); +}); + +test("#2963: OAuth fallback body must contain google_search tool in correct format", async (t) => { + const originalKey = process.env.GEMINI_API_KEY; + delete process.env.GEMINI_API_KEY; + const originalFetch = global.fetch; + + let capturedBody: any = null; + + (global as any).fetch = async (_url: string, options: any) => { + capturedBody = JSON.parse(options.body); + return { ok: true, text: async () => makeOkSSEBody() }; + }; + + t.after(() => { + global.fetch = originalFetch; + if (originalKey !== undefined) process.env.GEMINI_API_KEY = originalKey; + else delete process.env.GEMINI_API_KEY; + }); + + const pi = createMockPI(); + googleSearchExtension(pi as any); + + const oauthJson = JSON.stringify({ token: "tok", projectId: "proj" }); + const ctx = { ui: { 
notify() {} }, modelRegistry: mockModelRegistry(oauthJson) }; + + await pi.fire("session_start", {}, ctx); + await pi.registeredTool.execute("c3", { query: "weather tools test" }, new AbortController().signal, () => {}, ctx); + + assert.ok(capturedBody, "fetch must have been called"); + const tools = capturedBody.request?.tools; + assert.ok(Array.isArray(tools), "request.tools must be an array"); + assert.ok( + tools.some((t: any) => t.googleSearch !== undefined), + `tools must contain a googleSearch entry. Got: ${JSON.stringify(tools)}`, + ); +}); + +test("#2963: OAuth fallback body has correct top-level structure", async (t) => { + const originalKey = process.env.GEMINI_API_KEY; + delete process.env.GEMINI_API_KEY; + const originalFetch = global.fetch; + + let capturedBody: any = null; + + (global as any).fetch = async (_url: string, options: any) => { + capturedBody = JSON.parse(options.body); + return { ok: true, text: async () => makeOkSSEBody() }; + }; + + t.after(() => { + global.fetch = originalFetch; + if (originalKey !== undefined) process.env.GEMINI_API_KEY = originalKey; + else delete process.env.GEMINI_API_KEY; + }); + + const pi = createMockPI(); + googleSearchExtension(pi as any); + + const oauthJson = JSON.stringify({ token: "tok", projectId: "proj" }); + const ctx = { ui: { notify() {} }, modelRegistry: mockModelRegistry(oauthJson) }; + + await pi.fire("session_start", {}, ctx); + await pi.registeredTool.execute("c4", { query: "weather structure test" }, new AbortController().signal, () => {}, ctx); + + assert.ok(capturedBody, "fetch must have been called"); + + // Top-level fields required by CloudCodeAssistRequest + assert.equal(capturedBody.project, "proj", "project must match the OAuth projectId"); + assert.ok(typeof capturedBody.model === "string" && capturedBody.model.length > 0, "model must be a non-empty string"); + assert.ok(capturedBody.request && typeof capturedBody.request === "object", "request must be an object"); + assert.ok(typeof 
capturedBody.userAgent === "string", "userAgent must be present"); + + // Nested request fields + assert.ok(Array.isArray(capturedBody.request.contents), "request.contents must be an array"); + assert.ok(Array.isArray(capturedBody.request.tools), "request.tools must be an array"); +}); diff --git a/src/tests/integration/web-auto-dashboard-lock-reconciliation.test.ts b/src/tests/integration/web-auto-dashboard-lock-reconciliation.test.ts new file mode 100644 index 000000000..dafdcffe1 --- /dev/null +++ b/src/tests/integration/web-auto-dashboard-lock-reconciliation.test.ts @@ -0,0 +1,199 @@ +/** + * Regression test for #2705: Web UI shows "Start auto" even while auto mode is + * already running. + * + * Root cause: collectAuthoritativeAutoDashboardData spawns a subprocess that + * imports auto.ts fresh. The module-level AutoSession state (s.active) is + * always false in a new process, so the subprocess always reports + * { active: false } even when auto IS running in the parent process. + * + * Fix: after obtaining the subprocess result, reconcile active/paused state + * with on-disk session lock and paused-session metadata. 
+ */
+
+import test from "node:test";
+import assert from "node:assert/strict";
+import { mkdirSync, mkdtempSync, writeFileSync, rmSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+
+import {
+  collectAuthoritativeAutoDashboardData,
+} from "../../web/auto-dashboard-service.ts";
+
+// ─── Helpers ──────────────────────────────────────────────────────────
+
+const repoRoot = join(import.meta.dirname, "..", "..", "..");
+
+function makeTempFixture(): { projectCwd: string; cleanup: () => void } {
+  const root = mkdtempSync(join(tmpdir(), "gsd-auto-lock-test-"));
+  const projectCwd = join(root, "project");
+  mkdirSync(projectCwd, { recursive: true });
+  return {
+    projectCwd,
+    cleanup: () => {
+      try { rmSync(root, { recursive: true, force: true }); } catch { /* best-effort */ }
+    },
+  };
+}
+
+function writeAutoModule(dir: string, payload: Record<string, unknown>): string {
+  const modulePath = join(dir, "fake-auto-dashboard.mjs");
+  writeFileSync(
+    modulePath,
+    `export function getAutoDashboardData() { return ${JSON.stringify(payload)}; }\n`,
+  );
+  return modulePath;
+}
+
+function writeSessionLock(projectCwd: string, data: Record<string, unknown>): void {
+  const gsdDir = join(projectCwd, ".gsd");
+  mkdirSync(gsdDir, { recursive: true });
+  writeFileSync(join(gsdDir, "auto.lock"), JSON.stringify(data));
+}
+
+function writePausedSession(projectCwd: string, data: Record<string, unknown>): void {
+  const runtimeDir = join(projectCwd, ".gsd", "runtime");
+  mkdirSync(runtimeDir, { recursive: true });
+  writeFileSync(join(runtimeDir, "paused-session.json"), JSON.stringify(data));
+}
+
+const INACTIVE_PAYLOAD = {
+  active: false,
+  paused: false,
+  stepMode: false,
+  startTime: 0,
+  elapsed: 0,
+  currentUnit: null,
+  completedUnits: [],
+  basePath: "",
+  totalCost: 0,
+  totalTokens: 0,
+};
+
+// ─── Tests ──────────────────────────────────────────────────────────
+
+test("#2705 regression: subprocess reports active=false but session lock exists with live PID → reconcile to 
active=true", async (t) => { + const fixture = makeTempFixture(); + t.after(() => fixture.cleanup()); + + const modulePath = writeAutoModule(fixture.projectCwd, INACTIVE_PAYLOAD); + + // On disk: session lock exists with current PID (simulates auto running in parent process). + writeSessionLock(fixture.projectCwd, { + pid: process.pid, + startedAt: new Date().toISOString(), + unitType: "execute-task", + unitId: "M001/S01/T01", + unitStartedAt: new Date().toISOString(), + }); + + const result = await collectAuthoritativeAutoDashboardData(repoRoot, { + env: { + ...process.env, + GSD_WEB_TEST_AUTO_DASHBOARD_MODULE: modulePath, + GSD_WEB_PROJECT_CWD: fixture.projectCwd, + }, + }); + + // After reconciliation, active MUST be true because the lock PID is alive. + assert.equal(result.active, true, "active must be reconciled to true when session lock PID is alive"); + assert.equal(result.paused, false, "paused must remain false when no paused-session exists"); +}); + +test("#2705: subprocess reports active=false and no session lock → remains inactive", async (t) => { + const fixture = makeTempFixture(); + t.after(() => fixture.cleanup()); + + const modulePath = writeAutoModule(fixture.projectCwd, INACTIVE_PAYLOAD); + + const result = await collectAuthoritativeAutoDashboardData(repoRoot, { + env: { + ...process.env, + GSD_WEB_TEST_AUTO_DASHBOARD_MODULE: modulePath, + GSD_WEB_PROJECT_CWD: fixture.projectCwd, + }, + }); + + assert.equal(result.active, false, "active must remain false when no session lock exists"); + assert.equal(result.paused, false); +}); + +test("#2705: subprocess reports active=false but paused-session.json exists → reconcile to paused=true", async (t) => { + const fixture = makeTempFixture(); + t.after(() => fixture.cleanup()); + + const modulePath = writeAutoModule(fixture.projectCwd, INACTIVE_PAYLOAD); + + writePausedSession(fixture.projectCwd, { + milestoneId: "M001", + pausedAt: new Date().toISOString(), + stepMode: false, + }); + + const result = 
await collectAuthoritativeAutoDashboardData(repoRoot, { + env: { + ...process.env, + GSD_WEB_TEST_AUTO_DASHBOARD_MODULE: modulePath, + GSD_WEB_PROJECT_CWD: fixture.projectCwd, + }, + }); + + assert.equal(result.paused, true, "paused must be reconciled to true when paused-session.json exists"); + assert.equal(result.active, false, "active must remain false when paused (paused overrides active)"); +}); + +test("#2705: subprocess reports active=true → no reconciliation needed", async (t) => { + const fixture = makeTempFixture(); + t.after(() => fixture.cleanup()); + + const activePayload = { + active: true, + paused: false, + stepMode: true, + startTime: 1000, + elapsed: 500, + currentUnit: { type: "execute-task", id: "M001/S01/T01", startedAt: 1000 }, + completedUnits: [], + basePath: fixture.projectCwd, + totalCost: 1.5, + totalTokens: 1000, + }; + const modulePath = writeAutoModule(fixture.projectCwd, activePayload); + + const result = await collectAuthoritativeAutoDashboardData(repoRoot, { + env: { + ...process.env, + GSD_WEB_TEST_AUTO_DASHBOARD_MODULE: modulePath, + GSD_WEB_PROJECT_CWD: fixture.projectCwd, + }, + }); + + assert.equal(result.active, true, "active should remain true when subprocess already reports it"); +}); + +test("#2705: session lock exists but PID is dead → remains inactive (stale lock)", async (t) => { + const fixture = makeTempFixture(); + t.after(() => fixture.cleanup()); + + const modulePath = writeAutoModule(fixture.projectCwd, INACTIVE_PAYLOAD); + + // Use a PID that is almost certainly dead. 
+ writeSessionLock(fixture.projectCwd, { + pid: 999999999, + startedAt: new Date().toISOString(), + unitType: "execute-task", + unitId: "M001/S01/T01", + unitStartedAt: new Date().toISOString(), + }); + + const result = await collectAuthoritativeAutoDashboardData(repoRoot, { + env: { + ...process.env, + GSD_WEB_TEST_AUTO_DASHBOARD_MODULE: modulePath, + GSD_WEB_PROJECT_CWD: fixture.projectCwd, + }, + }); + + assert.equal(result.active, false, "active must remain false when session lock PID is dead (stale lock)"); +}); diff --git a/src/tests/integration/web-live-state-contract.test.ts b/src/tests/integration/web-live-state-contract.test.ts index 2af24bcc6..bed3b44c2 100644 --- a/src/tests/integration/web-live-state-contract.test.ts +++ b/src/tests/integration/web-live-state-contract.test.ts @@ -397,10 +397,11 @@ test("/api/session/events exposes explicit live_state_invalidation events for ag harness.emit({ type: "auto_retry_end", success: false, attempt: 1, finalError: "still failing" }); harness.emit({ type: "auto_compaction_start", reason: "threshold" }); harness.emit({ type: "auto_compaction_end", result: undefined, aborted: false, willRetry: false }); + harness.emit({ type: "turn_end" }); const events = await readSseEventsUntil( response, - (seen) => seen.filter((event) => event.type === "live_state_invalidation").length >= 5, + (seen) => seen.filter((event) => event.type === "live_state_invalidation").length >= 6, ); const invalidations = events.filter((event) => event.type === "live_state_invalidation"); @@ -416,6 +417,7 @@ test("/api/session/events exposes explicit live_state_invalidation events for ag { reason: "auto_retry_end", source: "bridge_event", workspaceIndexCacheInvalidated: false }, { reason: "auto_compaction_start", source: "bridge_event", workspaceIndexCacheInvalidated: false }, { reason: "auto_compaction_end", source: "bridge_event", workspaceIndexCacheInvalidated: false }, + { reason: "turn_end", source: "bridge_event", 
workspaceIndexCacheInvalidated: true }, ], "live_state_invalidation reasons/sources should stay inspectable on /api/session/events", ); @@ -424,6 +426,7 @@ test("/api/session/events exposes explicit live_state_invalidation events for ag assert.deepEqual(invalidations[2].domains, ["auto", "recovery"]); assert.deepEqual(invalidations[3].domains, ["auto", "recovery"]); assert.deepEqual(invalidations[4].domains, ["auto", "recovery"]); + assert.deepEqual(invalidations[5].domains, ["workspace"]); controller.abort(); await waitForMicrotasks(); @@ -585,3 +588,79 @@ test("workspace cache only busts on real boundaries and session mutations emit t unsubscribe(); }); + +test("turn_end events invalidate workspace so milestones list reflects current state (issue #2706)", async (t) => { + const fixture = makeWorkspaceFixture(); + const sessionPath = createSessionFile( + fixture.projectCwd, + fixture.sessionsDir, + "sess-turn", + "Turn Session", + "2026-03-15T03:32:00.000Z", + ); + let workspaceIndexCalls = 0; + + const harness = createHarness((command, current) => { + if (command.type === "get_state") { + current.emit({ + id: command.id, + type: "response", + command: "get_state", + success: true, + data: fakeSessionState("sess-turn", sessionPath), + }); + return; + } + + assert.fail(`unexpected command: ${command.type}`); + }); + + setupBridge(harness, fixture, { + indexWorkspace: async () => { + workspaceIndexCalls += 1; + return fakeWorkspaceIndex(); + }, + }); + + t.after(async () => { + await bridge.resetBridgeServiceForTests(); + onboarding.resetOnboardingServiceForTests(); + fixture.cleanup(); + }); + + const service = bridge.getProjectBridgeService(); + await service.ensureStarted(); + const seenEvents: any[] = []; + const unsubscribe = service.subscribe((event) => { + seenEvents.push(event); + }); + + // Load workspace once to prime cache + await bridge.collectBootPayload(); + assert.equal(workspaceIndexCalls, 1, "initial boot should call indexWorkspace once"); + + // 
Emit turn_end — this should invalidate the workspace cache so the + // milestones list picks up state changes that occurred during the turn. + harness.emit({ type: "turn_end" }); + await waitForMicrotasks(); + + // Verify a live_state_invalidation was emitted for turn_end + const invalidations = seenEvents.filter((event) => event.type === "live_state_invalidation"); + const turnEndInvalidation = invalidations.find((event) => event.reason === "turn_end"); + assert.ok(turnEndInvalidation, "turn_end should emit a live_state_invalidation event"); + assert.ok( + turnEndInvalidation.domains.includes("workspace"), + "turn_end invalidation should include the workspace domain", + ); + assert.equal( + turnEndInvalidation.workspaceIndexCacheInvalidated, + true, + "turn_end should invalidate the workspace index cache", + ); + + // Verify workspace cache was actually busted + await bridge.collectBootPayload(); + assert.equal(workspaceIndexCalls, 2, "turn_end should bust the workspace index cache so the next fetch re-indexes"); + + unsubscribe(); +}); diff --git a/src/tests/integration/web-mode-cli.test.ts b/src/tests/integration/web-mode-cli.test.ts index 249e17568..68b6c9c1b 100644 --- a/src/tests/integration/web-mode-cli.test.ts +++ b/src/tests/integration/web-mode-cli.test.ts @@ -164,6 +164,7 @@ test('launchWebMode prefers the packaged standalone host and opens the resolved cwd: standaloneRoot, detached: true, stdio: 'ignore', + windowsHide: true, env: { TEST_ENV: '1', HOSTNAME: '127.0.0.1', diff --git a/src/tests/integration/web-mode-windows-hide.test.ts b/src/tests/integration/web-mode-windows-hide.test.ts new file mode 100644 index 000000000..aeb6baeea --- /dev/null +++ b/src/tests/integration/web-mode-windows-hide.test.ts @@ -0,0 +1,120 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +const webMode = 
await import("../../web-mode.ts");

+
+// ---------------------------------------------------------------------------
+// #2628 — On Windows, child processes spawned by web-mode must set
+// `windowsHide: true` to prevent console windows from flashing on screen.
+// ---------------------------------------------------------------------------
+
+test("launchWebMode passes windowsHide: true in spawn options", async (t) => {
+  const tmp = mkdtempSync(join(tmpdir(), "gsd-web-winhide-"));
+  const standaloneRoot = join(tmp, "dist", "web", "standalone");
+  const serverPath = join(standaloneRoot, "server.js");
+  mkdirSync(standaloneRoot, { recursive: true });
+  writeFileSync(serverPath, 'console.log("stub")\n');
+
+  const pidFilePath = join(tmp, "web-server.pid");
+  const registryPath = join(tmp, "web-instances.json");
+
+  let capturedOptions: Record<string, unknown> | undefined;
+
+  t.after(() => {
+    rmSync(tmp, { recursive: true, force: true });
+  });
+
+  const status = await webMode.launchWebMode(
+    {
+      cwd: "/tmp/winhide-project",
+      projectSessionsDir: "/tmp/.gsd/sessions/winhide",
+      agentDir: "/tmp/.gsd/agent",
+      packageRoot: tmp,
+    },
+    {
+      initResources: () => {},
+      resolvePort: async () => 46000,
+      execPath: "/custom/node",
+      env: { TEST_ENV: "1" },
+      spawn: (_command, _args, options) => {
+        capturedOptions = options as Record<string, unknown>;
+        return {
+          pid: 70001,
+          once: () => undefined,
+          unref: () => {},
+        } as any;
+      },
+      waitForBootReady: async () => undefined,
+      openBrowser: () => {},
+      pidFilePath,
+      writePidFile: webMode.writePidFile,
+      registryPath,
+      stderr: { write: () => true },
+    },
+  );
+
+  assert.equal(status.ok, true, "launch should succeed");
+  assert.ok(capturedOptions, "spawn must have been called");
+  assert.equal(
+    capturedOptions!.windowsHide,
+    true,
+    "spawn options must include windowsHide: true to prevent console window flashing on Windows (#2628)",
+  );
+});
+
+test("launchWebMode source-dev host also passes windowsHide: true", async (t) => {
+  const tmp = 
mkdtempSync(join(tmpdir(), "gsd-web-winhide-src-"));
+  const webRoot = join(tmp, "web");
+  mkdirSync(webRoot, { recursive: true });
+  writeFileSync(join(webRoot, "package.json"), '{"name":"web"}\n');
+
+  const pidFilePath = join(tmp, "web-server.pid");
+  const registryPath = join(tmp, "web-instances.json");
+
+  let capturedOptions: Record<string, unknown> | undefined;
+
+  t.after(() => {
+    rmSync(tmp, { recursive: true, force: true });
+  });
+
+  const status = await webMode.launchWebMode(
+    {
+      cwd: "/tmp/winhide-src-project",
+      projectSessionsDir: "/tmp/.gsd/sessions/winhide-src",
+      agentDir: "/tmp/.gsd/agent",
+      packageRoot: tmp,
+    },
+    {
+      initResources: () => {},
+      resolvePort: async () => 46001,
+      execPath: "/custom/node",
+      env: { TEST_ENV: "1" },
+      platform: "win32",
+      spawn: (_command, _args, options) => {
+        capturedOptions = options as Record<string, unknown>;
+        return {
+          pid: 70002,
+          once: () => undefined,
+          unref: () => {},
+        } as any;
+      },
+      waitForBootReady: async () => undefined,
+      openBrowser: () => {},
+      pidFilePath,
+      writePidFile: webMode.writePidFile,
+      registryPath,
+      stderr: { write: () => true },
+    },
+  );
+
+  assert.equal(status.ok, true, "launch should succeed");
+  assert.ok(capturedOptions, "spawn must have been called");
+  assert.equal(
+    capturedOptions!.windowsHide,
+    true,
+    "source-dev spawn must also include windowsHide: true (#2628)",
+  );
+});
diff --git a/src/tests/integration/web-project-tab-preservation.test.ts b/src/tests/integration/web-project-tab-preservation.test.ts
new file mode 100644
index 000000000..4b7b5d2d1
--- /dev/null
+++ b/src/tests/integration/web-project-tab-preservation.test.ts
@@ -0,0 +1,243 @@
+import test, { describe } from "node:test";
+import assert from "node:assert/strict";
+
+// ---------------------------------------------------------------------------
+// Test: project switching preserves the active tab (view) instead of
+// resetting to dashboard.
+//
+// Bug #2711: Switching projects always returns to dashboard. 
+//
+// Root cause: handleSelectProject in ProjectsPanel dispatched
+// gsd:navigate-view with { view: "dashboard" } on every switch.
+// Additionally, the viewRestored flag in WorkspaceChrome was never
+// reset when the project changed, so the per-project sessionStorage
+// restore could not fire for the new project.
+//
+// These tests validate the corrected logic in isolation, without needing
+// a full React DOM.
+// ---------------------------------------------------------------------------
+
+// ── Simulated sessionStorage (mirrors browser sessionStorage API) ────────
+
+class MockSessionStorage {
+  private store = new Map<string, string>();
+
+  getItem(key: string): string | null {
+    return this.store.get(key) ?? null;
+  }
+
+  setItem(key: string, value: string): void {
+    this.store.set(key, value);
+  }
+
+  removeItem(key: string): void {
+    this.store.delete(key);
+  }
+
+  clear(): void {
+    this.store.clear();
+  }
+}
+
+// ── Mirrors the KNOWN_VIEWS set and viewStorageKey from app-shell.tsx ─────
+
+const KNOWN_VIEWS = new Set([
+  "dashboard",
+  "power",
+  "chat",
+  "roadmap",
+  "files",
+  "activity",
+  "visualize",
+]);
+
+function viewStorageKey(projectCwd: string): string {
+  return `gsd-active-view:${projectCwd}`;
+}
+
+// ── Simulated WorkspaceChrome view-restore logic ─────────────────────────
+// This mirrors the useEffect in WorkspaceChrome that restores the persisted
+// view when projectPath changes — with the fix applied.
+
+interface ChromeState {
+  activeView: string;
+  viewRestored: boolean;
+  projectPath: string | null;
+}
+
+/**
+ * Simulates the view-restore effect.
+ * In the fixed code, viewRestored resets to false when projectPath changes,
+ * allowing the stored view to be read for the new project. 
+ */ +function simulateViewRestoreEffect( + state: ChromeState, + storage: MockSessionStorage, +): ChromeState { + // The fix: if projectPath changed, reset viewRestored + // (In React this is a separate useEffect that depends on [projectPath]) + if (!state.viewRestored && state.projectPath) { + const stored = storage.getItem(viewStorageKey(state.projectPath)); + if (stored && KNOWN_VIEWS.has(stored)) { + return { ...state, activeView: stored, viewRestored: true }; + } + return { ...state, viewRestored: true }; + } + return state; +} + +/** + * Simulates switching to a new project path. + * The fix resets viewRestored so the restore effect can fire for the new project. + */ +function simulateProjectSwitch( + state: ChromeState, + newProjectPath: string, +): ChromeState { + return { + ...state, + projectPath: newProjectPath, + viewRestored: false, // <-- THE FIX: reset so restore runs for new project + }; +} + +// ── Simulated handleSelectProject (pre-fix vs post-fix) ────────────────── + +/** Pre-fix: always navigates to dashboard on project switch */ +function handleSelectProjectPreFix( + _state: ChromeState, + _projectPath: string, +): string { + // Bug: always forces dashboard + return "dashboard"; +} + +/** Post-fix: does NOT override the active view */ +function handleSelectProjectPostFix( + state: ChromeState, + _projectPath: string, +): string { + // Fix: preserve whatever view is active (restore logic handles per-project view) + return state.activeView; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe("project switch tab preservation (#2711)", () => { + test("BUG: pre-fix handleSelectProject always resets to dashboard", () => { + const state: ChromeState = { + activeView: "roadmap", + viewRestored: true, + projectPath: "/projects/alpha", + }; + + const viewAfterSwitch = handleSelectProjectPreFix(state, "/projects/beta"); 
+ // This demonstrates the bug: user was on "roadmap" but got sent to "dashboard" + assert.equal(viewAfterSwitch, "dashboard"); + }); + + test("FIX: post-fix handleSelectProject preserves current view", () => { + const state: ChromeState = { + activeView: "roadmap", + viewRestored: true, + projectPath: "/projects/alpha", + }; + + const viewAfterSwitch = handleSelectProjectPostFix(state, "/projects/beta"); + assert.equal(viewAfterSwitch, "roadmap", "Should preserve the current tab"); + }); + + test("FIX: viewRestored resets on project switch, enabling per-project view restore", () => { + const storage = new MockSessionStorage(); + storage.setItem(viewStorageKey("/projects/alpha"), "files"); + storage.setItem(viewStorageKey("/projects/beta"), "activity"); + + // Start on project alpha, viewing files + let state: ChromeState = { + activeView: "dashboard", + viewRestored: false, + projectPath: "/projects/alpha", + }; + + // Initial restore for alpha + state = simulateViewRestoreEffect(state, storage); + assert.equal(state.activeView, "files"); + assert.equal(state.viewRestored, true); + + // Switch to project beta + state = simulateProjectSwitch(state, "/projects/beta"); + assert.equal(state.viewRestored, false, "viewRestored should reset on project switch"); + + // Restore effect fires for beta + state = simulateViewRestoreEffect(state, storage); + assert.equal(state.activeView, "activity", "Should restore beta's persisted view"); + }); + + test("FIX: switching to project with no stored view keeps current view", () => { + const storage = new MockSessionStorage(); + // Only alpha has a stored view + storage.setItem(viewStorageKey("/projects/alpha"), "roadmap"); + + let state: ChromeState = { + activeView: "roadmap", + viewRestored: true, + projectPath: "/projects/alpha", + }; + + // Switch to gamma (no stored view) + state = simulateProjectSwitch(state, "/projects/gamma"); + state = simulateViewRestoreEffect(state, storage); + + // Should keep the current view since 
gamma has no stored preference + assert.equal(state.activeView, "roadmap", "Should keep current view when new project has no stored view"); + }); + + test("FIX: stored view for invalid view name is ignored", () => { + const storage = new MockSessionStorage(); + storage.setItem(viewStorageKey("/projects/alpha"), "nonexistent-view"); + + let state: ChromeState = { + activeView: "power", + viewRestored: false, + projectPath: "/projects/alpha", + }; + + state = simulateViewRestoreEffect(state, storage); + // Invalid stored view should be ignored, keeping current view + assert.equal(state.activeView, "power"); + }); + + test("FIX: rapid project switches each get a fresh restore", () => { + const storage = new MockSessionStorage(); + storage.setItem(viewStorageKey("/projects/a"), "chat"); + storage.setItem(viewStorageKey("/projects/b"), "visualize"); + storage.setItem(viewStorageKey("/projects/c"), "files"); + + let state: ChromeState = { + activeView: "dashboard", + viewRestored: false, + projectPath: "/projects/a", + }; + + // Restore for A + state = simulateViewRestoreEffect(state, storage); + assert.equal(state.activeView, "chat"); + + // Switch to B + state = simulateProjectSwitch(state, "/projects/b"); + state = simulateViewRestoreEffect(state, storage); + assert.equal(state.activeView, "visualize"); + + // Switch to C + state = simulateProjectSwitch(state, "/projects/c"); + state = simulateViewRestoreEffect(state, storage); + assert.equal(state.activeView, "files"); + + // Switch back to A + state = simulateProjectSwitch(state, "/projects/a"); + state = simulateViewRestoreEffect(state, storage); + assert.equal(state.activeView, "chat", "Should restore A's view again after switching away and back"); + }); +}); diff --git a/src/tests/integration/web-terminal-preservation.test.ts b/src/tests/integration/web-terminal-preservation.test.ts new file mode 100644 index 000000000..fb0cd2d1a --- /dev/null +++ b/src/tests/integration/web-terminal-preservation.test.ts @@ -0,0 
+1,264 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +// --------------------------------------------------------------------------- +// Constants mirrored from the shutdown-gate and app-shell +// --------------------------------------------------------------------------- +const SHUTDOWN_DELAY_MS = 3_000; + +// --------------------------------------------------------------------------- +// Test 1: pagehide handler must NOT fire shutdown beacon on tab switches +// --------------------------------------------------------------------------- +// The bug: `pagehide` fires both on actual page unload AND on mobile/Safari +// tab switches (where event.persisted === true because the page enters bfcache). +// The current handler does not check event.persisted, so it fires shutdown +// beacons on tab switches — killing the server and all PTY sessions. + +/** + * Mirrors the pagehide handler logic from app-shell.tsx's + * ProjectAwareWorkspace component. The BUGGY version sends a shutdown + * beacon unconditionally. + */ +function buggyPageHideHandler(_event: { persisted: boolean }): boolean { + // Current code (buggy): always sends beacon regardless of event.persisted + return true; // true = beacon was sent +} + +/** + * Fixed version: only send shutdown beacon when the page is truly being + * unloaded (event.persisted === false). When persisted is true the page + * is being put into bfcache (tab switch, app backgrounding) and the + * server should stay alive. 
+ */ +function fixedPageHideHandler(event: { persisted: boolean }): boolean { + if (event.persisted) { + // Page is entering bfcache (tab switch) — do NOT shut down + return false; + } + return true; // true = beacon was sent +} + +test("pagehide: buggy handler sends shutdown beacon on tab switch (persisted=true)", () => { + // This test documents the bug — the buggy handler fires on tab switches + const beaconSent = buggyPageHideHandler({ persisted: true }); + assert.equal(beaconSent, true, "Buggy handler sends beacon even on tab switch"); +}); + +test("pagehide: fixed handler skips shutdown beacon on tab switch (persisted=true)", () => { + const beaconSent = fixedPageHideHandler({ persisted: true }); + assert.equal(beaconSent, false, "Fixed handler must NOT send beacon on tab switch"); +}); + +test("pagehide: fixed handler still sends shutdown beacon on real page unload (persisted=false)", () => { + const beaconSent = fixedPageHideHandler({ persisted: false }); + assert.equal(beaconSent, true, "Fixed handler must send beacon on real unload"); +}); + +// --------------------------------------------------------------------------- +// Test 2: Project switching must NOT destroy PTY sessions +// --------------------------------------------------------------------------- +// The bug: ProjectStoreManager.switchProject() changes the active store, +// which causes React to unmount the entire WorkspaceChrome tree (including +// ShellTerminal). The PTY processes survive server-side, but the client +// loses all xterm state and SSE connections. When the user switches back, +// a NEW terminal is created instead of reconnecting to the existing one. + +/** + * Mirrors the session-id generation logic used by ShellTerminal. + * The BUGGY version generates a project-agnostic session ID, so switching + * projects and switching back creates a collision or a fresh session. 
+ *
+ * The FIXED version namespaces session IDs by project so switching back
+ * reconnects to the same server-side PTY session via its stable ID.
+ */
+
+interface TerminalSessionTracker {
+  /** Active PTY session IDs on the server (survives client unmount) */
+  serverSessions: Map<string, { alive: boolean; projectCwd: string }>;
+  /** Client-side session IDs (destroyed on unmount) */
+  clientSessions: Set<string>;
+}
+
+function createTracker(): TerminalSessionTracker {
+  return {
+    serverSessions: new Map(),
+    clientSessions: new Set(),
+  };
+}
+
+/**
+ * Simulates what happens when ShellTerminal mounts for a project.
+ * The BUGGY version uses a plain default ID with no project namespace.
+ */
+function buggyMountTerminal(tracker: TerminalSessionTracker, _projectCwd: string): string {
+  const sessionId = "default"; // No project namespace — always the same ID
+  tracker.serverSessions.set(sessionId, { alive: true, projectCwd: _projectCwd });
+  tracker.clientSessions.add(sessionId);
+  return sessionId;
+}
+
+/**
+ * Simulates what happens when ShellTerminal unmounts (project switch).
+ * Client-side state is destroyed but server session stays alive.
+ */
+function unmountTerminal(tracker: TerminalSessionTracker, sessionId: string): void {
+  tracker.clientSessions.delete(sessionId);
+  // Server session stays alive — this is the correct behavior
+}
+
+/**
+ * FIXED mount: uses a project-scoped session ID so switching back to
+ * a project reconnects to the same server-side PTY.
+ */ +function fixedMountTerminal(tracker: TerminalSessionTracker, projectCwd: string): string { + const sessionId = `shell:${projectCwd}:default`; + // getOrCreateSession on the server: if alive, returns existing; if dead, creates new + if (!tracker.serverSessions.has(sessionId) || !tracker.serverSessions.get(sessionId)!.alive) { + tracker.serverSessions.set(sessionId, { alive: true, projectCwd }); + } + tracker.clientSessions.add(sessionId); + return sessionId; +} + +test("project switch: buggy flow reuses same session ID for different projects", () => { + const tracker = createTracker(); + + // Mount terminal for project A + const sessionA = buggyMountTerminal(tracker, "/projects/alpha"); + assert.equal(sessionA, "default"); + assert.equal(tracker.serverSessions.get("default")?.projectCwd, "/projects/alpha"); + + // Switch to project B — unmount A, mount B + unmountTerminal(tracker, sessionA); + const sessionB = buggyMountTerminal(tracker, "/projects/beta"); + + // Bug: same session ID, but now points to a different project + assert.equal(sessionB, "default"); + assert.equal( + tracker.serverSessions.get("default")?.projectCwd, + "/projects/beta", + "Buggy: server session is overwritten with new project", + ); +}); + +test("project switch: fixed flow preserves per-project session identity", () => { + const tracker = createTracker(); + + // Mount terminal for project A + const sessionA = fixedMountTerminal(tracker, "/projects/alpha"); + assert.ok(sessionA.includes("/projects/alpha"), "Session ID includes project path"); + + // Switch to project B — unmount A, mount B + unmountTerminal(tracker, sessionA); + const sessionB = fixedMountTerminal(tracker, "/projects/beta"); + + // Session IDs are different — no collision + assert.notEqual(sessionA, sessionB, "Different projects get different session IDs"); + + // Both server sessions exist independently + assert.equal(tracker.serverSessions.get(sessionA)?.alive, true); + 
assert.equal(tracker.serverSessions.get(sessionB)?.alive, true); + + // Switch back to project A — should reconnect to same session + unmountTerminal(tracker, sessionB); + const sessionA2 = fixedMountTerminal(tracker, "/projects/alpha"); + assert.equal(sessionA2, sessionA, "Switching back reconnects to the same session ID"); + assert.equal(tracker.serverSessions.get(sessionA)?.alive, true, "Original server session is still alive"); +}); + +// --------------------------------------------------------------------------- +// Test 3: Shutdown gate must differentiate tab-switch from real unload +// --------------------------------------------------------------------------- +// The shutdown gate has a 3s delay to allow page refreshes to cancel the +// shutdown. But on mobile tab switches that fire pagehide, the 3s timer +// starts — and if the user doesn't switch back within 3s, the server dies. +// The fix is to never start the timer on persisted pagehide events. + +interface ShutdownGateState { + timerScheduled: boolean; + shutdownExecuted: boolean; +} + +function createShutdownGate(): ShutdownGateState { + return { timerScheduled: false, shutdownExecuted: false }; +} + +function scheduleShutdownIfAllowed(gate: ShutdownGateState, event: { persisted: boolean }): void { + // Fixed: only schedule shutdown when the page is truly unloading + if (event.persisted) return; + gate.timerScheduled = true; +} + +function cancelShutdown(gate: ShutdownGateState): void { + gate.timerScheduled = false; +} + +test("shutdown gate: tab switch (persisted=true) must not schedule shutdown", () => { + const gate = createShutdownGate(); + scheduleShutdownIfAllowed(gate, { persisted: true }); + assert.equal(gate.timerScheduled, false, "No shutdown timer on tab switch"); +}); + +test("shutdown gate: real page unload (persisted=false) must schedule shutdown", () => { + const gate = createShutdownGate(); + scheduleShutdownIfAllowed(gate, { persisted: false }); + assert.equal(gate.timerScheduled, 
true, "Shutdown timer on real unload"); +}); + +test("shutdown gate: scheduled shutdown can still be cancelled by page refresh", () => { + const gate = createShutdownGate(); + scheduleShutdownIfAllowed(gate, { persisted: false }); + assert.equal(gate.timerScheduled, true); + cancelShutdown(gate); + assert.equal(gate.timerScheduled, false, "Timer cancelled on refresh"); +}); + +// --------------------------------------------------------------------------- +// Test 4: Shell terminal session ID must be project-scoped +// --------------------------------------------------------------------------- + +/** + * Mirrors the session ID derivation that ShellTerminal should use. + * The default session ID (when no sessionPrefix is given) must incorporate + * the project path so that different projects get different PTY sessions. + */ +function deriveSessionId( + projectCwd: string | undefined, + sessionPrefix?: string, + command?: string, +): string { + const base = sessionPrefix ?? (command ? "gsd-default" : "default"); + if (!projectCwd) return base; + // Stable hash-like key from the project path — keeps IDs short but unique + return `${base}:${projectCwd}`; +} + +test("session ID derivation: different projects produce different IDs", () => { + const idA = deriveSessionId("/projects/alpha"); + const idB = deriveSessionId("/projects/beta"); + assert.notEqual(idA, idB); +}); + +test("session ID derivation: same project produces stable ID", () => { + const id1 = deriveSessionId("/projects/alpha"); + const id2 = deriveSessionId("/projects/alpha"); + assert.equal(id1, id2); +}); + +test("session ID derivation: explicit sessionPrefix is preserved with project scope", () => { + const id = deriveSessionId("/projects/alpha", "my-prefix"); + assert.ok(id.includes("my-prefix"), "Prefix included"); + assert.ok(id.includes("/projects/alpha"), "Project path included"); +}); + +test("session ID derivation: command sessions are also project-scoped", () => { + const idA = 
deriveSessionId("/projects/alpha", undefined, "gsd"); + const idB = deriveSessionId("/projects/beta", undefined, "gsd"); + assert.notEqual(idA, idB); + assert.ok(idA.includes("gsd-default"), "Uses gsd-default base for command sessions"); +}); + +test("session ID derivation: no projectCwd falls back to plain base ID", () => { + const id = deriveSessionId(undefined); + assert.equal(id, "default"); +}); diff --git a/src/tests/pty-chat-parser.test.ts b/src/tests/pty-chat-parser.test.ts index 5ed060fb0..07e21b63b 100644 --- a/src/tests/pty-chat-parser.test.ts +++ b/src/tests/pty-chat-parser.test.ts @@ -19,3 +19,131 @@ test("PtyChatParser.flush emits a trailing partial line without waiting for a ne assert.equal(latest[0]?.role, "assistant"); assert.equal(latest[0]?.content, "All slices are complete — nothing to discuss.\n"); }); + +// ─── Bug #2707: User messages omitted ──────────────────────────────────────── + +test("user input echoed on the same prompt line is classified as role=user", () => { + const parser = new PtyChatParser("test"); + let latest = parser.getMessages(); + parser.onMessage(() => { + latest = parser.getMessages(); + }); + + // GSD prints assistant response, then prompt with user input on same line + parser.feed("Here is your task summary.\n"); + parser.feed("❯ show status\n"); + + const userMsgs = latest.filter((m) => m.role === "user"); + assert.equal(userMsgs.length, 1, "should have exactly one user message"); + assert.equal(userMsgs[0].content, "show status"); +}); + +test("user input on a separate line after bare prompt is classified as role=user, not assistant", () => { + const parser = new PtyChatParser("test"); + let latest = parser.getMessages(); + parser.onMessage(() => { + latest = parser.getMessages(); + }); + + // GSD prints assistant text, then bare prompt on its own line + parser.feed("Done processing.\n"); + parser.feed("❯ \n"); + // User input appears on the next line (PTY echo without prompt prefix) + parser.feed("hello world\n"); + 
+ const userMsgs = latest.filter((m) => m.role === "user"); + assert.equal(userMsgs.length, 1, "should have exactly one user message"); + assert.equal(userMsgs[0].content, "hello world"); + + // The user input must NOT appear as assistant content + const assistantMsgs = latest.filter((m) => m.role === "assistant"); + for (const msg of assistantMsgs) { + assert.ok( + !msg.content.includes("hello world"), + "user input must not be misclassified as assistant content", + ); + } +}); + +test("multiple user turns: each user input after prompt is role=user", () => { + const parser = new PtyChatParser("test"); + let latest = parser.getMessages(); + parser.onMessage(() => { + latest = parser.getMessages(); + }); + + // Turn 1: assistant response, prompt, user input + parser.feed("Welcome to GSD.\n"); + parser.feed("❯ \n"); + parser.feed("discuss\n"); + + // Turn 2: assistant response, prompt, user input + parser.feed("Starting discussion mode.\n"); + parser.feed("❯ \n"); + parser.feed("plan my milestone\n"); + + const userMsgs = latest.filter((m) => m.role === "user"); + assert.equal(userMsgs.length, 2, "should have two user messages"); + assert.equal(userMsgs[0].content, "discuss"); + assert.equal(userMsgs[1].content, "plan my milestone"); +}); + +test("awaitingInput is true after prompt line, false after user input arrives", () => { + const parser = new PtyChatParser("test"); + + parser.feed("Task complete.\n"); + assert.equal(parser.isAwaitingInput(), false, "not awaiting input before prompt"); + + parser.feed("❯ \n"); + assert.equal(parser.isAwaitingInput(), true, "awaiting input after bare prompt"); + + parser.feed("next command\n"); + assert.equal(parser.isAwaitingInput(), false, "no longer awaiting after user input"); +}); + +test("awaitingInput resets when assistant content follows user input", () => { + const parser = new PtyChatParser("test"); + + parser.feed("Hello.\n"); + parser.feed("❯ \n"); + assert.equal(parser.isAwaitingInput(), true); + + parser.feed("do 
something\n"); + assert.equal(parser.isAwaitingInput(), false); + + // Assistant responds + parser.feed("Working on it...\n"); + assert.equal(parser.isAwaitingInput(), false, "should stay false during assistant output"); +}); + +// ─── Bug #2707: Chat looks stuck ──────────────────────────────────────────── + +test("prompt with empty user text does not create a user message but signals awaiting input", () => { + const parser = new PtyChatParser("test"); + let latest = parser.getMessages(); + parser.onMessage(() => { + latest = parser.getMessages(); + }); + + parser.feed("All done.\n"); + parser.feed("❯ \n"); + + const userMsgs = latest.filter((m) => m.role === "user"); + assert.equal(userMsgs.length, 0, "bare prompt should not create a user message"); + assert.equal(parser.isAwaitingInput(), true, "parser should signal awaiting input"); +}); + +test("alternate prompt markers (› and >) also trigger awaiting input", () => { + const parser = new PtyChatParser("test"); + + parser.feed("Response text.\n"); + parser.feed("› \n"); + assert.equal(parser.isAwaitingInput(), true, "› prompt should trigger awaiting input"); + + parser.feed("user reply\n"); + assert.equal(parser.isAwaitingInput(), false); + + parser.feed("More output.\n"); + parser.feed("> \n"); + assert.equal(parser.isAwaitingInput(), true, "> prompt should trigger awaiting input"); +}); diff --git a/src/tests/read-tool-offset-clamp.test.ts b/src/tests/read-tool-offset-clamp.test.ts new file mode 100644 index 000000000..4dc4c5e78 --- /dev/null +++ b/src/tests/read-tool-offset-clamp.test.ts @@ -0,0 +1,106 @@ +/** + * Tests for read tool offset clamping (#3007). + * + * When offset exceeds file length, the read tool should clamp to the + * last line instead of throwing, preventing downstream JSON parse errors + * in auto-mode milestone completion. 
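The tests that follow pin down the clamp-instead-of-throw rule. A minimal sketch of that rule, assuming 1-based line offsets — `clampOffset` is an illustrative helper, not the repo's actual `read.ts` API:

```typescript
// Hypothetical sketch of the #3007 clamping rule: when a 1-based line offset
// points past the end of the file, fall back to the last line and flag the
// adjustment so the tool can emit a notice instead of throwing.
function clampOffset(
  offset: number,
  lineCount: number,
): { offset: number; clamped: boolean } {
  if (lineCount === 0) return { offset: 1, clamped: offset > 1 };
  if (offset > lineCount) return { offset: lineCount, clamped: true };
  return { offset: Math.max(1, offset), clamped: false };
}

// Reproduction of the #3007 shape: offset 30 on a 13-line file.
const lines = Array.from({ length: 13 }, (_, i) => `Line ${i + 1}: content`);
const { offset, clamped } = clampOffset(30, lines.length);
const text = clamped
  ? `[offset clamped to line ${offset} of ${lines.length}]\n${lines.slice(offset - 1).join("\n")}`
  : lines.slice(offset - 1).join("\n");
```

Returning a `clamped` flag (rather than silently adjusting) is what lets the tool include the "offset was adjusted" notice the last test asserts on.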
+ */ + +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, writeFileSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { createReadTool } from "../../packages/pi-coding-agent/src/core/tools/read.ts"; + +// ─── Helpers ───────────────────────────────────────────────────────────────── + +function makeTmpDir(): { dir: string; cleanup: () => void } { + const dir = mkdtempSync(join(tmpdir(), "read-tool-test-")); + return { dir, cleanup: () => rmSync(dir, { recursive: true, force: true }) }; +} + +function writeLines(dir: string, name: string, lineCount: number): string { + const lines = Array.from({ length: lineCount }, (_, i) => `Line ${i + 1}: content`); + const filePath = join(dir, name); + writeFileSync(filePath, lines.join("\n")); + return filePath; +} + +// ═══════════════════════════════════════════════════════════════════════════ +// Offset beyond file bounds — should clamp, not throw (#3007) +// ═══════════════════════════════════════════════════════════════════════════ + +test("read tool: offset exceeding file length should NOT throw (#3007)", async (t) => { + const { dir, cleanup } = makeTmpDir(); + t.after(cleanup); + writeLines(dir, "small-artifact.md", 13); + + const readTool = createReadTool(dir); + + // offset 30 on a 13-line file — exact reproduction of #3007 + const result = await readTool.execute("test-call", { + path: "small-artifact.md", + offset: 30, + }); + + assert.ok(result, "should return a result, not throw"); + assert.ok(result.content, "should have content"); + assert.ok(result.content.length > 0, "should have at least one content block"); + + const text = (result.content[0] as any).text as string; + assert.ok(typeof text === "string", "first content block should be text"); + // Should include the last line of the file (clamped) + assert.ok(text.includes("Line 13"), "should include last line of file after clamping"); +}); + +test("read 
tool: offset 100 on a 5-line file clamps to last line", async (t) => { + const { dir, cleanup } = makeTmpDir(); + t.after(cleanup); + writeLines(dir, "tiny-file.txt", 5); + + const readTool = createReadTool(dir); + const result = await readTool.execute("test-call", { + path: "tiny-file.txt", + offset: 100, + }); + + const text = (result.content[0] as any).text as string; + assert.ok(text.includes("Line 5"), "should include the last line of the file"); +}); + +test("read tool: offset at exact last line works normally", async (t) => { + const { dir, cleanup } = makeTmpDir(); + t.after(cleanup); + writeLines(dir, "exact-offset.txt", 5); + + const readTool = createReadTool(dir); + // offset 5 on a 5-line file — should return line 5 (valid, no clamping needed) + const result = await readTool.execute("test-call", { + path: "exact-offset.txt", + offset: 5, + }); + + const text = (result.content[0] as any).text as string; + assert.ok(text.includes("Line 5"), "should include line 5"); +}); + +test("read tool: clamped offset includes notice about adjustment", async (t) => { + const { dir, cleanup } = makeTmpDir(); + t.after(cleanup); + writeLines(dir, "notice-test.md", 10); + + const readTool = createReadTool(dir); + const result = await readTool.execute("test-call", { + path: "notice-test.md", + offset: 50, + }); + + const text = (result.content[0] as any).text as string; + // Should contain some notice that the offset was adjusted + assert.ok( + text.includes("clamped") || text.includes("adjusted") || text.includes("beyond"), + `should indicate offset was clamped, got: ${text.slice(0, 200)}`, + ); +}); diff --git a/src/tests/tui-non-tty-render-loop.test.ts b/src/tests/tui-non-tty-render-loop.test.ts new file mode 100644 index 000000000..2e6e4677d --- /dev/null +++ b/src/tests/tui-non-tty-render-loop.test.ts @@ -0,0 +1,143 @@ +/** + * Test: RPC bridge TUI render loop must not burn CPU on non-TTY stdout. 
+ *
+ * When gsd is spawned as an RPC bridge child process, stdout is a pipe
+ * (process.stdout.isTTY === undefined). The TUI render loop must not
+ * start in that scenario — otherwise it runs at ~4,600 renders/second
+ * consuming 500%+ CPU doing nothing useful.
+ *
+ * Regression test for: https://github.com/gsd-build/gsd-2/issues/3095
+ */
+import { describe, it } from "node:test";
+import assert from "node:assert/strict";
+import { ProcessTerminal } from "@gsd/pi-tui";
+import { TUI } from "@gsd/pi-tui";
+import type { Terminal } from "@gsd/pi-tui";
+
+/**
+ * A mock terminal that tracks writes and render activity.
+ * Simulates a non-TTY environment (isTTY = false).
+ */
+class MockNonTTYTerminal implements Terminal {
+  public started = false;
+  public writeCount = 0;
+  public writtenData: string[] = [];
+  private _onInput?: (data: string) => void;
+  private _onResize?: () => void;
+
+  /** Simulates non-TTY stdout (typed as boolean so the TTY subclass can override) */
+  readonly isTTY: boolean = false;
+
+  start(onInput: (data: string) => void, onResize: () => void): void {
+    this.started = true;
+    this._onInput = onInput;
+    this._onResize = onResize;
+  }
+
+  stop(): void {
+    this.started = false;
+  }
+
+  async drainInput(_maxMs?: number, _idleMs?: number): Promise<void> {}
+
+  write(data: string): void {
+    this.writeCount++;
+    this.writtenData.push(data);
+  }
+
+  get columns(): number { return 80; }
+  get rows(): number { return 24; }
+  get kittyProtocolActive(): boolean { return false; }
+
+  moveBy(_lines: number): void {}
+  hideCursor(): void {}
+  showCursor(): void {}
+  clearLine(): void {}
+  clearFromCursor(): void {}
+  clearScreen(): void {}
+  setTitle(_title: string): void {}
+}
+
+/**
+ * A mock terminal that behaves like a real TTY.
+ */ +class MockTTYTerminal extends MockNonTTYTerminal { + override readonly isTTY = true as const; +} + +describe("TUI non-TTY render loop guard (issue #3095)", () => { + it("ProcessTerminal.start() should be a no-op when stdout is not a TTY", () => { + // ProcessTerminal.start() accesses process.stdout directly. + // We verify it exposes isTTY so callers can check before starting. + const terminal = new ProcessTerminal(); + // ProcessTerminal.isTTY should reflect process.stdout.isTTY + assert.equal( + typeof terminal.isTTY, + "boolean", + "ProcessTerminal must expose an isTTY property" + ); + }); + + it("TUI.start() must not render when terminal.isTTY is false", async () => { + const terminal = new MockNonTTYTerminal(); + const tui = new TUI(terminal); + + tui.start(); + + // Wait for any nextTick-scheduled renders to fire + await new Promise((resolve) => setTimeout(resolve, 50)); + + // The TUI should NOT have produced any render output on a non-TTY terminal + assert.equal( + terminal.writeCount, + 0, + `TUI rendered ${terminal.writeCount} times on non-TTY stdout — ` + + `this would cause the CPU burn described in #3095. 
` + + `Expected 0 writes when isTTY is false.` + ); + + // Clean up + tui.stop(); + }); + + it("TUI.start() renders normally when terminal.isTTY is true", async () => { + const terminal = new MockTTYTerminal(); + const tui = new TUI(terminal); + + tui.start(); + + // Wait for nextTick-scheduled render + await new Promise((resolve) => setTimeout(resolve, 50)); + + // On a TTY terminal, at least one render should have occurred + assert.ok( + terminal.writeCount > 0, + "TUI should render at least once on a TTY terminal" + ); + + tui.stop(); + }); + + it("requestRender() must be a no-op when terminal.isTTY is false", async () => { + const terminal = new MockNonTTYTerminal(); + const tui = new TUI(terminal); + + tui.start(); + + // Force multiple render requests + tui.requestRender(); + tui.requestRender(); + tui.requestRender(); + + // Wait for any scheduled renders + await new Promise((resolve) => setTimeout(resolve, 50)); + + assert.equal( + terminal.writeCount, + 0, + "requestRender() must not write to non-TTY stdout" + ); + + tui.stop(); + }); +}); diff --git a/src/web-mode.ts b/src/web-mode.ts index 42683a667..665e0f5a8 100644 --- a/src/web-mode.ts +++ b/src/web-mode.ts @@ -14,7 +14,7 @@ const DEFAULT_PACKAGE_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), '. function openBrowser(url: string): void { if (process.platform === 'win32') { // PowerShell's Start-Process handles URLs with '&' safely; cmd /c start does not. - execFile('powershell', ['-c', `Start-Process '${url.replace(/'/g, "''")}'`], () => {}) + execFile('powershell', ['-c', `Start-Process '${url.replace(/'/g, "''")}'`], { windowsHide: true }, () => {}) } else { const cmd = process.platform === 'darwin' ? 
'open' : 'xdg-open' execFile(cmd, [url], () => {}) @@ -635,6 +635,7 @@ export async function launchWebMode( cwd: spawnSpec.cwd, detached: true, stdio: 'ignore', + windowsHide: true, env, }, ) diff --git a/src/web/auto-dashboard-service.ts b/src/web/auto-dashboard-service.ts index 31afe3ef8..972c7474f 100644 --- a/src/web/auto-dashboard-service.ts +++ b/src/web/auto-dashboard-service.ts @@ -1,5 +1,5 @@ import { execFile } from "node:child_process"; -import { existsSync } from "node:fs"; +import { existsSync, readFileSync } from "node:fs"; import { join } from "node:path"; import { pathToFileURL } from "node:url"; @@ -42,6 +42,64 @@ export function collectTestOnlyFallbackAutoDashboardData(): AutoDashboardData { return fallbackAutoDashboardData(); } +/** + * Check if a PID is alive by sending signal 0. + */ +function isPidAlive(pid: number): boolean { + try { + process.kill(pid, 0); + return true; + } catch { + return false; + } +} + +/** + * Reconcile subprocess auto dashboard data with on-disk session state. + * + * The subprocess always starts with fresh module state (s.active === false), + * so it can never report active/paused correctly. We check: + * 1. .gsd/auto.lock — if present and its PID is alive, auto IS running. + * 2. .gsd/runtime/paused-session.json — if present, auto IS paused. + * + * See #2705. + */ +function reconcileWithDiskState( + data: AutoDashboardData, + projectCwd: string, + checkExists: (path: string) => boolean, +): AutoDashboardData { + // If the subprocess already reports active or paused, trust it. + if (data.active || data.paused) return data; + + // Check for paused-session.json first (paused takes precedence). + const pausedPath = join(projectCwd, ".gsd", "runtime", "paused-session.json"); + if (checkExists(pausedPath)) { + try { + // Validate the file is readable JSON (not corrupt). + JSON.parse(readFileSync(pausedPath, "utf-8")); + return { ...data, paused: true }; + } catch { + // Corrupt or unreadable — ignore. 
+ } + } + + // Check for session lock with a live PID. + const lockPath = join(projectCwd, ".gsd", "auto.lock"); + if (checkExists(lockPath)) { + try { + const lockData = JSON.parse(readFileSync(lockPath, "utf-8")) as { pid?: number }; + if (typeof lockData.pid === "number" && isPidAlive(lockData.pid)) { + return { ...data, active: true }; + } + } catch { + // Corrupt or unreadable — ignore. + } + } + + return data; +} + export async function collectAuthoritativeAutoDashboardData( packageRoot: string, options: AutoDashboardServiceOptions = {}, @@ -95,6 +153,7 @@ export async function collectAuthoritativeAutoDashboardData( [AUTO_DASHBOARD_MODULE_ENV]: autoModulePath, }, maxBuffer: AUTO_DASHBOARD_MAX_BUFFER, + windowsHide: true, }, (error, stdout, stderr) => { if (error) { @@ -103,7 +162,12 @@ export async function collectAuthoritativeAutoDashboardData( } try { - resolveResult(JSON.parse(stdout) as AutoDashboardData); + const parsed = JSON.parse(stdout) as AutoDashboardData; + const projectCwd = env.GSD_WEB_PROJECT_CWD || ""; + const reconciled = projectCwd + ? 
reconcileWithDiskState(parsed, projectCwd, checkExists) + : parsed; + resolveResult(reconciled); } catch (parseError) { reject( new Error( diff --git a/src/web/bridge-service.ts b/src/web/bridge-service.ts index f1faac3aa..2f8a4f212 100644 --- a/src/web/bridge-service.ts +++ b/src/web/bridge-service.ts @@ -659,6 +659,7 @@ export type BridgeLiveStateDomain = "auto" | "workspace" | "recovery" | "resumab export type BridgeLiveStateInvalidationSource = "bridge_event" | "rpc_command" | "session_manage"; export type BridgeLiveStateInvalidationReason = | "agent_end" + | "turn_end" | "auto_retry_start" | "auto_retry_end" | "auto_compaction_start" @@ -771,6 +772,7 @@ async function loadSessionBrowserSessionsViaChildProcess(config: BridgeRuntimeCo GSD_SESSION_BROWSER_DIR: config.projectSessionsDir, }, maxBuffer: 1024 * 1024, + windowsHide: true, }, (error, stdout, stderr) => { if (error) { @@ -832,6 +834,7 @@ async function appendSessionInfoViaChildProcess( GSD_TARGET_SESSION_NAME: name, }, maxBuffer: 1024 * 1024, + windowsHide: true, }, (error, _stdout, stderr) => { if (error) { @@ -1030,6 +1033,7 @@ async function loadWorkspaceIndexViaChildProcess(basePath: string, packageRoot: GSD_WORKSPACE_BASE: basePath, }, maxBuffer: 1024 * 1024, + windowsHide: true, }, (error, stdout, stderr) => { if (error) { @@ -1251,6 +1255,13 @@ function createLiveStateInvalidationFromBridgeEvent( domains: ["auto", "workspace", "recovery"], workspaceIndexCacheInvalidated: true, }; + case "turn_end": + return { + reason: "turn_end", + source: "bridge_event", + domains: ["workspace"], + workspaceIndexCacheInvalidated: true, + }; case "auto_retry_start": return { reason: "auto_retry_start", @@ -1616,6 +1627,7 @@ export class BridgeService { cwd: cliEntry.cwd, env: childEnv, stdio: ["pipe", "pipe", "pipe"], + windowsHide: true, }) as SpawnedRpcChild; this.process = child; @@ -1771,6 +1783,7 @@ export class BridgeService { const eventType = (event as { type?: string }).type; if ( eventType === 
"agent_end" || + eventType === "turn_end" || eventType === "auto_retry_start" || eventType === "auto_retry_end" || eventType === "auto_compaction_start" || diff --git a/src/web/captures-service.ts b/src/web/captures-service.ts index 1f7cb1189..2a8b4c9b8 100644 --- a/src/web/captures-service.ts +++ b/src/web/captures-service.ts @@ -64,6 +64,7 @@ export async function collectCapturesData(projectCwdOverride?: string): Promise< GSD_CAPTURES_BASE: projectCwd, }, maxBuffer: CAPTURES_MAX_BUFFER, + windowsHide: true, }, (error, stdout, stderr) => { if (error) { @@ -136,6 +137,7 @@ export async function resolveCaptureAction(request: CaptureResolveRequest, proje GSD_CAPTURES_BASE: projectCwd, }, maxBuffer: CAPTURES_MAX_BUFFER, + windowsHide: true, }, (error, stdout, stderr) => { if (error) { diff --git a/src/web/cleanup-service.ts b/src/web/cleanup-service.ts index 145201f31..2ef778a4e 100644 --- a/src/web/cleanup-service.ts +++ b/src/web/cleanup-service.ts @@ -78,6 +78,7 @@ export async function collectCleanupData(projectCwdOverride?: string): Promise { if (error) { @@ -170,6 +171,7 @@ export async function executeCleanup( GSD_CLEANUP_SNAPSHOTS: JSON.stringify(pruneSnapshots), }, maxBuffer: CLEANUP_MAX_BUFFER, + windowsHide: true, }, (error, stdout, stderr) => { if (error) { diff --git a/src/web/doctor-service.ts b/src/web/doctor-service.ts index 8fac5b272..ec5bc4dac 100644 --- a/src/web/doctor-service.ts +++ b/src/web/doctor-service.ts @@ -41,6 +41,7 @@ function runDoctorChild( GSD_DOCTOR_SCOPE: scope ?? 
"", }, maxBuffer: DOCTOR_MAX_BUFFER, + windowsHide: true, }, (error, stdout, stderr) => { if (error) { diff --git a/src/web/export-service.ts b/src/web/export-service.ts index 431f31473..002c98a94 100644 --- a/src/web/export-service.ts +++ b/src/web/export-service.ts @@ -74,6 +74,7 @@ export async function collectExportData( GSD_EXPORT_FORMAT: format, }, maxBuffer: EXPORT_MAX_BUFFER, + windowsHide: true, }, (error, stdout, stderr) => { if (error) { diff --git a/src/web/forensics-service.ts b/src/web/forensics-service.ts index 445fa59e6..ac74855d6 100644 --- a/src/web/forensics-service.ts +++ b/src/web/forensics-service.ts @@ -94,6 +94,7 @@ export async function collectForensicsData(projectCwdOverride?: string): Promise GSD_FORENSICS_BASE: projectCwd, }, maxBuffer: FORENSICS_MAX_BUFFER, + windowsHide: true, }, (error, stdout, stderr) => { if (error) { diff --git a/src/web/history-service.ts b/src/web/history-service.ts index a2ee75c68..ac1808aa2 100644 --- a/src/web/history-service.ts +++ b/src/web/history-service.ts @@ -66,6 +66,7 @@ export async function collectHistoryData(projectCwdOverride?: string): Promise { if (error) { diff --git a/src/web/hooks-service.ts b/src/web/hooks-service.ts index 9eeac1276..5eebcf4d9 100644 --- a/src/web/hooks-service.ts +++ b/src/web/hooks-service.ts @@ -66,6 +66,7 @@ export async function collectHooksData(projectCwdOverride?: string): Promise { if (error) { diff --git a/src/web/recovery-diagnostics-service.ts b/src/web/recovery-diagnostics-service.ts index ee5abeb92..cc9c8b9e8 100644 --- a/src/web/recovery-diagnostics-service.ts +++ b/src/web/recovery-diagnostics-service.ts @@ -491,6 +491,7 @@ async function collectRecoveryDiagnosticsChildPayload( GSD_RECOVERY_FORENSICS_MODULE: sessionForensicsModulePath, }, maxBuffer: RECOVERY_DIAGNOSTICS_MAX_BUFFER, + windowsHide: true, }, (error, stdout, stderr) => { if (error) { diff --git a/src/web/settings-service.ts b/src/web/settings-service.ts index 8e1b5c6ea..7a2a8df24 100644 --- 
a/src/web/settings-service.ts +++ b/src/web/settings-service.ts @@ -142,6 +142,7 @@ export async function collectSettingsData(projectCwdOverride?: string): Promise< GSD_SETTINGS_BASE: projectCwd, }, maxBuffer: SETTINGS_MAX_BUFFER, + windowsHide: true, }, (error, stdout, stderr) => { if (error) { diff --git a/src/web/skill-health-service.ts b/src/web/skill-health-service.ts index 60834dc96..43d586884 100644 --- a/src/web/skill-health-service.ts +++ b/src/web/skill-health-service.ts @@ -61,6 +61,7 @@ export async function collectSkillHealthData(projectCwdOverride?: string): Promi GSD_SKILL_HEALTH_BASE: projectCwd, }, maxBuffer: SKILL_HEALTH_MAX_BUFFER, + windowsHide: true, }, (error, stdout, stderr) => { if (error) { diff --git a/src/web/undo-service.ts b/src/web/undo-service.ts index ad339a359..2a218cc54 100644 --- a/src/web/undo-service.ts +++ b/src/web/undo-service.ts @@ -195,6 +195,7 @@ export async function executeUndo(projectCwdOverride?: string): Promise { if (error) { diff --git a/src/web/update-service.ts b/src/web/update-service.ts index 1ec44aa1a..62c728161 100644 --- a/src/web/update-service.ts +++ b/src/web/update-service.ts @@ -73,6 +73,7 @@ export function triggerUpdate(targetVersion?: string): boolean { stdio: ["ignore", "ignore", "pipe"], // Detach so the child process is not killed if the parent exits detached: false, + windowsHide: true, }) let stderr = "" diff --git a/src/web/visualizer-service.ts b/src/web/visualizer-service.ts index 93b1fcdd0..11a21e8f8 100644 --- a/src/web/visualizer-service.ts +++ b/src/web/visualizer-service.ts @@ -98,6 +98,7 @@ export async function collectVisualizerData(projectCwdOverride?: string): Promis GSD_VISUALIZER_BASE: projectCwd, }, maxBuffer: VISUALIZER_MAX_BUFFER, + windowsHide: true, }, (error, stdout, stderr) => { if (error) { diff --git a/tsconfig.test.json b/tsconfig.test.json index cdd2e38ab..d1fb9db80 100644 --- a/tsconfig.test.json +++ b/tsconfig.test.json @@ -4,6 +4,6 @@ "declaration": false, "noEmit": 
false }, - "include": ["src/tests/headless-cli-surface.test.ts", "src/headless-events.ts", "src/headless-types.ts"], + "include": ["src/tests/headless-cli-surface.test.ts", "src/tests/ensure-workspace-builds.test.ts", "src/headless-events.ts", "src/headless-types.ts", "src/tests/google-search-oauth-shape.test.ts", "src/tests/google-search-auth.repro.test.ts"], "exclude": [] } diff --git a/vscode-extension/CHANGELOG.md b/vscode-extension/CHANGELOG.md index fd532537d..98266e301 100644 --- a/vscode-extension/CHANGELOG.md +++ b/vscode-extension/CHANGELOG.md @@ -1,24 +1,45 @@ # Changelog +## [0.3.0] + +### Added + +- **SCM provider** — "GSD Agent" appears in Source Control panel with accept/discard per-file diffs +- **Change tracker** — captures original file content before agent modifications for diff and rollback +- **Checkpoints** — automatic snapshots on each agent turn with restore capability +- **Diagnostic bridge** — "Fix Problems in File" and "Fix All Problems" commands read VS Code diagnostics and send to agent +- **Line-level decorations** — green/yellow highlights on agent-modified lines with gutter indicators +- **Chat context injection** — auto-includes editor selection and file diagnostics when relevant +- **Git integration** — commit agent changes, create branches, show diffs +- **Approval modes** — auto-approve, ask (prompts before writes), plan-only (read-only) +- **UI request handling** — agent questions, confirmations, and selections now show as VS Code dialogs instead of hanging +- **Fix Errors button** — quick access to diagnostic fixing in sidebar Actions +- **5 new settings** — `showProgressNotifications`, `activityFeedMaxItems`, `showContextWarning`, `contextWarningThreshold`, `approvalMode` + +### Changed + +- **Sidebar redesign** — compact card-based layout with collapsible sections, pill toggles, hidden empty data +- **Workflow buttons** now route through Chat panel so responses are visible +- **Slash completion** filtered to `/gsd` commands 
only +- **Checkpoint labels** show timestamp + first action (e.g., "10:32 — Edit sidebar.ts") +- **Session tree** supports ISO timestamp filenames (GSD's actual format) +- **Session persistence** enabled (removed `--no-session` flag) +- **Progress notifications** disabled by default (Chat panel provides inline progress) +- **Sidebar reduced** from 6 panels to 3 (GSD Agent, Sessions, Activity) +- **Settings section** starts collapsed by default + ## [0.2.0] ### Added -- **Activity feed** — real-time TreeView showing tool executions (Read, Write, Edit, Bash, Grep, Glob) with status icons, duration, and click-to-open -- **Workflow controls** — sidebar buttons for Auto, Next, Quick Task, Capture, Status, and Fork that send `/gsd` slash commands -- **Progress notifications** — VS Code notification with cancel button while the agent is working -- **Context window indicator** — color-coded usage bar (green/yellow/red) in sidebar with configurable threshold warnings -- **Session forking** — fork from any message via QuickPick using `get_fork_messages` and `fork` RPC commands -- **Queue mode controls** — toggle steering and follow-up modes (all vs one-at-a-time) from the sidebar -- **Enhanced conversation history** — tool call rendering, collapsible thinking blocks, search/filter, fork-from-here buttons -- **Enhanced code lens** — Refactor, Find Bugs, and Generate Tests actions alongside Ask GSD -- **4 new settings** — `showProgressNotifications`, `activityFeedMaxItems`, `showContextWarning`, `contextWarningThreshold` -- **8 new commands** (33 total) — `clearActivity`, `forkSession`, `toggleSteeringMode`, `toggleFollowUpMode`, `refactorSymbol`, `findBugsSymbol`, `generateTestsSymbol` - -### Changed - -- Sidebar session table now shows steering and follow-up queue mode with clickable toggle badges -- Token usage section includes context window usage bar when model context window is known +- **Activity feed** — real-time TreeView showing tool executions with status icons, 
duration, and click-to-open +- **Workflow controls** — sidebar buttons for Auto, Next, Quick Task, Capture +- **Context window indicator** — color-coded usage bar in sidebar with threshold warnings +- **Session forking** — fork from any message via QuickPick +- **Queue mode controls** — toggle steering and follow-up modes from the sidebar +- **Enhanced conversation history** — tool call rendering, collapsible thinking blocks, search/filter, fork-from-here +- **Enhanced code lens** — Refactor, Find Bugs, and Generate Tests alongside Ask GSD +- **8 new commands** (33 total) ## [0.1.0] @@ -31,7 +52,7 @@ Initial release. - Bash terminal — pseudoterminal routing agent Bash tool output - Session tree — browse and switch between session files - Conversation history — webview panel with full chat log -- Slash command completion — auto-complete for `/gsd` commands in editors +- Slash command completion — auto-complete for `/gsd` commands - Code lens — "Ask GSD" above functions and classes in TS/JS/Python/Go/Rust - 25 commands with 6 keyboard shortcuts - Auto-start, auto-compaction, and code lens configuration diff --git a/vscode-extension/README.md b/vscode-extension/README.md index f0f249c43..899012880 100644 --- a/vscode-extension/README.md +++ b/vscode-extension/README.md @@ -1,88 +1,193 @@ # GSD-2 — VS Code Extension -Control the [GSD-2 coding agent](https://github.com/gsd-build/gsd-2) directly from VS Code. Run autonomous coding sessions, chat with `@gsd` in VS Code Chat, and monitor your agent from a sidebar dashboard — all without leaving the editor. +Control the [GSD-2 coding agent](https://github.com/gsd-build/gsd-2) directly from VS Code. Run autonomous coding sessions, chat with `@gsd`, monitor agent activity in real-time, review and accept/reject changes, and manage your workflow — all without leaving the editor. 
+ +![GSD Extension Overview](docs/images/overview.png) ## Requirements -GSD must be installed before activating this extension: - -```bash -npm install -g gsd-pi -``` - -Node.js ≥ 22.0.0 and Git are required. - -## Features - -### Sidebar Dashboard - -Click the GSD icon in the Activity Bar to open the agent dashboard. It shows: - -- Connection status (connected / disconnected) -- Active model and provider -- Thinking level -- Token usage and session cost -- Quick action buttons: Start, Stop, New Session, Compact, Abort - -### Chat Integration (`@gsd`) - -Use `@gsd` in VS Code Chat (`Ctrl+Shift+I`) to send messages to the agent: - -``` -@gsd refactor the auth module to use JWT -@gsd /gsd auto -@gsd what's the current milestone status? -``` - -### Commands - -All commands are accessible via `Ctrl+Shift+P`: - -| Command | Description | -|---------|-------------| -| **GSD: Start Agent** | Connect to the GSD agent | -| **GSD: Stop Agent** | Disconnect the agent | -| **GSD: New Session** | Start a fresh conversation | -| **GSD: Send Message** | Send a message to the agent | -| **GSD: Abort Current Operation** | Interrupt the current operation | -| **GSD: Steer Agent** | Send a steering message mid-operation | -| **GSD: Switch Model** | Pick a model from QuickPick | -| **GSD: Cycle Model** | Rotate to the next configured model | -| **GSD: Set Thinking Level** | Choose off / low / medium / high | -| **GSD: Cycle Thinking Level** | Rotate through thinking levels | -| **GSD: Compact Context** | Manually trigger context compaction | -| **GSD: Export Conversation as HTML** | Save the session as HTML | -| **GSD: Show Session Stats** | Display token usage and cost | -| **GSD: Run Bash Command** | Execute a shell command via the agent | -| **GSD: List Available Commands** | Browse and run GSD slash commands | - -### Keyboard Shortcuts - -| Shortcut | Command | -|----------|---------| -| `Ctrl+Shift+G Ctrl+Shift+N` | New Session | -| `Ctrl+Shift+G Ctrl+Shift+M` | Cycle Model | -| 
`Ctrl+Shift+G Ctrl+Shift+T` | Cycle Thinking Level | - -## Configuration - -| Setting | Default | Description | -|---------|---------|-------------| -| `gsd.binaryPath` | `"gsd"` | Path to the GSD binary if not on PATH | -| `gsd.autoStart` | `false` | Start the agent automatically when the extension activates | -| `gsd.autoCompaction` | `true` | Enable automatic context compaction | +- **GSD-2** installed globally: `npm install -g gsd-pi` +- **Node.js** >= 22.0.0 +- **Git** installed and on PATH +- **VS Code** >= 1.95.0 ## Quick Start 1. Install GSD: `npm install -g gsd-pi` 2. Install this extension 3. Open a project folder in VS Code -4. `Ctrl+Shift+P` → **GSD: Start Agent** -5. Use `@gsd` in Chat or the sidebar to interact with the agent +4. Click the **GSD icon** in the Activity Bar (left sidebar) +5. Click **Start Agent** or run `Ctrl+Shift+P` > **GSD: Start Agent** +6. Start chatting with `@gsd` in Chat or click **Auto** in the sidebar + +--- + +## Features + +### Sidebar Dashboard + +Click the **GSD icon** in the Activity Bar. The compact header shows connection status, model, session, message count, thinking level, context usage bar, and cost — all in two lines. Sections (Workflow, Stats, Actions, Settings) are collapsible and remember their state. + +### Workflow Controls + +One-click buttons for GSD's core commands. 
All route through the Chat panel so you see the full response: + +| Button | What it does | +|--------|-------------| +| **Auto** | Start autonomous mode — research, plan, execute | +| **Next** | Execute one unit of work, then pause | +| **Quick** | Quick task without planning (opens input) | +| **Capture** | Capture a thought for later triage | + +### Chat Integration (`@gsd`) + +Use `@gsd` in VS Code Chat (`Cmd+Shift+I`) to talk to the agent: + +``` +@gsd refactor the auth module to use JWT +@gsd /gsd auto +@gsd fix the errors in this file +``` + +- **Auto-starts** the agent if not running +- **File context** via `#file` references +- **Selection context** — automatically includes selected code +- **Diagnostic context** — auto-includes errors/warnings when you mention "fix" or "error" +- **Streaming** progress, file anchors, token usage footer + +### Source Control Integration + +Agent-modified files appear in a dedicated **"GSD Agent"** section of the Source Control panel: + +- **Click any file** to see a before/after diff in VS Code's native diff editor +- **Accept** or **Discard** changes per-file via inline buttons +- **Accept All** / **Discard All** via the SCM title bar +- Gutter diff indicators (green/red bars) show exactly what changed + +### Line-Level Decorations + +When the agent modifies a file, you'll see: +- **Green background** on newly added lines +- **Yellow background** on modified lines +- **Left border gutter indicator** on all agent-touched lines +- **Hover** any decorated line to see "Modified by GSD Agent" + +### Checkpoints & Rollback + +Automatic checkpoints are created at the start of each agent turn. Use **Discard All** in the SCM panel to revert all agent changes to their original state, or discard individual files. 
+ +### Activity Feed + +The **Activity** panel shows a real-time log of every tool the agent executes — Read, Write, Edit, Bash, Grep, Glob — with status icons (running/success/error), duration, and click-to-open for file operations. + +### Sessions + +The **Sessions** panel lists all past sessions for the current workspace. Click any session to switch to it. The current session is highlighted green. Sessions persist to disk automatically. + +### Diagnostic Integration + +- **Fix Errors** button in the sidebar reads the active file's diagnostics from the Problems panel and sends them to the agent +- **Fix All Problems** (`Cmd+Shift+P` > GSD: Fix All Problems) collects errors/warnings across the workspace +- Works automatically in chat — mention "fix" or "error" and diagnostics are included + +### Code Lens + +Four inline actions above every function and class (TS/JS/Python/Go/Rust): + +| Action | What it does | +|--------|-------------| +| **Ask GSD** | Explain the function/class | +| **Refactor** | Improve clarity, performance, or structure | +| **Find Bugs** | Review for bugs and edge cases | +| **Tests** | Generate test coverage | + +### Git Integration + +- **Commit Agent Changes** — stages and commits modified files with your message +- **Create Branch** — create a new branch for agent work +- **Show Diff** — view git diff of agent changes + +### Approval Modes + +Control how much autonomy the agent has: + +| Mode | Behavior | +|------|----------| +| **Auto-approve** | Agent runs freely (default) | +| **Ask** | Prompts before file writes and commands | +| **Plan-only** | Read-only — agent can analyze but not modify | + +Change via Settings section or `Cmd+Shift+P` > **GSD: Select Approval Mode**. + +### Agent UI Requests + +When the agent needs input (questions, confirmations, selections), VS Code dialogs appear automatically — no more hanging on `ask_user_questions`. 
+ +### Additional Features + +- **Conversation History** — full message viewer with tool calls, thinking blocks, search, and fork-from-here +- **Slash Command Completion** — type `/` for auto-complete of `/gsd` commands +- **File Decorations** — "G" badge on agent-modified files in the Explorer +- **Bash Terminal** — dedicated terminal for agent shell output +- **Context Window Warning** — notification when context exceeds threshold +- **Progress Notifications** — optional notification with cancel button (off by default) + +--- + +## All Commands + +| Command | Shortcut | Description | +|---------|----------|-------------| +| **GSD: Start Agent** | | Connect to the GSD agent | +| **GSD: Stop Agent** | | Disconnect the agent | +| **GSD: New Session** | `Cmd+Shift+G` `Cmd+Shift+N` | Start a fresh conversation | +| **GSD: Send Message** | `Cmd+Shift+G` `Cmd+Shift+P` | Send a message to the agent | +| **GSD: Abort** | `Cmd+Shift+G` `Cmd+Shift+A` | Interrupt the current operation | +| **GSD: Steer Agent** | `Cmd+Shift+G` `Cmd+Shift+I` | Steering message mid-operation | +| **GSD: Switch Model** | | Pick a model from QuickPick | +| **GSD: Cycle Model** | `Cmd+Shift+G` `Cmd+Shift+M` | Rotate to the next model | +| **GSD: Set Thinking Level** | | Choose off / low / medium / high | +| **GSD: Cycle Thinking** | `Cmd+Shift+G` `Cmd+Shift+T` | Rotate through thinking levels | +| **GSD: Compact Context** | | Trigger context compaction | +| **GSD: Export HTML** | | Save session as HTML | +| **GSD: Session Stats** | | Display token usage and cost | +| **GSD: Run Bash** | | Execute a shell command | +| **GSD: List Commands** | | Browse slash commands | +| **GSD: Set Session Name** | | Rename current session | +| **GSD: Copy Last Response** | | Copy to clipboard | +| **GSD: Switch Session** | | Load a different session | +| **GSD: Show History** | | Open conversation viewer | +| **GSD: Fork Session** | | Fork from a previous message | +| **GSD: Fix Problems in File** | | Send file 
diagnostics to agent | +| **GSD: Fix All Problems** | | Send workspace errors to agent | +| **GSD: Commit Agent Changes** | | Git commit modified files | +| **GSD: Create Branch** | | Create branch for agent work | +| **GSD: Show Agent Diff** | | View git diff | +| **GSD: Accept All Changes** | | Accept all SCM changes | +| **GSD: Discard All Changes** | | Revert all agent modifications | +| **GSD: Select Approval Mode** | | Choose auto-approve/ask/plan-only | +| **GSD: Cycle Approval Mode** | | Rotate through approval modes | +| **GSD: Code Lens** actions | | Ask, Refactor, Find Bugs, Tests | + +> On Windows/Linux, replace `Cmd` with `Ctrl`. + +## Configuration + +| Setting | Default | Description | +|---------|---------|-------------| +| `gsd.binaryPath` | `"gsd"` | Path to the GSD binary | +| `gsd.autoStart` | `false` | Start agent on extension activation | +| `gsd.autoCompaction` | `true` | Automatic context compaction | +| `gsd.codeLens` | `true` | Code lens above functions/classes | +| `gsd.showProgressNotifications` | `false` | Progress notification (off — Chat shows progress) | +| `gsd.activityFeedMaxItems` | `100` | Max items in Activity feed | +| `gsd.showContextWarning` | `true` | Warn when context exceeds threshold | +| `gsd.contextWarningThreshold` | `80` | Context % that triggers warning | +| `gsd.approvalMode` | `"auto-approve"` | Agent permission mode | ## How It Works -The extension spawns `gsd --mode rpc` in the background and communicates over JSON-RPC via stdin/stdout. All RPC commands are supported, including streaming events for real-time sidebar updates. +The extension spawns `gsd --mode rpc` and communicates over JSON-RPC via stdin/stdout. Agent events stream in real-time. The change tracker captures file state before modifications for SCM diffs and rollback. UI requests from the agent (questions, confirmations) are handled via VS Code dialogs. 
## Links diff --git a/vscode-extension/docs/images/overview.png b/vscode-extension/docs/images/overview.png new file mode 100644 index 000000000..eafd6a1df Binary files /dev/null and b/vscode-extension/docs/images/overview.png differ diff --git a/vscode-extension/package.json b/vscode-extension/package.json index 8ea2de271..2a2088fdf 100644 --- a/vscode-extension/package.json +++ b/vscode-extension/package.json @@ -3,7 +3,7 @@ "displayName": "GSD-2", "description": "VS Code integration for the GSD-2 coding agent — sidebar dashboard, @gsd chat participant, activity feed, conversation history, code lens, session forking, slash command completion, workflow controls, and 33 commands", "publisher": "FluxLabs", - "version": "0.2.0", + "version": "0.3.0", "icon": "logo.jpg", "license": "MIT", "repository": { @@ -168,6 +168,67 @@ { "command": "gsd.generateTestsSymbol", "title": "GSD: Generate Tests for Symbol" + }, + { + "command": "gsd.acceptAllChanges", + "title": "GSD: Accept All Agent Changes", + "icon": "$(check-all)" + }, + { + "command": "gsd.discardAllChanges", + "title": "GSD: Discard All Agent Changes", + "icon": "$(discard)" + }, + { + "command": "gsd.acceptFileChanges", + "title": "Accept Changes", + "icon": "$(check)" + }, + { + "command": "gsd.discardFileChanges", + "title": "Discard Changes", + "icon": "$(discard)" + }, + { + "command": "gsd.restoreCheckpoint", + "title": "GSD: Restore Checkpoint" + }, + { + "command": "gsd.fixProblemsInFile", + "title": "GSD: Fix Problems in File" + }, + { + "command": "gsd.fixAllProblems", + "title": "GSD: Fix All Problems" + }, + { + "command": "gsd.clearDiagnostics", + "title": "GSD: Clear Agent Diagnostics" + }, + { + "command": "gsd.commitAgentChanges", + "title": "GSD: Commit Agent Changes" + }, + { + "command": "gsd.createAgentBranch", + "title": "GSD: Create Branch for Agent Work" + }, + { + "command": "gsd.showAgentDiff", + "title": "GSD: Show Agent Diff" + }, + { + "command": "gsd.clearPlan", + "title": "GSD: 
Clear Plan View", + "icon": "$(clear-all)" + }, + { + "command": "gsd.cycleApprovalMode", + "title": "GSD: Cycle Approval Mode" + }, + { + "command": "gsd.selectApprovalMode", + "title": "GSD: Select Approval Mode" } ], "keybindings": [ @@ -240,6 +301,30 @@ "when": "view == gsd-activity", "group": "navigation" } + ], + "scm/title": [ + { + "command": "gsd.acceptAllChanges", + "group": "navigation", + "when": "scmProvider == gsd" + }, + { + "command": "gsd.discardAllChanges", + "group": "navigation", + "when": "scmProvider == gsd" + } + ], + "scm/resourceState/context": [ + { + "command": "gsd.acceptFileChanges", + "group": "inline", + "when": "scmProvider == gsd" + }, + { + "command": "gsd.discardFileChanges", + "group": "inline", + "when": "scmProvider == gsd" + } ] }, "chatParticipants": [ @@ -276,7 +361,7 @@ }, "gsd.showProgressNotifications": { "type": "boolean", - "default": true, + "default": false, "description": "Show progress notification while the agent is working" }, "gsd.activityFeedMaxItems": { @@ -297,6 +382,17 @@ "minimum": 50, "maximum": 95, "description": "Context window usage percentage that triggers a warning" + }, + "gsd.approvalMode": { + "type": "string", + "default": "auto-approve", + "enum": ["auto-approve", "ask", "plan-only"], + "enumDescriptions": [ + "Agent runs freely without prompts", + "Prompt before file changes and commands", + "Read-only mode — agent can analyze but not modify" + ], + "description": "Approval mode for agent actions" } } } diff --git a/vscode-extension/src/change-tracker.ts b/vscode-extension/src/change-tracker.ts new file mode 100644 index 000000000..f10191d65 --- /dev/null +++ b/vscode-extension/src/change-tracker.ts @@ -0,0 +1,295 @@ +import * as vscode from "vscode"; +import * as fs from "node:fs"; +import type { GsdClient, AgentEvent } from "./gsd-client.js"; + +export interface FileSnapshot { + uri: vscode.Uri; + originalContent: string; + timestamp: number; +} + +export interface Checkpoint { + id: number; + 
label: string; + timestamp: number; + /** Map of file path → original content at checkpoint creation time */ + snapshots: Map<string, string>; +} + +/** + * Tracks file changes made by the GSD agent. Stores original file content + * before the agent modifies it, enabling diff views, SCM integration, + * and checkpoint/rollback functionality. + */ +export class GsdChangeTracker implements vscode.Disposable { + /** file path → original content (before first agent modification this session) */ + private originals = new Map<string, string>(); + /** Set of file paths modified in the current agent turn */ + private currentTurnFiles = new Set<string>(); + /** Ordered list of checkpoints */ + private _checkpoints: Checkpoint[] = []; + private nextCheckpointId = 1; + /** toolUseId → file path for in-flight tool executions */ + private pendingTools = new Map<string, string>(); + /** Whether the current turn has been described in the checkpoint label */ + private turnDescribed = false; + + private readonly _onDidChange = new vscode.EventEmitter<string[]>(); + /** Fires when the set of tracked files changes. Payload is array of changed file paths.
*/ + readonly onDidChange = this._onDidChange.event; + + private readonly _onCheckpointChange = new vscode.EventEmitter<void>(); + readonly onCheckpointChange = this._onCheckpointChange.event; + + private disposables: vscode.Disposable[] = []; + + constructor(private readonly client: GsdClient) { + this.disposables.push(this._onDidChange, this._onCheckpointChange); + + this.disposables.push( + client.onEvent((evt) => this.handleEvent(evt)), + client.onConnectionChange((connected) => { + if (!connected) { + this.reset(); + } + }), + ); + } + + /** All file paths that have been modified by the agent */ + get modifiedFiles(): string[] { + return [...this.originals.keys()]; + } + + /** Get the original content of a file (before agent first modified it) */ + getOriginal(filePath: string): string | undefined { + return this.originals.get(filePath); + } + + /** Whether the tracker has any modifications */ + get hasChanges(): boolean { + return this.originals.size > 0; + } + + /** Current checkpoints (in creation order, oldest first) */ + get checkpoints(): readonly Checkpoint[] { + return this._checkpoints; + } + + /** + * Discard agent changes to a single file — restore original content. + * Returns true if the file was restored. + */ + async discardFile(filePath: string): Promise<boolean> { + const original = this.originals.get(filePath); + if (original === undefined) return false; + + try { + await fs.promises.writeFile(filePath, original, "utf8"); + this.originals.delete(filePath); + this._onDidChange.fire([filePath]); + return true; + } catch { + return false; + } + } + + /** + * Discard all agent changes — restore all files to their original state. + */ + async discardAll(): Promise<number> { + let count = 0; + const paths = [...this.originals.keys()]; + for (const filePath of paths) { + if (await this.discardFile(filePath)) { + count++; + } + } + return count; + } + + /** + * Accept changes to a file — remove from tracking (keep the current content).
+ */ + acceptFile(filePath: string): void { + if (this.originals.delete(filePath)) { + this._onDidChange.fire([filePath]); + } + } + + /** + * Accept all changes — clear all tracking. + */ + acceptAll(): void { + const paths = [...this.originals.keys()]; + this.originals.clear(); + if (paths.length > 0) { + this._onDidChange.fire(paths); + } + } + + /** + * Restore all files to a checkpoint state. + */ + async restoreCheckpoint(checkpointId: number): Promise<number> { + const idx = this._checkpoints.findIndex((c) => c.id === checkpointId); + if (idx === -1) return 0; + + const checkpoint = this._checkpoints[idx]; + let count = 0; + + for (const [filePath, content] of checkpoint.snapshots) { + try { + await fs.promises.writeFile(filePath, content, "utf8"); + count++; + } catch { + // skip files that can't be restored + } + } + + // Reset originals to the checkpoint state + this.originals = new Map(checkpoint.snapshots); + + // Remove this checkpoint and all later ones + this._checkpoints = this._checkpoints.slice(0, idx); + + this._onDidChange.fire([...checkpoint.snapshots.keys()]); + this._onCheckpointChange.fire(); + return count; + } + + /** Clear all tracking state */ + reset(): void { + const paths = [...this.originals.keys()]; + this.originals.clear(); + this.currentTurnFiles.clear(); + this.pendingTools.clear(); + this._checkpoints = []; + this.nextCheckpointId = 1; + if (paths.length > 0) { + this._onDidChange.fire(paths); + } + this._onCheckpointChange.fire(); + } + + dispose(): void { + for (const d of this.disposables) { + d.dispose(); + } + } + + private handleEvent(evt: AgentEvent): void { + switch (evt.type) { + case "agent_start": + this.createCheckpoint(); + this.currentTurnFiles.clear(); + this.turnDescribed = false; + break; + + case "tool_execution_start": { + const toolName = String(evt.toolName ?? ""); + const toolInput = (evt.toolInput ?? {}) as Record<string, unknown>; + const toolUseId = String(evt.toolUseId ?? 
""); + + // Update checkpoint label with first action description + if (!this.turnDescribed) { + this.turnDescribed = true; + this.updateLatestCheckpointLabel(describeAction(toolName, toolInput)); + } + + if (toolName !== "Write" && toolName !== "Edit") break; + + const filePath = String(toolInput.file_path ?? toolInput.path ?? ""); + + if (!filePath) break; + + // Store the original content before the agent modifies it + // Only capture on FIRST modification (don't overwrite) + if (!this.originals.has(filePath)) { + try { + if (fs.existsSync(filePath)) { + const content = fs.readFileSync(filePath, "utf8"); + this.originals.set(filePath, content); + } else { + // File doesn't exist yet — original is "empty" (new file) + this.originals.set(filePath, ""); + } + } catch { + // Can't read file, skip tracking + } + } + + if (toolUseId) { + this.pendingTools.set(toolUseId, filePath); + } + break; + } + + case "tool_execution_end": { + const toolUseId = String(evt.toolUseId ?? ""); + const filePath = this.pendingTools.get(toolUseId); + if (filePath) { + this.pendingTools.delete(toolUseId); + this.currentTurnFiles.add(filePath); + this._onDidChange.fire([filePath]); + } + break; + } + } + } + + private createCheckpoint(): void { + const now = Date.now(); + const time = new Date(now).toLocaleTimeString([], { hour: "2-digit", minute: "2-digit", second: "2-digit" }); + const fileCount = this.originals.size; + const label = fileCount > 0 + ? `${time} (${fileCount} file${fileCount !== 1 ? "s" : ""} tracked)` + : `${time} (start)`; + + const checkpoint: Checkpoint = { + id: this.nextCheckpointId++, + label, + timestamp: now, + snapshots: new Map(this.originals), + }; + this._checkpoints.push(checkpoint); + this._onCheckpointChange.fire(); + } + + /** + * Update the label of the latest checkpoint with a description + * of the first action taken (called after first tool execution in a turn). 
+   */
+  private updateLatestCheckpointLabel(description: string): void {
+    if (this._checkpoints.length === 0) return;
+    const latest = this._checkpoints[this._checkpoints.length - 1];
+    const time = new Date(latest.timestamp).toLocaleTimeString([], { hour: "2-digit", minute: "2-digit", second: "2-digit" });
+    latest.label = `${time} — ${description}`;
+    this._onCheckpointChange.fire();
+  }
+}
+
+function describeAction(toolName: string, input: Record<string, unknown>): string {
+  switch (toolName) {
+    case "Read": {
+      const p = String(input.file_path ?? input.path ?? "");
+      return `Read ${p.split(/[\\/]/).pop() ?? p}`;
+    }
+    case "Write": {
+      const p = String(input.file_path ?? "");
+      return `Write ${p.split(/[\\/]/).pop() ?? p}`;
+    }
+    case "Edit": {
+      const p = String(input.file_path ?? "");
+      return `Edit ${p.split(/[\\/]/).pop() ?? p}`;
+    }
+    case "Bash":
+      return `$ ${String(input.command ?? "").slice(0, 40)}`;
+    case "Grep":
+      return `Grep: ${String(input.pattern ?? "").slice(0, 30)}`;
+    case "Glob":
+      return `Glob: ${String(input.pattern ??
"").slice(0, 30)}`; + default: + return toolName; + } +} diff --git a/vscode-extension/src/chat-participant.ts b/vscode-extension/src/chat-participant.ts index 01647e1ad..6ba3e60e2 100644 --- a/vscode-extension/src/chat-participant.ts +++ b/vscode-extension/src/chat-participant.ts @@ -39,6 +39,21 @@ export function registerChatParticipant( message = `${fileContext}\n\n${message}`; } + // Auto-include editor selection if present and not already referenced + const selectionContext = getSelectionContext(); + if (selectionContext) { + message = `${selectionContext}\n\n${message}`; + } + + // Auto-include diagnostics for the active file if the prompt mentions "fix", "error", "problem", "warning" + const fixKeywords = /\b(fix|error|problem|warning|issue|bug|lint|diagnos)/i; + if (fixKeywords.test(message)) { + const diagContext = getActiveDiagnosticsContext(); + if (diagContext) { + message = `${message}\n\n${diagContext}`; + } + } + // Track streaming state let agentDone = false; let totalInputTokens = 0; @@ -281,3 +296,42 @@ function resolveFileUri(fp: string): vscode.Uri | null { return null; } } + +/** + * Get the current editor selection as context, if any text is selected. + */ +function getSelectionContext(): string | null { + const editor = vscode.window.activeTextEditor; + if (!editor || editor.selection.isEmpty) return null; + + const selection = editor.document.getText(editor.selection); + if (!selection.trim()) return null; + + const relativePath = vscode.workspace.asRelativePath(editor.document.uri); + const { start, end } = editor.selection; + return `Selected code in \`${relativePath}\` (lines ${start.line + 1}-${end.line + 1}):\n\`\`\`\n${selection}\n\`\`\``; +} + +/** + * Get diagnostics (errors/warnings) for the active editor file. 
+ */ +function getActiveDiagnosticsContext(): string | null { + const editor = vscode.window.activeTextEditor; + if (!editor) return null; + + const diagnostics = vscode.languages.getDiagnostics(editor.document.uri); + const significant = diagnostics.filter( + (d) => d.severity === vscode.DiagnosticSeverity.Error || d.severity === vscode.DiagnosticSeverity.Warning, + ); + if (significant.length === 0) return null; + + const relativePath = vscode.workspace.asRelativePath(editor.document.uri); + const lines = [`Current diagnostics in \`${relativePath}\`:`]; + for (const d of significant) { + const sev = d.severity === vscode.DiagnosticSeverity.Error ? "Error" : "Warning"; + const line = d.range.start.line + 1; + const source = d.source ? ` [${d.source}]` : ""; + lines.push(`- ${sev} (line ${line}): ${d.message}${source}`); + } + return lines.join("\n"); +} diff --git a/vscode-extension/src/checkpoints.ts b/vscode-extension/src/checkpoints.ts new file mode 100644 index 000000000..584c9011c --- /dev/null +++ b/vscode-extension/src/checkpoints.ts @@ -0,0 +1,55 @@ +import * as vscode from "vscode"; +import type { GsdChangeTracker, Checkpoint } from "./change-tracker.js"; + +/** + * TreeDataProvider that shows agent checkpoints (one per agent turn). + * Each checkpoint can be restored to revert all file changes since that point. 
+ */
+export class GsdCheckpointProvider implements vscode.TreeDataProvider<Checkpoint>, vscode.Disposable {
+  public static readonly viewId = "gsd-checkpoints";
+
+  private readonly _onDidChangeTreeData = new vscode.EventEmitter<Checkpoint | undefined | null | void>();
+  readonly onDidChangeTreeData = this._onDidChangeTreeData.event;
+
+  private disposables: vscode.Disposable[] = [];
+
+  constructor(private readonly tracker: GsdChangeTracker) {
+    this.disposables.push(
+      this._onDidChangeTreeData,
+      tracker.onCheckpointChange(() => this._onDidChangeTreeData.fire()),
+    );
+  }
+
+  getTreeItem(checkpoint: Checkpoint): vscode.TreeItem {
+    const fileCount = checkpoint.snapshots.size;
+    const time = new Date(checkpoint.timestamp);
+    const timeStr = time.toLocaleTimeString([], { hour: "2-digit", minute: "2-digit", second: "2-digit" });
+
+    const item = new vscode.TreeItem(
+      checkpoint.label,
+      vscode.TreeItemCollapsibleState.None,
+    );
+    item.description = `${timeStr} (${fileCount} file${fileCount !== 1 ? "s" : ""})`;
+    item.iconPath = new vscode.ThemeIcon("history");
+    item.tooltip = `Checkpoint: ${checkpoint.label}\nTime: ${time.toLocaleString()}\nFiles tracked: ${fileCount}\n\nClick to restore to this point`;
+    item.contextValue = "checkpoint";
+    item.command = {
+      command: "gsd.restoreCheckpoint",
+      title: "Restore Checkpoint",
+      arguments: [checkpoint.id],
+    };
+
+    return item;
+  }
+
+  getChildren(): Checkpoint[] {
+    // Show newest first
+    return [...this.tracker.checkpoints].reverse();
+  }
+
+  dispose(): void {
+    for (const d of this.disposables) {
+      d.dispose();
+    }
+  }
+}
diff --git a/vscode-extension/src/diagnostics.ts b/vscode-extension/src/diagnostics.ts
new file mode 100644
index 000000000..cd25ccfee
--- /dev/null
+++ b/vscode-extension/src/diagnostics.ts
@@ -0,0 +1,142 @@
+import * as vscode from "vscode";
+import type { GsdClient } from "./gsd-client.js";
+
+/**
+ * Integrates with VS Code's diagnostic system:
+ * - Reads diagnostics (errors/warnings) from the Problems panel and sends them to the agent
+
* - Provides a DiagnosticCollection for the agent to surface its own findings
+ */
+export class GsdDiagnosticBridge implements vscode.Disposable {
+  private readonly collection: vscode.DiagnosticCollection;
+  private disposables: vscode.Disposable[] = [];
+
+  constructor(private readonly client: GsdClient) {
+    this.collection = vscode.languages.createDiagnosticCollection("gsd");
+    this.disposables.push(this.collection);
+  }
+
+  /**
+   * Read all diagnostics for the active file and send them to the agent
+   * as a "fix these problems" prompt.
+   */
+  async fixProblemsInFile(): Promise<void> {
+    const editor = vscode.window.activeTextEditor;
+    if (!editor) {
+      vscode.window.showWarningMessage("No active file to fix.");
+      return;
+    }
+
+    const uri = editor.document.uri;
+    const diagnostics = vscode.languages.getDiagnostics(uri);
+
+    if (diagnostics.length === 0) {
+      vscode.window.showInformationMessage("No problems found in this file.");
+      return;
+    }
+
+    const fileName = vscode.workspace.asRelativePath(uri);
+    const problemText = formatDiagnostics(fileName, diagnostics);
+
+    const prompt = [
+      `Fix the following problems in \`${fileName}\`:`,
+      "",
+      problemText,
+      "",
+      "Fix all of these issues. Show me the changes.",
+    ].join("\n");
+
+    await this.client.sendPrompt(prompt);
+  }
+
+  /**
+   * Read all diagnostics across the workspace (errors and warnings) and send
+   * them to the agent as a "fix all errors" prompt.
+   */
+  async fixAllProblems(): Promise<void> {
+    const allDiagnostics = vscode.languages.getDiagnostics();
+    const errorFiles: { fileName: string; diagnostics: vscode.Diagnostic[] }[] = [];
+
+    for (const [uri, diagnostics] of allDiagnostics) {
+      // Only include errors and warnings, skip hints/info
+      const significant = diagnostics.filter(
+        (d) => d.severity === vscode.DiagnosticSeverity.Error || d.severity === vscode.DiagnosticSeverity.Warning,
+      );
+      if (significant.length > 0) {
+        errorFiles.push({
+          fileName: vscode.workspace.asRelativePath(uri),
+          diagnostics: significant,
+        });
+      }
+    }
+
+    if (errorFiles.length === 0) {
+      vscode.window.showInformationMessage("No errors or warnings found in the workspace.");
+      return;
+    }
+
+    // Cap at 20 files to avoid overwhelming the agent
+    const capped = errorFiles.slice(0, 20);
+    const totalProblems = capped.reduce((sum, f) => sum + f.diagnostics.length, 0);
+
+    const sections = capped.map((f) => formatDiagnostics(f.fileName, f.diagnostics));
+
+    const prompt = [
+      `Fix the following ${totalProblems} problems across ${capped.length} file${capped.length > 1 ? "s" : ""}:`,
+      "",
+      ...sections,
+      "",
+      "Fix all of these issues.",
+    ].join("\n");
+
+    await this.client.sendPrompt(prompt);
+  }
+
+  /**
+   * Add a GSD diagnostic (agent finding) to a file.
+   * Can be used to surface agent review findings in the Problems panel.
+   */
+  addFinding(
+    uri: vscode.Uri,
+    range: vscode.Range,
+    message: string,
+    severity: vscode.DiagnosticSeverity = vscode.DiagnosticSeverity.Warning,
+  ): void {
+    const existing = this.collection.get(uri) ??
[]; + const diagnostic = new vscode.Diagnostic(range, message, severity); + diagnostic.source = "GSD Agent"; + this.collection.set(uri, [...existing, diagnostic]); + } + + /** Clear all GSD diagnostics */ + clearFindings(): void { + this.collection.clear(); + } + + dispose(): void { + for (const d of this.disposables) { + d.dispose(); + } + } +} + +function formatDiagnostics(fileName: string, diagnostics: vscode.Diagnostic[]): string { + const lines = [`**${fileName}**`]; + for (const d of diagnostics) { + const severity = severityLabel(d.severity); + const line = d.range.start.line + 1; + const col = d.range.start.character + 1; + const source = d.source ? ` [${d.source}]` : ""; + lines.push(` - ${severity} (line ${line}:${col}): ${d.message}${source}`); + } + return lines.join("\n"); +} + +function severityLabel(severity: vscode.DiagnosticSeverity): string { + switch (severity) { + case vscode.DiagnosticSeverity.Error: return "Error"; + case vscode.DiagnosticSeverity.Warning: return "Warning"; + case vscode.DiagnosticSeverity.Information: return "Info"; + case vscode.DiagnosticSeverity.Hint: return "Hint"; + default: return "Unknown"; + } +} diff --git a/vscode-extension/src/extension.ts b/vscode-extension/src/extension.ts index d909c4e12..f5e494240 100644 --- a/vscode-extension/src/extension.ts +++ b/vscode-extension/src/extension.ts @@ -9,12 +9,24 @@ import { GsdConversationHistoryPanel } from "./conversation-history.js"; import { GsdSlashCompletionProvider } from "./slash-completion.js"; import { GsdCodeLensProvider } from "./code-lens.js"; import { GsdActivityFeedProvider } from "./activity-feed.js"; +import { GsdChangeTracker } from "./change-tracker.js"; +import { GsdScmProvider } from "./scm-provider.js"; +import { GsdDiagnosticBridge } from "./diagnostics.js"; +import { GsdLineDecorationManager } from "./line-decorations.js"; +import { GsdGitIntegration } from "./git-integration.js"; +import { GsdPermissionManager } from "./permissions.js"; let client: 
GsdClient | undefined; let sidebarProvider: GsdSidebarProvider | undefined; let fileDecorations: GsdFileDecorationProvider | undefined; let sessionTreeProvider: GsdSessionTreeProvider | undefined; let activityFeedProvider: GsdActivityFeedProvider | undefined; +let changeTracker: GsdChangeTracker | undefined; +let scmProvider: GsdScmProvider | undefined; +let diagnosticBridge: GsdDiagnosticBridge | undefined; +let lineDecorations: GsdLineDecorationManager | undefined; +let gitIntegration: GsdGitIntegration | undefined; +let permissionManager: GsdPermissionManager | undefined; function requireConnected(): boolean { if (!client?.isConnected) { @@ -128,6 +140,34 @@ export function activate(context: vscode.ExtensionContext): void { vscode.window.registerTreeDataProvider(GsdActivityFeedProvider.viewId, activityFeedProvider), ); + // -- Change tracker & SCM provider ------------------------------------- + + changeTracker = new GsdChangeTracker(client); + context.subscriptions.push(changeTracker); + + scmProvider = new GsdScmProvider(changeTracker, cwd); + context.subscriptions.push(scmProvider); + + // -- Diagnostics ------------------------------------------------------- + + diagnosticBridge = new GsdDiagnosticBridge(client); + context.subscriptions.push(diagnosticBridge); + + // -- Line-level decorations -------------------------------------------- + + lineDecorations = new GsdLineDecorationManager(changeTracker!); + context.subscriptions.push(lineDecorations); + + // -- Git integration --------------------------------------------------- + + gitIntegration = new GsdGitIntegration(changeTracker!, cwd); + context.subscriptions.push(gitIntegration); + + // -- Permissions ------------------------------------------------------- + + permissionManager = new GsdPermissionManager(client); + context.subscriptions.push(permissionManager); + // -- Progress notifications -------------------------------------------- let currentProgress: { resolve: () => void } | undefined; @@ -789,6 
+829,135 @@ export function activate(context: vscode.ExtensionContext): void { }), ); + // -- SCM commands ------------------------------------------------------- + + context.subscriptions.push( + vscode.commands.registerCommand("gsd.acceptAllChanges", () => { + changeTracker?.acceptAll(); + vscode.window.showInformationMessage("All agent changes accepted."); + }), + ); + + context.subscriptions.push( + vscode.commands.registerCommand("gsd.discardAllChanges", async () => { + if (!changeTracker?.hasChanges) { + vscode.window.showInformationMessage("No agent changes to discard."); + return; + } + const confirm = await vscode.window.showWarningMessage( + `Discard all agent changes (${changeTracker.modifiedFiles.length} files)?`, + { modal: true }, + "Discard", + ); + if (confirm === "Discard") { + const count = await changeTracker.discardAll(); + vscode.window.showInformationMessage(`Reverted ${count} file${count !== 1 ? "s" : ""}.`); + } + }), + ); + + context.subscriptions.push( + vscode.commands.registerCommand("gsd.discardFileChanges", async (resourceState: vscode.SourceControlResourceState) => { + if (!changeTracker || !resourceState?.resourceUri) return; + const filePath = resourceState.resourceUri.fsPath; + const success = await changeTracker.discardFile(filePath); + if (success) { + vscode.window.showInformationMessage(`Reverted ${vscode.workspace.asRelativePath(filePath)}`); + } + }), + ); + + context.subscriptions.push( + vscode.commands.registerCommand("gsd.acceptFileChanges", (resourceState: vscode.SourceControlResourceState) => { + if (!changeTracker || !resourceState?.resourceUri) return; + changeTracker.acceptFile(resourceState.resourceUri.fsPath); + }), + ); + + // -- Checkpoint commands ------------------------------------------------ + + context.subscriptions.push( + vscode.commands.registerCommand("gsd.restoreCheckpoint", async (checkpointId: number) => { + if (!changeTracker) return; + const checkpoint = changeTracker.checkpoints.find((c) => c.id 
=== checkpointId); + if (!checkpoint) return; + + const confirm = await vscode.window.showWarningMessage( + `Restore to "${checkpoint.label}"? This will revert files to their state at ${new Date(checkpoint.timestamp).toLocaleTimeString()}.`, + { modal: true }, + "Restore", + ); + if (confirm === "Restore") { + const count = await changeTracker.restoreCheckpoint(checkpointId); + vscode.window.showInformationMessage(`Restored ${count} file${count !== 1 ? "s" : ""} to checkpoint.`); + } + }), + ); + + // -- Diagnostic commands ------------------------------------------------ + + context.subscriptions.push( + vscode.commands.registerCommand("gsd.fixProblemsInFile", async () => { + if (!requireConnected()) return; + try { + await diagnosticBridge!.fixProblemsInFile(); + } catch (err) { + handleError(err, "Failed to fix problems"); + } + }), + ); + + context.subscriptions.push( + vscode.commands.registerCommand("gsd.fixAllProblems", async () => { + if (!requireConnected()) return; + try { + await diagnosticBridge!.fixAllProblems(); + } catch (err) { + handleError(err, "Failed to fix problems"); + } + }), + ); + + context.subscriptions.push( + vscode.commands.registerCommand("gsd.clearDiagnostics", () => { + diagnosticBridge?.clearFindings(); + }), + ); + + // -- Permission commands ------------------------------------------------ + + context.subscriptions.push( + vscode.commands.registerCommand("gsd.cycleApprovalMode", () => { + permissionManager?.cycleMode(); + }), + ); + + context.subscriptions.push( + vscode.commands.registerCommand("gsd.selectApprovalMode", () => { + permissionManager?.selectMode(); + }), + ); + + // -- Git commands ------------------------------------------------------- + + context.subscriptions.push( + vscode.commands.registerCommand("gsd.commitAgentChanges", () => { + gitIntegration?.commitAgentChanges(); + }), + ); + + context.subscriptions.push( + vscode.commands.registerCommand("gsd.createAgentBranch", () => { + 
gitIntegration?.createAgentBranch();
+    }),
+  );
+
+  context.subscriptions.push(
+    vscode.commands.registerCommand("gsd.showAgentDiff", () => {
+      gitIntegration?.showAgentDiff();
+    }),
+  );
+
   // -- Auto-start ---------------------------------------------------------
 
   if (config.get("autoStart", false)) {
@@ -802,9 +971,21 @@ export function deactivate(): void {
   fileDecorations?.dispose();
   sessionTreeProvider?.dispose();
   activityFeedProvider?.dispose();
+  changeTracker?.dispose();
+  scmProvider?.dispose();
+  diagnosticBridge?.dispose();
+  lineDecorations?.dispose();
+  gitIntegration?.dispose();
+  permissionManager?.dispose();
   client = undefined;
   sidebarProvider = undefined;
   fileDecorations = undefined;
   sessionTreeProvider = undefined;
   activityFeedProvider = undefined;
+  changeTracker = undefined;
+  scmProvider = undefined;
+  diagnosticBridge = undefined;
+  lineDecorations = undefined;
+  gitIntegration = undefined;
+  permissionManager = undefined;
 }
diff --git a/vscode-extension/src/git-integration.ts b/vscode-extension/src/git-integration.ts
new file mode 100644
index 000000000..dbec79dba
--- /dev/null
+++ b/vscode-extension/src/git-integration.ts
@@ -0,0 +1,122 @@
+import * as vscode from "vscode";
+import { exec } from "node:child_process";
+import type { GsdChangeTracker } from "./change-tracker.js";
+
+/**
+ * Provides git integration for agent changes — commit, branch, and diff.
+ */
+export class GsdGitIntegration implements vscode.Disposable {
+  private disposables: vscode.Disposable[] = [];
+
+  constructor(
+    private readonly tracker: GsdChangeTracker,
+    private readonly cwd: string,
+  ) {}
+
+  /**
+   * Commit all files modified by the agent with a user-provided message.
+   */
+  async commitAgentChanges(): Promise<void> {
+    const files = this.tracker.modifiedFiles;
+    if (files.length === 0) {
+      vscode.window.showInformationMessage("No agent changes to commit.");
+      return;
+    }
+
+    const defaultMsg = `feat: agent changes (${files.length} file${files.length !== 1 ?
"s" : ""})`;
+    const message = await vscode.window.showInputBox({
+      prompt: "Commit message for agent changes",
+      value: defaultMsg,
+      placeHolder: "feat: describe the changes",
+    });
+    if (!message) return;
+
+    try {
+      // Stage the modified files
+      await this.git(`add ${files.map((f) => `"${f}"`).join(" ")}`);
+      // Commit
+      await this.git(`commit -m "${message.replace(/"/g, '\\"')}"`);
+
+      // Accept all changes (clear tracking since they're committed)
+      this.tracker.acceptAll();
+
+      vscode.window.showInformationMessage(`Committed ${files.length} file${files.length !== 1 ? "s" : ""}.`);
+    } catch (err) {
+      const msg = err instanceof Error ? err.message : String(err);
+      vscode.window.showErrorMessage(`Git commit failed: ${msg}`);
+    }
+  }
+
+  /**
+   * Create a new branch for agent work and switch to it.
+   */
+  async createAgentBranch(): Promise<void> {
+    const branchName = await vscode.window.showInputBox({
+      prompt: "Branch name for agent work",
+      placeHolder: "feat/agent-changes",
+      validateInput: (value) => {
+        if (!value.trim()) return "Branch name is required";
+        if (/\s/.test(value)) return "Branch name cannot contain spaces";
+        return null;
+      },
+    });
+    if (!branchName) return;
+
+    try {
+      await this.git(`checkout -b "${branchName}"`);
+      vscode.window.showInformationMessage(`Created and switched to branch: ${branchName}`);
+    } catch (err) {
+      const msg = err instanceof Error ? err.message : String(err);
+      vscode.window.showErrorMessage(`Failed to create branch: ${msg}`);
+    }
+  }
+
+  /**
+   * Show a git diff of all agent-modified files.
+   */
+  async showAgentDiff(): Promise<void> {
+    const files = this.tracker.modifiedFiles;
+    if (files.length === 0) {
+      vscode.window.showInformationMessage("No agent changes to diff.");
+      return;
+    }
+
+    try {
+      const diff = await this.git("diff");
+      if (!diff.trim()) {
+        // Files may be untracked — show status instead
+        const status = await this.git("status --short");
+        const channel = vscode.window.createOutputChannel("GSD Git Diff");
+        channel.appendLine("# Agent-modified files (unstaged):");
+        channel.appendLine(status);
+        channel.show();
+      } else {
+        const channel = vscode.window.createOutputChannel("GSD Git Diff");
+        channel.clear();
+        channel.appendLine(diff);
+        channel.show();
+      }
+    } catch (err) {
+      const msg = err instanceof Error ? err.message : String(err);
+      vscode.window.showErrorMessage(`Git diff failed: ${msg}`);
+    }
+  }
+
+  dispose(): void {
+    for (const d of this.disposables) {
+      d.dispose();
+    }
+  }
+
+  private git(args: string): Promise<string> {
+    return new Promise<string>((resolve, reject) => {
+      exec(`git ${args}`, { cwd: this.cwd, maxBuffer: 10 * 1024 * 1024 }, (err, stdout, stderr) => {
+        if (err) {
+          reject(new Error(stderr.trim() || err.message));
+        } else {
+          resolve(stdout);
+        }
+      });
+    });
+  }
+}
diff --git a/vscode-extension/src/gsd-client.ts b/vscode-extension/src/gsd-client.ts
index b8ae2bc35..b2a872c5e 100644
--- a/vscode-extension/src/gsd-client.ts
+++ b/vscode-extension/src/gsd-client.ts
@@ -123,11 +123,10 @@ export class GsdClient implements vscode.Disposable {
       return;
     }
 
-    const proc = spawn(this.binaryPath, ["--mode", "rpc", "--no-session"], {
+    const proc = spawn(this.binaryPath, ["--mode", "rpc"], {
       cwd: this.cwd,
       stdio: ["pipe", "pipe", "pipe"],
       env: { ...process.env },
-      shell: process.platform === "win32",
     });
 
     this.process = proc;
@@ -580,10 +579,104 @@ export class GsdClient implements vscode.Disposable {
       return;
     }
 
+    // Extension UI request — agent needs user input
+    if (data.type === "extension_ui_request" && typeof data.id === "string") {
+      void this.handleUIRequest(data);
+      return;
+    }
+
     // Streaming event
     this._onEvent.fire(data as AgentEvent);
   }
 
+  private async handleUIRequest(request: Record<string, unknown>): Promise<void> {
+    const id = request.id as string;
+    const method = request.method as string;
+
+    try {
+      switch (method) {
+        case "select": {
+          const options = (request.options as string[]) ?? [];
+          const title = String(request.title ?? "Select");
+          const allowMultiple = request.allowMultiple === true;
+
+          if (allowMultiple) {
+            const picked = await vscode.window.showQuickPick(options, {
+              title,
+              canPickMany: true,
+            });
+            if (picked) {
+              this.sendRaw({ type: "extension_ui_response", id, values: picked });
+            } else {
+              this.sendRaw({ type: "extension_ui_response", id, cancelled: true });
+            }
+          } else {
+            const picked = await vscode.window.showQuickPick(options, { title });
+            if (picked) {
+              this.sendRaw({ type: "extension_ui_response", id, value: picked });
+            } else {
+              this.sendRaw({ type: "extension_ui_response", id, cancelled: true });
+            }
+          }
+          break;
+        }
+
+        case "confirm": {
+          const title = String(request.title ?? "Confirm");
+          const message = String(request.message ?? "");
+          const result = await vscode.window.showInformationMessage(
+            `${title}: ${message}`,
+            { modal: true },
+            "Yes",
+            "No",
+          );
+          this.sendRaw({ type: "extension_ui_response", id, confirmed: result === "Yes" });
+          break;
+        }
+
+        case "input": {
+          const title = String(request.title ?? "Input");
+          const placeholder = String(request.placeholder ?? "");
+          const value = await vscode.window.showInputBox({ title, placeHolder: placeholder });
+          if (value !== undefined) {
+            this.sendRaw({ type: "extension_ui_response", id, value });
+          } else {
+            this.sendRaw({ type: "extension_ui_response", id, cancelled: true });
+          }
+          break;
+        }
+
+        case "notify": {
+          const message = String(request.message ?? "");
+          const notifyType = String(request.notifyType ??
"info");
+          if (notifyType === "error") {
+            vscode.window.showErrorMessage(`GSD: ${message}`);
+          } else if (notifyType === "warning") {
+            vscode.window.showWarningMessage(`GSD: ${message}`);
+          } else {
+            vscode.window.showInformationMessage(`GSD: ${message}`);
+          }
+          // Notify doesn't need a response
+          break;
+        }
+
+        default:
+          // Unknown method — cancel to unblock the agent
+          this.sendRaw({ type: "extension_ui_response", id, cancelled: true });
+          break;
+      }
+    } catch {
+      // On error, cancel to unblock
+      this.sendRaw({ type: "extension_ui_response", id, cancelled: true });
+    }
+  }
+
+  private sendRaw(data: Record<string, unknown>): void {
+    if (this.process?.stdin) {
+      this.process.stdin.write(JSON.stringify(data) + "\n");
+    }
+  }
+
   private send(command: Record<string, unknown>): Promise<unknown> {
     if (!this.process?.stdin) {
       return Promise.reject(new Error("GSD client not started"));
diff --git a/vscode-extension/src/line-decorations.ts b/vscode-extension/src/line-decorations.ts
new file mode 100644
index 000000000..387986f79
--- /dev/null
+++ b/vscode-extension/src/line-decorations.ts
@@ -0,0 +1,130 @@
+import * as vscode from "vscode";
+import type { GsdChangeTracker } from "./change-tracker.js";
+
+/**
+ * Provides line-level editor decorations for files modified by the GSD agent.
+ * Shows subtle background highlights on changed lines and gutter indicators.
+ */
+export class GsdLineDecorationManager implements vscode.Disposable {
+  private readonly addedDecoration: vscode.TextEditorDecorationType;
+  private readonly modifiedDecoration: vscode.TextEditorDecorationType;
+  private readonly gutterDecoration: vscode.TextEditorDecorationType;
+  private disposables: vscode.Disposable[] = [];
+
+  constructor(private readonly tracker: GsdChangeTracker) {
+    this.addedDecoration = vscode.window.createTextEditorDecorationType({
+      isWholeLine: true,
+      backgroundColor: "rgba(78, 201, 176, 0.07)",
+      overviewRulerColor: "rgba(78, 201, 176, 0.5)",
+      overviewRulerLane: vscode.OverviewRulerLane.Left,
+    });
+
+    this.modifiedDecoration = vscode.window.createTextEditorDecorationType({
+      isWholeLine: true,
+      backgroundColor: "rgba(204, 167, 0, 0.07)",
+      overviewRulerColor: "rgba(204, 167, 0, 0.5)",
+      overviewRulerLane: vscode.OverviewRulerLane.Left,
+    });
+
+    this.gutterDecoration = vscode.window.createTextEditorDecorationType({
+      // Use a colored left border as a gutter indicator; gutterIconPath
+      // requires a file path/Uri (a theme-icon id won't render), so the
+      // border is the more reliable option here
+      borderWidth: "0 0 0 3px",
+      borderStyle: "solid",
+      borderColor: "rgba(78, 201, 176, 0.4)",
+    });
+
+    this.disposables.push(
+      this.addedDecoration,
+      this.modifiedDecoration,
+      this.gutterDecoration,
+    );
+
+    // Refresh decorations when tracked files change
+    this.disposables.push(
+      tracker.onDidChange(() => this.refreshAll()),
+      vscode.window.onDidChangeActiveTextEditor(() => this.refreshAll()),
+      vscode.workspace.onDidChangeTextDocument((e) => {
+        const editor = vscode.window.activeTextEditor;
+        if (editor && e.document === editor.document) {
+          this.refreshEditor(editor);
+        }
+      }),
+    );
+  }
+
+  private refreshAll(): void {
+    for (const editor of vscode.window.visibleTextEditors) {
+      this.refreshEditor(editor);
+    }
+  }
+
+  private refreshEditor(editor: vscode.TextEditor): void {
+    const filePath = editor.document.uri.fsPath;
+    const
original = this.tracker.getOriginal(filePath);
+
+    if (original === undefined) {
+      // No tracked changes for this file — clear decorations
+      editor.setDecorations(this.addedDecoration, []);
+      editor.setDecorations(this.modifiedDecoration, []);
+      editor.setDecorations(this.gutterDecoration, []);
+      return;
+    }
+
+    const currentLines = editor.document.getText().split("\n");
+    const originalLines = original.split("\n");
+    const { added, modified } = diffLines(originalLines, currentLines);
+
+    const addedRanges = added.map((line) => {
+      const range = new vscode.Range(line, 0, line, currentLines[line]?.length ?? 0);
+      // Second MarkdownString arg enables $(...) theme-icon rendering
+      return { range, hoverMessage: new vscode.MarkdownString("$(hubot) *Added by GSD Agent*", true) };
+    });
+
+    const modifiedRanges = modified.map((line) => {
+      const range = new vscode.Range(line, 0, line, currentLines[line]?.length ?? 0);
+      return { range, hoverMessage: new vscode.MarkdownString("$(hubot) *Modified by GSD Agent*", true) };
+    });
+
+    const gutterRanges = [...added, ...modified].map((line) => ({
+      range: new vscode.Range(line, 0, line, 0),
+    }));
+
+    editor.setDecorations(this.addedDecoration, addedRanges);
+    editor.setDecorations(this.modifiedDecoration, modifiedRanges);
+    editor.setDecorations(this.gutterDecoration, gutterRanges);
+  }
+
+  dispose(): void {
+    for (const d of this.disposables) {
+      d.dispose();
+    }
+  }
+}
+
+/**
+ * Simple line-level diff: compare original vs current line-by-line.
+ * Returns arrays of line numbers that were added or modified.
+ */ +function diffLines( + originalLines: string[], + currentLines: string[], +): { added: number[]; modified: number[] } { + const added: number[] = []; + const modified: number[] = []; + + const maxShared = Math.min(originalLines.length, currentLines.length); + + for (let i = 0; i < maxShared; i++) { + if (originalLines[i] !== currentLines[i]) { + modified.push(i); + } + } + + // Lines beyond original length are "added" + for (let i = originalLines.length; i < currentLines.length; i++) { + added.push(i); + } + + return { added, modified }; +} diff --git a/vscode-extension/src/permissions.ts b/vscode-extension/src/permissions.ts new file mode 100644 index 000000000..32bcc9511 --- /dev/null +++ b/vscode-extension/src/permissions.ts @@ -0,0 +1,143 @@ +import * as vscode from "vscode"; +import type { GsdClient, AgentEvent } from "./gsd-client.js"; + +type ApprovalMode = "ask" | "auto-approve" | "plan-only"; + +/** + * Permission/approval system for agent actions. + * Can be configured to prompt before file writes, command execution, etc. 
+ */
+export class GsdPermissionManager implements vscode.Disposable {
+  private _mode: ApprovalMode = "auto-approve";
+  private disposables: vscode.Disposable[] = [];
+
+  private readonly _onModeChange = new vscode.EventEmitter<ApprovalMode>();
+  readonly onModeChange = this._onModeChange.event;
+
+  constructor(private readonly client: GsdClient) {
+    // Load saved mode from configuration
+    this._mode = vscode.workspace.getConfiguration("gsd").get<ApprovalMode>("approvalMode", "auto-approve");
+
+    this.disposables.push(
+      this._onModeChange,
+      vscode.workspace.onDidChangeConfiguration((e) => {
+        if (e.affectsConfiguration("gsd.approvalMode")) {
+          this._mode = vscode.workspace.getConfiguration("gsd").get<ApprovalMode>("approvalMode", "auto-approve");
+          this._onModeChange.fire(this._mode);
+        }
+      }),
+    );
+
+    // If mode is "ask", intercept tool executions for write operations
+    if (this._mode === "ask") {
+      this.disposables.push(
+        client.onEvent((evt) => this.handleEvent(evt)),
+      );
+    }
+  }
+
+  get mode(): ApprovalMode {
+    return this._mode;
+  }
+
+  /**
+   * Cycle through approval modes: auto-approve -> ask -> plan-only -> auto-approve
+   */
+  async cycleMode(): Promise<void> {
+    const modes: ApprovalMode[] = ["auto-approve", "ask", "plan-only"];
+    const currentIdx = modes.indexOf(this._mode);
+    this._mode = modes[(currentIdx + 1) % modes.length];
+
+    await vscode.workspace.getConfiguration("gsd").update("approvalMode", this._mode, vscode.ConfigurationTarget.Workspace);
+    this._onModeChange.fire(this._mode);
+
+    const labels: Record<ApprovalMode, string> = {
+      "auto-approve": "Auto-Approve (agent runs freely)",
+      "ask": "Ask (prompt before file changes)",
+      "plan-only": "Plan Only (read-only, no writes)",
+    };
+    vscode.window.showInformationMessage(`Approval mode: ${labels[this._mode]}`);
+  }
+
+  /**
+   * Show a QuickPick to select approval mode.
+ */
+  async selectMode(): Promise<void> {
+    const items: (vscode.QuickPickItem & { mode: ApprovalMode })[] = [
+      {
+        label: "$(check) Auto-Approve",
+        description: "Agent runs freely without prompts",
+        detail: "Best for trusted workflows. The agent can read, write, and execute without asking.",
+        mode: "auto-approve",
+      },
+      {
+        label: "$(shield) Ask",
+        description: "Prompt before file changes",
+        detail: "The agent will ask for approval before writing or editing files.",
+        mode: "ask",
+      },
+      {
+        label: "$(eye) Plan Only",
+        description: "Read-only mode, no writes allowed",
+        detail: "The agent can read and analyze but cannot modify files or run commands.",
+        mode: "plan-only",
+      },
+    ];
+
+    const selected = await vscode.window.showQuickPick(items, {
+      placeHolder: `Current mode: ${this._mode}`,
+    });
+
+    if (selected) {
+      this._mode = selected.mode;
+      await vscode.workspace.getConfiguration("gsd").update("approvalMode", this._mode, vscode.ConfigurationTarget.Workspace);
+      this._onModeChange.fire(this._mode);
+    }
+  }
+
+  dispose(): void {
+    for (const d of this.disposables) {
+      d.dispose();
+    }
+  }
+
+  private async handleEvent(evt: AgentEvent): Promise<void> {
+    if (this._mode !== "ask") return;
+    if (evt.type !== "tool_execution_start") return;
+
+    const toolName = String(evt.toolName ?? "");
+    if (toolName !== "Write" && toolName !== "Edit" && toolName !== "Bash") return;
+
+    const toolInput = (evt.toolInput ?? {}) as Record<string, unknown>;
+    let description = "";
+
+    switch (toolName) {
+      case "Write":
+      case "Edit": {
+        const filePath = String(toolInput.file_path ?? "");
+        const shortPath = filePath.split(/[\\/]/).slice(-3).join("/");
+        description = `${toolName}: ${shortPath}`;
+        break;
+      }
+      case "Bash": {
+        const cmd = String(toolInput.command ?? "").slice(0, 80);
+        description = `Execute: ${cmd}`;
+        break;
+      }
+    }
+
+    // Note: In practice, the RPC protocol doesn't support blocking tool execution
+    // for approval.
This notification serves as awareness — the user sees what's + // happening and can abort if needed. True blocking approval would require + // protocol changes in the RPC server. + vscode.window.showInformationMessage( + `Agent: ${description}`, + "OK", + "Abort", + ).then((choice) => { + if (choice === "Abort") { + this.client.abort().catch(() => {}); + } + }); + } +} diff --git a/vscode-extension/src/plan-viewer.ts b/vscode-extension/src/plan-viewer.ts new file mode 100644 index 000000000..a45b20978 --- /dev/null +++ b/vscode-extension/src/plan-viewer.ts @@ -0,0 +1,190 @@ +import * as vscode from "vscode"; +import type { GsdClient, AgentEvent } from "./gsd-client.js"; + +interface PlanStep { + id: number; + tool: string; + description: string; + status: "pending" | "running" | "done" | "error"; + timestamp: number; + duration?: number; +} + +/** + * TreeDataProvider that shows a plan-like view of agent tool executions. + * Displays steps as they happen, showing what the agent is doing and + * what it has completed — a live execution plan. 
+ */
+export class GsdPlanViewerProvider implements vscode.TreeDataProvider<PlanStep>, vscode.Disposable {
+  public static readonly viewId = "gsd-plan";
+
+  private readonly _onDidChangeTreeData = new vscode.EventEmitter<PlanStep | undefined | void>();
+  readonly onDidChangeTreeData = this._onDidChangeTreeData.event;
+
+  private steps: PlanStep[] = [];
+  private nextId = 0;
+  private runningTools = new Map<string, number>(); // toolUseId -> step id
+  private disposables: vscode.Disposable[] = [];
+
+  constructor(private readonly client: GsdClient) {
+    this.disposables.push(
+      this._onDidChangeTreeData,
+      client.onEvent((evt) => this.handleEvent(evt)),
+      client.onConnectionChange((connected) => {
+        if (!connected) {
+          this.steps = [];
+          this.runningTools.clear();
+          this._onDidChangeTreeData.fire();
+        }
+      }),
+    );
+  }
+
+  getTreeItem(step: PlanStep): vscode.TreeItem {
+    const icon = stepIcon(step.status);
+    const item = new vscode.TreeItem(step.description, vscode.TreeItemCollapsibleState.None);
+    item.iconPath = icon;
+    item.description = step.duration !== undefined ? `${step.duration}ms` : step.status === "running" ? "running..." : "";
+
+    const time = new Date(step.timestamp).toLocaleTimeString([], { hour: "2-digit", minute: "2-digit", second: "2-digit" });
+    item.tooltip = `${step.tool}: ${step.description}\nStatus: ${step.status}\nTime: ${time}`;
+
+    return item;
+  }
+
+  getChildren(): PlanStep[] {
+    return this.steps;
+  }
+
+  clear(): void {
+    this.steps = [];
+    this.runningTools.clear();
+    this._onDidChangeTreeData.fire();
+  }
+
+  dispose(): void {
+    for (const d of this.disposables) {
+      d.dispose();
+    }
+  }
+
+  private handleEvent(evt: AgentEvent): void {
+    switch (evt.type) {
+      case "agent_start": {
+        // Don't clear — keep history visible. Add a separator.
+        if (this.steps.length > 0) {
+          this.steps.push({
+            id: this.nextId++,
+            tool: "separator",
+            description: "--- New Turn ---",
+            status: "done",
+            timestamp: Date.now(),
+          });
+        }
+        this.steps.push({
+          id: this.nextId++,
+          tool: "agent",
+          description: "Agent started",
+          status: "running",
+          timestamp: Date.now(),
+        });
+        this._onDidChangeTreeData.fire();
+        break;
+      }
+
+      case "agent_end": {
+        // Mark the agent step as done
+        const agentStep = [...this.steps].reverse().find((s) => s.tool === "agent" && s.status === "running");
+        if (agentStep) {
+          agentStep.status = "done";
+          agentStep.duration = Date.now() - agentStep.timestamp;
+          agentStep.description = "Agent finished";
+        }
+        this._onDidChangeTreeData.fire();
+        break;
+      }
+
+      case "tool_execution_start": {
+        const toolName = String(evt.toolName ?? "");
+        const toolInput = (evt.toolInput ?? {}) as Record<string, unknown>;
+        const toolUseId = String(evt.toolUseId ?? "");
+        const description = describeStep(toolName, toolInput);
+
+        const id = this.nextId++;
+        this.steps.push({
+          id,
+          tool: toolName,
+          description,
+          status: "running",
+          timestamp: Date.now(),
+        });
+
+        if (toolUseId) {
+          this.runningTools.set(toolUseId, id);
+        }
+
+        // Cap at 200 steps
+        while (this.steps.length > 200) {
+          this.steps.shift();
+        }
+
+        this._onDidChangeTreeData.fire();
+        break;
+      }
+
+      case "tool_execution_end": {
+        const toolUseId = String(evt.toolUseId ?? "");
+        const stepId = this.runningTools.get(toolUseId);
+        if (stepId !== undefined) {
+          this.runningTools.delete(toolUseId);
+          const step = this.steps.find((s) => s.id === stepId);
+          if (step) {
+            const isError = evt.error === true || evt.isError === true;
+            step.status = isError ?
"error" : "done"; + step.duration = Date.now() - step.timestamp; + this._onDidChangeTreeData.fire(); + } + } + break; + } + } + } +} + +function stepIcon(status: string): vscode.ThemeIcon { + switch (status) { + case "running": + return new vscode.ThemeIcon("sync~spin", new vscode.ThemeColor("charts.yellow")); + case "done": + return new vscode.ThemeIcon("pass", new vscode.ThemeColor("testing.iconPassed")); + case "error": + return new vscode.ThemeIcon("error", new vscode.ThemeColor("testing.iconFailed")); + default: + return new vscode.ThemeIcon("circle-outline"); + } +} + +function describeStep(toolName: string, input: Record): string { + switch (toolName) { + case "Read": { + const p = String(input.file_path ?? input.path ?? ""); + return `Read ${p.split(/[\\/]/).pop() ?? p}`; + } + case "Write": { + const p = String(input.file_path ?? ""); + return `Write ${p.split(/[\\/]/).pop() ?? p}`; + } + case "Edit": { + const p = String(input.file_path ?? ""); + return `Edit ${p.split(/[\\/]/).pop() ?? p}`; + } + case "Bash": + return `$ ${String(input.command ?? "").slice(0, 50)}`; + case "Grep": + return `Grep: ${String(input.pattern ?? "").slice(0, 40)}`; + case "Glob": + return `Glob: ${String(input.pattern ?? "").slice(0, 40)}`; + default: + return toolName; + } +} diff --git a/vscode-extension/src/scm-provider.ts b/vscode-extension/src/scm-provider.ts new file mode 100644 index 000000000..2320ab6d5 --- /dev/null +++ b/vscode-extension/src/scm-provider.ts @@ -0,0 +1,124 @@ +import * as vscode from "vscode"; +import * as path from "node:path"; +import type { GsdChangeTracker } from "./change-tracker.js"; + +const GSD_ORIGINAL_SCHEME = "gsd-original"; + +/** + * Source Control provider that shows files modified by the GSD agent + * in a dedicated "GSD Agent" section of the Source Control panel. + * Supports QuickDiff to show before/after diffs, and accept/discard per-file. 
+ */ +export class GsdScmProvider implements vscode.Disposable { + private readonly scm: vscode.SourceControl; + private readonly changesGroup: vscode.SourceControlResourceGroup; + private readonly contentProvider: GsdOriginalContentProvider; + private disposables: vscode.Disposable[] = []; + + constructor( + private readonly tracker: GsdChangeTracker, + private readonly workspaceRoot: string, + ) { + // Register content provider for original file contents + this.contentProvider = new GsdOriginalContentProvider(tracker); + this.disposables.push( + vscode.workspace.registerTextDocumentContentProvider( + GSD_ORIGINAL_SCHEME, + this.contentProvider, + ), + ); + + // Create source control instance + this.scm = vscode.scm.createSourceControl( + "gsd", + "GSD Agent", + vscode.Uri.file(workspaceRoot), + ); + this.scm.quickDiffProvider = { + provideOriginalResource: (uri: vscode.Uri): vscode.Uri | undefined => { + const filePath = uri.fsPath; + if (this.tracker.getOriginal(filePath) !== undefined) { + return uri.with({ scheme: GSD_ORIGINAL_SCHEME }); + } + return undefined; + }, + }; + this.scm.inputBox.placeholder = "Describe changes to accept..."; + this.scm.acceptInputCommand = { + command: "gsd.acceptAllChanges", + title: "Accept All", + }; + this.scm.count = 0; + this.disposables.push(this.scm); + + // Create resource group + this.changesGroup = this.scm.createResourceGroup("changes", "Agent Changes"); + this.changesGroup.hideWhenEmpty = true; + this.disposables.push(this.changesGroup); + + // Listen for change tracker updates + this.disposables.push( + tracker.onDidChange(() => this.refresh()), + ); + + this.refresh(); + } + + private refresh(): void { + const files = this.tracker.modifiedFiles; + this.changesGroup.resourceStates = files.map((filePath) => { + const uri = vscode.Uri.file(filePath); + const fileName = path.basename(filePath); + const relativePath = path.relative(this.workspaceRoot, filePath); + + const state: vscode.SourceControlResourceState = { + 
resourceUri: uri,
+        decorations: {
+          strikeThrough: false,
+          tooltip: `Modified by GSD Agent`,
+          light: { iconPath: new vscode.ThemeIcon("edit") },
+          dark: { iconPath: new vscode.ThemeIcon("edit") },
+        },
+        command: {
+          command: "vscode.diff",
+          title: "Show Changes",
+          arguments: [
+            uri.with({ scheme: GSD_ORIGINAL_SCHEME }),
+            uri,
+            `${fileName} (GSD Agent Changes)`,
+          ],
+        },
+      };
+      return state;
+    });
+    this.scm.count = files.length;
+  }
+
+  dispose(): void {
+    for (const d of this.disposables) {
+      d.dispose();
+    }
+  }
+}
+
+/**
+ * TextDocumentContentProvider that serves the original (pre-agent) content
+ * of files via the `gsd-original:` URI scheme.
+ */
+class GsdOriginalContentProvider implements vscode.TextDocumentContentProvider {
+  private readonly _onDidChange = new vscode.EventEmitter<vscode.Uri>();
+  readonly onDidChange = this._onDidChange.event;
+
+  constructor(private readonly tracker: GsdChangeTracker) {
+    tracker.onDidChange((paths) => {
+      for (const p of paths) {
+        this._onDidChange.fire(vscode.Uri.file(p).with({ scheme: GSD_ORIGINAL_SCHEME }));
+      }
+    });
+  }
+
+  provideTextDocumentContent(uri: vscode.Uri): string {
+    const filePath = uri.with({ scheme: "file" }).fsPath;
+    return this.tracker.getOriginal(filePath) ?? "";
+  }
+}
diff --git a/vscode-extension/src/session-tree.ts b/vscode-extension/src/session-tree.ts
index e61898e0a..a38413be4 100644
--- a/vscode-extension/src/session-tree.ts
+++ b/vscode-extension/src/session-tree.ts
@@ -56,18 +56,35 @@ export class GsdSessionTreeProvider implements vscode.TreeDataProvider
-      // <timestamp>_<sessionId>.jsonl
-      const match = file.match(/^(\d+)_(.+)\.jsonl$/);
-      if (!match) {
+      const sessionFile = path.join(sessionDir, file);
+
+      // Try two filename formats:
+      // 1. ISO timestamp: 2026-03-23T17-49-05-784Z_<sessionId>.jsonl
+      // 2.
Unix timestamp: <unix-ms>_<sessionId>.jsonl
+      const isoMatch = file.match(/^(\d{4}-\d{2}-\d{2}T[\d-]+Z)_(.+)\.jsonl$/);
+      const unixMatch = file.match(/^(\d{10,})_(.+)\.jsonl$/);
+
+      let timestamp: Date;
+      let sessionId: string;
+
+      if (isoMatch) {
+        // Convert ISO-like format (dashes instead of colons) back to parseable ISO
+        const isoStr = isoMatch[1].replace(/(\d{4}-\d{2}-\d{2}T\d{2})-(\d{2})-(\d{2})-(\d+)Z/, "$1:$2:$3.$4Z");
+        timestamp = new Date(isoStr);
+        sessionId = isoMatch[2];
+      } else if (unixMatch) {
+        timestamp = new Date(parseInt(unixMatch[1], 10));
+        sessionId = unixMatch[2];
+      } else {
         continue;
       }
-      const ts = parseInt(match[1], 10);
-      const sessionId = match[2];
-      const sessionFile = path.join(sessionDir, file);
+
+      if (isNaN(timestamp.getTime())) continue;
+
       items.push({
-        label: formatDate(new Date(ts)),
+        label: formatDate(timestamp),
         sessionFile,
-        timestamp: new Date(ts),
+        timestamp,
         sessionId,
         isCurrent: sessionFile === state.sessionFile,
       });
diff --git a/vscode-extension/src/sidebar.ts b/vscode-extension/src/sidebar.ts
index 12c718633..b8bb2aee0 100644
--- a/vscode-extension/src/sidebar.ts
+++ b/vscode-extension/src/sidebar.ts
@@ -2,8 +2,17 @@ import * as vscode from "vscode";
 import type { GsdClient, SessionStats, ThinkingLevel } from "./gsd-client.js";
 
 /**
- * WebviewViewProvider that renders a sidebar panel showing connection status,
- * model info, thinking level, token usage, cost, and quick action controls.
+ * Send a message through VS Code's Chat panel so the user sees the response.
+ * Opens the Chat panel and pre-fills the @gsd participant with the message.
+ */
+async function sendViaChat(message: string): Promise<void> {
+  await vscode.commands.executeCommand("workbench.action.chat.open", { query: message });
+}
+
+/**
+ * WebviewViewProvider that renders a compact, card-based sidebar panel.
+ * Designed for information density without clutter — collapsible sections,
+ * hidden empty data, and consolidated action buttons.
*/ export class GsdSidebarProvider implements vscode.WebviewViewProvider { public static readonly viewId = "gsd-sidebar"; @@ -106,22 +115,18 @@ export class GsdSidebarProvider implements vscode.WebviewViewProvider { await vscode.commands.executeCommand("gsd.copyLastResponse"); break; case "autoMode": - if (this.client.isConnected) { - await this.client.sendPrompt("/gsd auto").catch(() => {}); - } + await sendViaChat("@gsd /gsd auto"); break; case "nextUnit": - if (this.client.isConnected) { - await this.client.sendPrompt("/gsd next").catch(() => {}); - } + await sendViaChat("@gsd /gsd next"); break; case "quickTask": { const quickInput = await vscode.window.showInputBox({ prompt: "Describe the quick task", placeHolder: "e.g. fix the typo in README", }); - if (quickInput && this.client.isConnected) { - await this.client.sendPrompt(`/gsd quick ${quickInput}`).catch(() => {}); + if (quickInput) { + await sendViaChat(`@gsd /gsd quick ${quickInput}`); } break; } @@ -130,15 +135,13 @@ export class GsdSidebarProvider implements vscode.WebviewViewProvider { prompt: "Capture a thought", placeHolder: "e.g. 
we should also handle the edge case for...", }); - if (thought && this.client.isConnected) { - await this.client.sendPrompt(`/gsd capture ${thought}`).catch(() => {}); + if (thought) { + await sendViaChat(`@gsd /gsd capture ${thought}`); } break; } case "status": - if (this.client.isConnected) { - await this.client.sendPrompt("/gsd status").catch(() => {}); - } + await sendViaChat("@gsd /gsd status"); break; case "forkSession": await vscode.commands.executeCommand("gsd.forkSession"); @@ -149,6 +152,9 @@ export class GsdSidebarProvider implements vscode.WebviewViewProvider { case "toggleFollowUpMode": await vscode.commands.executeCommand("gsd.toggleFollowUpMode"); break; + case "showHistory": + await vscode.commands.executeCommand("gsd.showHistory"); + break; } }); @@ -168,6 +174,7 @@ export class GsdSidebarProvider implements vscode.WebviewViewProvider { } let modelName = "N/A"; + let modelShort = ""; let sessionId = "N/A"; let sessionName = ""; let messageCount = 0; @@ -189,6 +196,7 @@ export class GsdSidebarProvider implements vscode.WebviewViewProvider { modelName = state.model ? `${state.model.provider}/${state.model.id}` : "Not set"; + modelShort = state.model?.id ?? ""; sessionId = state.sessionId; sessionName = state.sessionName ?? ""; messageCount = state.messageCount; @@ -216,6 +224,7 @@ export class GsdSidebarProvider implements vscode.WebviewViewProvider { this.view.webview.html = this.getHtml({ connected, modelName, + modelShort, sessionId, sessionName, messageCount, @@ -244,6 +253,7 @@ export class GsdSidebarProvider implements vscode.WebviewViewProvider { private getHtml(info: { connected: boolean; modelName: string; + modelShort: string; sessionId: string; sessionName: string; messageCount: number; @@ -259,57 +269,49 @@ export class GsdSidebarProvider implements vscode.WebviewViewProvider { followUpMode: "all" | "one-at-a-time"; }): string { const statusColor = info.connected ? "#4ec9b0" : "#f44747"; - const statusText = info.connected - ? 
info.isStreaming - ? "Processing..." - : info.isCompacting - ? "Compacting..." - : "Connected" - : "Disconnected"; + const statusLabel = info.isStreaming ? "Working" : info.isCompacting ? "Compacting" : info.connected ? "Connected" : "Disconnected"; - const inputTokens = info.stats?.inputTokens?.toLocaleString() ?? "-"; - const outputTokens = info.stats?.outputTokens?.toLocaleString() ?? "-"; - const cacheRead = info.stats?.cacheReadTokens?.toLocaleString() ?? "-"; - const cacheWrite = info.stats?.cacheWriteTokens?.toLocaleString() ?? "-"; - const turnCount = info.stats?.turnCount?.toString() ?? "-"; - const duration = info.stats?.duration !== undefined - ? `${Math.round(info.stats.duration / 1000)}s` - : "-"; - const cost = info.stats?.totalCost !== undefined ? `$${info.stats.totalCost.toFixed(4)}` : "-"; + // Model short name for header + const modelDisplay = info.modelShort || "N/A"; - const thinkingBadge = info.thinkingLevel !== "off" - ? `${info.thinkingLevel}` - : `off`; + // Session display — name or truncated ID + const sessionDisplay = info.sessionName || (info.sessionId !== "N/A" ? info.sessionId.slice(0, 8) : "N/A"); - const autoCompBadge = info.autoCompaction - ? `on` - : `off`; - - const autoRetryBadge = info.autoRetry - ? `on` - : `off`; - - const streamingIndicator = info.isStreaming - ? `
Agent is working...
` + // Cost for header + const costDisplay = info.stats?.totalCost !== undefined && info.stats.totalCost > 0 + ? `$${info.stats.totalCost.toFixed(4)}` : ""; - // Context window usage + // Context window const totalTokens = (info.stats?.inputTokens ?? 0) + (info.stats?.outputTokens ?? 0); const contextPct = info.contextWindow > 0 ? Math.min(100, Math.round((totalTokens / info.contextWindow) * 100)) : 0; const contextColor = contextPct > 80 ? "#f44747" : contextPct > 50 ? "#cca700" : "#4ec9b0"; - const contextLabel = info.contextWindow > 0 - ? `${contextPct}% (${Math.round(totalTokens / 1000)}k / ${Math.round(info.contextWindow / 1000)}k)` - : "N/A"; - const steeringBadge = info.steeringMode === "one-at-a-time" - ? `1-at-a-time` - : `all`; - const followUpBadge = info.followUpMode === "one-at-a-time" - ? `1-at-a-time` - : `all`; + // Only show stats that have real data + const hasStats = info.stats && ( + (info.stats.inputTokens !== undefined && info.stats.inputTokens > 0) || + (info.stats.outputTokens !== undefined && info.stats.outputTokens > 0) + ); const nonce = getNonce(); + // Build stat rows only for non-zero values + let statRows = ""; + if (hasStats && info.stats) { + const pairs: [string, string][] = []; + if (info.stats.inputTokens) pairs.push(["In", formatNum(info.stats.inputTokens)]); + if (info.stats.outputTokens) pairs.push(["Out", formatNum(info.stats.outputTokens)]); + if (info.stats.cacheReadTokens) pairs.push(["Cache R", formatNum(info.stats.cacheReadTokens)]); + if (info.stats.cacheWriteTokens) pairs.push(["Cache W", formatNum(info.stats.cacheWriteTokens)]); + if (info.stats.turnCount) pairs.push(["Turns", String(info.stats.turnCount)]); + if (info.stats.duration) pairs.push(["Time", `${Math.round(info.stats.duration / 1000)}s`]); + if (info.stats.totalCost !== undefined && info.stats.totalCost > 0) pairs.push(["Cost", `$${info.stats.totalCost.toFixed(4)}`]); + + statRows = pairs.map(([k, v]) => + `${k}${v}` + ).join(""); + } + return /* html */ ` 
@@ -317,291 +319,329 @@ export class GsdSidebarProvider implements vscode.WebviewViewProvider { -
[Sidebar webview template hunk: the HTML markup in this hunk was stripped during extraction and cannot be faithfully reproduced. Removed (old layout): a status header with streaming indicator, a "Session" table (Model, Session, Messages, Thinking, Auto-compact, Auto-retry, Steering, Follow-up rows), a "Token Usage" grid (Input, Output, Cache read, Cache write, Turns, Duration, Cost), a "Context Window" progress bar, and "Workflow" / "Controls" / "Actions" button groups, each guarded by info.connected. Added (new layout): when connected, the body delegates to this.getConnectedHtml(info, { statusLabel, modelDisplay, sessionDisplay, costDisplay, contextPct, contextColor, hasStats: !!hasStats, statRows, nonce }); otherwise a compact "Disconnected" / "Agent is not running" card is rendered.]